From d27e6c2fbe8df0e0ec6061ec4b4e0884efc70ffa Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 17 May 2022 10:16:18 -0600 Subject: [PATCH 01/17] Add CLIP loss objectives --- captum/optim/_core/loss.py | 216 ++++++++++++++++++++++++- captum/optim/_utils/image/common.py | 53 +++++- tests/optim/core/test_loss.py | 159 ++++++++++++++++++ tests/optim/utils/image/test_common.py | 34 ++++ 4 files changed, 460 insertions(+), 2 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 66bb4c40c2..1dca3c50ad 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -5,7 +5,11 @@ import torch import torch.nn as nn -from captum.optim._utils.image.common import _dot_cossim, get_neuron_pos +from captum.optim._utils.image.common import ( + _create_new_vector, + _dot_cossim, + get_neuron_pos, +) from captum.optim._utils.typing import ModuleOutputMapping @@ -837,6 +841,216 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return activations +@loss_wrapper +class L2Mean(BaseLoss): + """ + Simple L2Loss penalty where the mean is used instead of the square root of the + sum. + + Used for CLIP models in https://distill.pub/2021/multimodal-neurons/ as per the + supplementary code: + https://github.com/openai/CLIP-featurevis/blob/master/example_facets.py + """ + + def __init__( + self, + target: torch.nn.Module, + channel_index: Optional[int] = None, + constant: float = 0.5, + batch_index: Optional[int] = None, + ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance. + channel_index (int, optional): Optionally only target a specific channel. + If set to None, all channels with be used. + Default: None + constant (float, optional): Constant value to deduct from the activations. + Default: 0.5 + batch_index (int, optional): The index of activations to optimize if + optimizing a batch of activations. If set to None, defaults to all + activations in the batch. + Default: None + """ + BaseLoss.__init__(self, target, batch_index) + self.constant = constant + self.channel_index = channel_index + + def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: + activations = targets_to_values[self.target][ + self.batch_index[0] : self.batch_index[1] + ] + if self.channel_index is not None: + activations = activations[:, self.channel_index : self.channel_index + 1] + return ((activations - self.constant) ** 2).mean() + + +@loss_wrapper +class VectorLoss(BaseLoss): + """ + This objective is useful for optimizing towards channel directions. This can + helpful for visualizing models like OpenAI's CLIP. + + This loss objective is similar to the Direction objective, except it computes the + matrix product of the activations and vector, rather than the cosine similarity. + In addition to optimizing towards channel directions, this objective can also + perform a similar role to the ChannelActivation objective by using one-hot 1D + vectors. + + See here for more details: + https://distill.pub/2021/multimodal-neurons/ + https://github.com/openai/CLIP-featurevis/blob/master/example_facets.py + """ + + def __init__( + self, + target: torch.nn.Module, + vec: torch.Tensor, + activation_fn: Optional[Callable] = torch.nn.functional.relu, + move_channel_dim_to_final_dim: bool = True, + batch_index: Optional[int] = None, + ) -> None: + """ + Args: + + target (nn.Module): A target layer instance. 
+ vec (torch.Tensor): A direction vector to use, with a compatible shape for + computing the matrix product of the activations. See torch.matmul for + See torch.matmul for more details on compatible shapes: + https://pytorch.org/docs/stable/generated/torch.matmul.html + By default, vec is expected to share the same size as the channel + dimension of the activations. + activation_fn (Callable, optional): An optional activation function to + apply to the activations before computing the matrix product. If set + to None, then no activation function will be used. + Default: torch.nn.functional.relu + move_channel_dim_to_final_dim (bool, optional): Whether or not to move the + channel dimension to the last dimension before computing the matrix + product. + Default: True + batch_index (int, optional): The index of activations to optimize if + optimizing a batch of activations. If set to None, defaults to all + activations in the batch. + Default: None + """ + BaseLoss.__init__(self, target, batch_index) + self.vec = vec + self.activation_fn = activation_fn + self.move_channel_dim_to_final_dim = move_channel_dim_to_final_dim + + def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: + activations = targets_to_values[self.target] + activations = activations[self.batch_index[0] : self.batch_index[1]] + return _create_new_vector( + activations, + vec=self.vec, + activation_fn=self.activation_fn, + move_channel_dim_to_final_dim=self.move_channel_dim_to_final_dim, + ).mean() + + +@loss_wrapper +class FacetLoss(BaseLoss): + """ + The Facet loss objective used for Faceted Feature Visualization as described in: + https://distill.pub/2021/multimodal-neurons/#faceted-feature-visualization + https://github.com/openai/CLIP-featurevis/blob/master/example_facets.py + + The FacetLoss objective allows us to steer feature visualization towards a + particular theme / concept. This is done by using the weights from linear probes + trained on the lower layers of a model to discriminate between a certain theme or + concept and generic natural images. + """ + + def __init__( + self, + vec: torch.Tensor, + ultimate_target: torch.nn.Module, + layer_target: Union[torch.nn.Module, List[torch.nn.Module]], + facet_weights: torch.Tensor, + strength: Optional[Union[float, List[float]]] = None, + batch_index: Optional[Union[int, List[int]]] = None, + ) -> None: + """ + Args: + + vec (torch.Tensor): A 1D channel vector. + ultimate_target (nn.Module): The main target layer that we are + visualizing targets from. This is normally the penultimate layer of + the model. + layer_target (nn.Module): A layer that we have facet_weights for. This + target layer should be below the ultimate_target layer in the model. + strength (float, list of float, optional): A list of floats to use for batch + dimension weighting. Default is set to None for no weighting. + Default: None + facet_weights (torch.Tensor): Weighting that steers the objective + towards a particular theme or concept. These weight values should + come from linear probes trained on layers in target_layers. + batch_index (int, optional): The index of the activations to optimize if + optimizing a batch of activations. If set to None, defaults to all + activations in the batch. 
+ Default: None + """ + BaseLoss.__init__(self, [ultimate_target, layer_target], batch_index) + self.ultimate_target = ultimate_target + self.layer_target = layer_target + self.vec = vec + self.strength = strength + assert facet_weights.dim() == 4 or facet_weights.dim() == 2 + self.facet_weights = facet_weights + + def _get_strength(self, batch: int, device: torch.device) -> torch.Tensor: + """ + Calculate batch weighting. + + Args: + + batch (int): The size of the batch dimension to use. + device (torch.device): The device to use. + + Returns: + strength_t (torch.Tensor): A tensor containing the weights to multiply the + different batch dimensions by. + """ + if isinstance(self.strength, (tuple, list)): + strength_t = torch.linspace( + self.strength[0], + self.strength[1], + steps=batch, + device=device, + ) + else: + strength_t = torch.ones([1], device=device) * self.strength + return strength_t[:, None, None, None] + + def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: + activations_ultimate = targets_to_values[self.ultimate_target] + activations_ultimate = activations_ultimate + new_vec = _create_new_vector(activations_ultimate, self.vec)[ + self.batch_index[0] : self.batch_index[1] + ] + target_activations = targets_to_values[self.layer_target] + + layer_grad = torch.autograd.grad( + outputs=new_vec, + inputs=target_activations, + grad_outputs=torch.ones_like(new_vec), + retain_graph=True, + )[0] + layer = target_activations[self.batch_index[0] : self.batch_index[1]] + + flat_attr = layer * torch.nn.functional.relu(layer_grad.detach()) + if self.facet_weights.dim() == 2 and flat_attr.dim() == 4: + flat_attr = torch.sum(flat_attr, dim=(2, 3)) + + if self.strength: + strength_t = self._get_strength(new_vec.shape[0], flat_attr.device) + flat_attr = strength_t * flat_attr + return torch.sum(flat_attr * self.facet_weights) + + def sum_loss_list( loss_list: List, to_scalar_fn: Callable[[torch.Tensor], torch.Tensor] = torch.mean, diff --git a/captum/optim/_utils/image/common.py b/captum/optim/_utils/image/common.py index f1cdc5f477..31af3169ef 100644 --- a/captum/optim/_utils/image/common.py +++ b/captum/optim/_utils/image/common.py @@ -1,5 +1,5 @@ import math -from typing import List, Optional, Tuple, Union +from typing import Callable, List, Optional, Tuple, Union import matplotlib.pyplot as plt import numpy as np @@ -363,3 +363,54 @@ def hex2base10(x: str) -> float: * ((1 - (-x - 0.5) * 2) * color_list[1] + (-x - 0.5) * 2 * color_list[0]) ).permute(2, 0, 1) return color_tensor + + +def _create_new_vector( + x: torch.Tensor, + vec: torch.Tensor, + activation_fn: Optional[ + Callable[[torch.Tensor], torch.Tensor] + ] = torch.nn.functional.relu, + move_channel_dim_to_final_dim: bool = True, +) -> torch.Tensor: + """ + Create a vector using a given set of activations and another vector. + This function is intended for use in CLIP related loss objectives. + + https://distill.pub/2021/multimodal-neurons/ + https://github.com/openai/CLIP-featurevis/blob/master/example_facets.py + The einsum equation: "ijkl,j->ikl", used by the paper's associated code is the + same thing as: "[..., C] @ vec", where vec has a shape of 'C'. + + Args: + + x (torch.Tensor): A set of 2d or 4d activations. + vec (torch.Tensor): A direction vector to use, with a compatible shape for + computing the matrix product of the activations. 
See torch.matmul for + See torch.matmul for more details on compatible shapes: + https://pytorch.org/docs/stable/generated/torch.matmul.html + By default, vec is expected to share the same size as the channel or + feature dimension of the activations. + activation_fn (Callable, optional): An optional activation function to + apply to the activations before computing the matrix product. If set + to None, then no activation function will be used. + Default: torch.nn.functional.relu + move_channel_dim_to_final_dim (bool, optional): Whether or not to move the + channel dimension to the last dimension before computing the matrix + product. + Default: True + + Returns + x (torch.Tensor): A vector created from the input activations and the + stored vector. + """ + assert x.device == vec.device + assert x.dim() > 1 + if activation_fn: + x = activation_fn(x) + if x.dim() > 2 and move_channel_dim_to_final_dim: + permute_vals = [0] + list(range(x.dim()))[2:] + [1] + x = x.permute(*permute_vals) + return torch.mean(x @ vec, [1, 2]) + else: + return (x @ vec)[:, None] diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 49c35ed9d4..4b516e4fa0 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -197,6 +197,165 @@ def test_activation_weights_1(self) -> None: ) +class TestL2Mean(BaseTest): + def test_l2mean_init(self) -> None: + model = torch.nn.Identity() + loss = opt_loss.L2Mean(model) + self.assertEqual(loss.constant, 0.5) + self.assertIsNone(loss.channel_index) + + def test_l2mean_constant(self) -> None: + model = BasicModel_ConvNet_Optim() + constant = 0.5 + loss = opt_loss.L2Mean(model.layer, constant=constant) + output = get_loss_value(model, loss) + + expected = (CHANNEL_ACTIVATION_0_LOSS - constant) ** 2 + self.assertAlmostEqual(output, expected, places=6) + + def test_l2mean_channel_index(self) -> None: + model = BasicModel_ConvNet_Optim() + constant = 0.0 + loss = opt_loss.L2Mean(model.layer, channel_index=0, constant=constant) + output = get_loss_value(model, loss) + + expected = (CHANNEL_ACTIVATION_0_LOSS - constant) ** 2 + self.assertAlmostEqual(output, expected, places=6) + + +class TestVectorLoss(BaseTest): + def test_vectorloss_init(self) -> None: + model = torch.nn.Identity() + vec = torch.tensor([0, 1]).float() + loss = opt_loss.VectorLoss(model, vec=vec) + assertTensorAlmostEqual(self, loss.vec, vec, delta=0.0) + self.assertTrue(loss.move_channel_dim_to_final_dim) + self.assertEqual(loss.activation_fn, torch.nn.functional.relu) + + def test_vectorloss_single_channel(self) -> None: + model = BasicModel_ConvNet_Optim() + vec = torch.tensor([0, 1]).float() + loss = opt_loss.VectorLoss(model.layer, vec=vec) + output = get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) + self.assertAlmostEqual(output, CHANNEL_ACTIVATION_1_LOSS, places=6) + + def test_vectorloss_multiple_channels(self) -> None: + model = BasicModel_ConvNet_Optim() + vec = torch.tensor([1, 1]).float() + loss = opt_loss.VectorLoss(model.layer, vec=vec) + output = get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) + self.assertAlmostEqual(output, CHANNEL_ACTIVATION_1_LOSS * 2, places=6) + + +class TestFacetLoss(BaseTest): + def test_facetloss_init(self) -> None: + model = torch.nn.Sequential(torch.nn.Identity(), torch.nn.Identity()) + vec = torch.tensor([0, 1, 0]).float() + facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0], + vec=vec, + facet_weights=facet_weights, + ) + 
assertTensorAlmostEqual(self, loss.vec, vec, delta=0.0) + assertTensorAlmostEqual(self, loss.facet_weights, facet_weights, delta=0.0) + + def test_facetloss_single_channel(self) -> None: + layer = torch.nn.Conv2d(2, 3, 1, bias=True) + layer.weight.data.fill_(0.1) + layer.bias.data.fill_(1) + model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) + + vec = torch.tensor([0, 1, 0]).float() + facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0].layer, + vec=vec, + facet_weights=facet_weights, + ) + output = get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) + expected = (CHANNEL_ACTIVATION_0_LOSS * 2) * 1.5 + self.assertAlmostEqual(output, expected / 10.0, places=6) + + def test_facetloss_multi_channel(self) -> None: + layer = torch.nn.Conv2d(2, 3, 1, bias=True) + layer.weight.data.fill_(0.1) + layer.bias.data.fill_(1) + + model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) + + vec = torch.tensor([1, 1, 1]).float() + facet_weights = torch.ones([1, 2, 1, 1]) * 2.0 + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0].layer, + vec=vec, + facet_weights=facet_weights, + ) + output = get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) + self.assertAlmostEqual(output, 1.560000, places=6) + + def test_facetloss_strength(self) -> None: + layer = torch.nn.Conv2d(2, 3, 1, bias=True) + layer.weight.data.fill_(0.1) + layer.bias.data.fill_(1) + model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) + + vec = torch.tensor([0, 1, 0]).float() + facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + strength = 0.5 + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0].layer, + vec=vec, + facet_weights=facet_weights, + strength=strength, + ) + self.assertEqual(loss.strength, strength) + output = get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) + self.assertAlmostEqual(output, 0.1950000, places=6) + + def test_facetloss_strength_batch(self) -> None: + layer = torch.nn.Conv2d(2, 3, 1, bias=True) + layer.weight.data.fill_(0.1) + layer.bias.data.fill_(1) + model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) + + vec = torch.tensor([0, 1, 0]).float() + facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + strength = [0.1, 5.05] + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0].layer, + vec=vec, + facet_weights=facet_weights, + strength=strength, + ) + self.assertEqual(loss.strength, strength) + output = get_loss_value(model, loss, input_shape=[4, 3, 6, 6]) + self.assertAlmostEqual(output, 4.017000198364258, places=6) + + def test_facetloss_2d_weights(self) -> None: + layer = torch.nn.Conv2d(2, 3, 1, bias=True) + layer.weight.data.fill_(0.1) + layer.bias.data.fill_(1) + model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) + + vec = torch.tensor([0, 1, 0]).float() + facet_weights = torch.ones([1, 2]) * 1.5 + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0].layer, + vec=vec, + facet_weights=facet_weights, + ) + output = get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) + expected = (CHANNEL_ACTIVATION_0_LOSS * 2) * 1.5 + self.assertAlmostEqual(output, expected / 10.0, places=6) + + class TestCompositeLoss(BaseTest): def test_negative(self) -> None: model = BasicModel_ConvNet_Optim() diff --git a/tests/optim/utils/image/test_common.py b/tests/optim/utils/image/test_common.py index ef484c7135..fcece26683 100644 --- a/tests/optim/utils/image/test_common.py +++ 
b/tests/optim/utils/image/test_common.py @@ -516,3 +516,37 @@ def test_make_grid_image_single_tensor_pad_value_jit_module(self) -> None: ) self.assertEqual(list(expected_output.shape), [1, 1, 7, 7]) assertTensorAlmostEqual(self, test_output, expected_output, 0) + + +class TestCreateNewVector(BaseTest): + def test_create_new_vector_one_hot(self) -> None: + x = torch.arange(0, 1 * 3 * 5 * 5).view(1, 3, 5, 5).float() + vec = torch.tensor([0, 1, 0]).float() + out = common._create_new_vector(x, vec) + self.assertEqual(out.item(), 37.0) + + def test_create_new_vector_one_hot_batch(self) -> None: + x = torch.arange(0, 4 * 3 * 5 * 5).view(4, 3, 5, 5).float() + vec = torch.tensor([0, 1, 0]).float() + out = common._create_new_vector(x, vec) + self.assertEqual(out.tolist(), [37.0, 112.0, 187.0, 262.0]) + + def test_create_new_vector(self) -> None: + x = torch.arange(0, 1 * 3 * 5 * 5).view(1, 3, 5, 5).float() + vec = torch.tensor([1, 1, 1]).float() + out = common._create_new_vector(x, vec) + self.assertEqual(out.item(), 111.0) + + def test_create_new_vector_activation_fn(self) -> None: + x = torch.arange(0, 1 * 3 * 5 * 5).view(1, 3, 5, 5).float() + x = x - x.mean() + vec = torch.tensor([1, 0, 1]).float() + out = common._create_new_vector(x, vec, activation_fn=torch.nn.functional.relu) + self.assertEqual(out.item(), 25.0) + + def test_create_new_vector_no_activation_fn(self) -> None: + x = torch.arange(0, 1 * 3 * 5 * 5).view(1, 3, 5, 5).float() + x = x - x.mean() + vec = torch.tensor([1, 1, 1]).float() + out = common._create_new_vector(x, vec, activation_fn=None) + self.assertEqual(out.item(), 0.0) From 77850c7ed2bb6b6065578a2d3fa38aadbfee4d90 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 17 May 2022 11:06:44 -0600 Subject: [PATCH 02/17] Fix Mypy error --- tests/optim/core/test_loss.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 4b516e4fa0..d2cf248bdc 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -263,8 +263,8 @@ def test_facetloss_init(self) -> None: def test_facetloss_single_channel(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.data.fill_(0.1) - layer.bias.data.fill_(1) + layer.weight.fill_(0.1) + layer.bias.fill_(1) model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() @@ -281,8 +281,8 @@ def test_facetloss_single_channel(self) -> None: def test_facetloss_multi_channel(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.data.fill_(0.1) - layer.bias.data.fill_(1) + layer.weight.fill_(0.1) + layer.bias.fill_(1) model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) @@ -299,8 +299,8 @@ def test_facetloss_multi_channel(self) -> None: def test_facetloss_strength(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.data.fill_(0.1) - layer.bias.data.fill_(1) + layer.weight.fill_(0.1) + layer.bias.fill_(1) model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() @@ -319,8 +319,8 @@ def test_facetloss_strength(self) -> None: def test_facetloss_strength_batch(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.data.fill_(0.1) - layer.bias.data.fill_(1) + layer.weight.fill_(0.1) + layer.bias.fill_(1) model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() @@ -339,8 +339,8 @@ def test_facetloss_strength_batch(self) -> None: 
def test_facetloss_2d_weights(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.data.fill_(0.1) - layer.bias.data.fill_(1) + layer.weight.fill_(0.1) + layer.bias.fill_(1) model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() From a4eee848254611125954cddbf057d063fc16c5c1 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 17 May 2022 11:46:17 -0600 Subject: [PATCH 03/17] Fix Mypy errors --- tests/optim/core/test_loss.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index d2cf248bdc..39d8ef4ee1 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -263,8 +263,8 @@ def test_facetloss_init(self) -> None: def test_facetloss_single_channel(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.fill_(0.1) - layer.bias.fill_(1) + layer.weight.data.fill_(0.1) # type: ignore + layer.bias.data.fill_(1) # type: ignore model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() @@ -281,8 +281,8 @@ def test_facetloss_single_channel(self) -> None: def test_facetloss_multi_channel(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.fill_(0.1) - layer.bias.fill_(1) + layer.weight.data.fill_(0.1) # type: ignore + layer.bias.data.fill_(1) # type: ignore model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) @@ -299,8 +299,8 @@ def test_facetloss_multi_channel(self) -> None: def test_facetloss_strength(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.fill_(0.1) - layer.bias.fill_(1) + layer.weight.data.fill_(0.1) # type: ignore + layer.bias.data.fill_(1) # type: ignore model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() @@ -319,8 +319,8 @@ def test_facetloss_strength(self) -> None: def test_facetloss_strength_batch(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.fill_(0.1) - layer.bias.fill_(1) + layer.weight.data.fill_(0.1) # type: ignore + layer.bias.data.fill_(1) # type: ignore model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() @@ -339,8 +339,8 @@ def test_facetloss_strength_batch(self) -> None: def test_facetloss_2d_weights(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.fill_(0.1) - layer.bias.fill_(1) + layer.weight.data.fill_(0.1) # type: ignore + layer.bias.data.fill_(1) # type: ignore model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() From 8c28dad6172ea1965b518882a6e30bea17425140 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 18 May 2022 14:12:39 -0600 Subject: [PATCH 04/17] Fix FacetLoss docs --- captum/optim/_core/loss.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 1dca3c50ad..94bcdbf9f5 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -981,13 +981,16 @@ def __init__( visualizing targets from. This is normally the penultimate layer of the model. layer_target (nn.Module): A layer that we have facet_weights for. This - target layer should be below the ultimate_target layer in the model. - strength (float, list of float, optional): A list of floats to use for batch - dimension weighting. Default is set to None for no weighting. 
- Default: None + target layer should be below the ultimate_target layer in the model. + strength (float, list of float, optional): A single float or list of floats + to use for batch dimension weighting. If using a single value, then it + will be applied to all batch dimensions equally. Otherwise a list of + floats with a shape of: [start, end] should be used for torch.linspace + to calculate the step values in between. Default is set to None for no + weighting. facet_weights (torch.Tensor): Weighting that steers the objective towards a particular theme or concept. These weight values should - come from linear probes trained on layers in target_layers. + come from linear probes trained on layer_target. batch_index (int, optional): The index of the activations to optimize if optimizing a batch of activations. If set to None, defaults to all activations in the batch. @@ -997,6 +1000,8 @@ def __init__( self.ultimate_target = ultimate_target self.layer_target = layer_target self.vec = vec + if isinstance(strength, (tuple, list)): + assert len(strength) == 2 self.strength = strength assert facet_weights.dim() == 4 or facet_weights.dim() == 2 self.facet_weights = facet_weights From 32fc6936783f839c428c7604fe584666b7fd12bf Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 18 May 2022 14:20:53 -0600 Subject: [PATCH 05/17] Improve VectorLoss docs --- captum/optim/_core/loss.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 94bcdbf9f5..cd52f02951 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -916,12 +916,8 @@ def __init__( Args: target (nn.Module): A target layer instance. - vec (torch.Tensor): A direction vector to use, with a compatible shape for - computing the matrix product of the activations. See torch.matmul for - See torch.matmul for more details on compatible shapes: - https://pytorch.org/docs/stable/generated/torch.matmul.html - By default, vec is expected to share the same size as the channel - dimension of the activations. + vec (torch.Tensor): A 1D channel vector with the same size as the + channel / feature dimension of the target layer instance. activation_fn (Callable, optional): An optional activation function to apply to the activations before computing the matrix product. If set to None, then no activation function will be used. @@ -936,6 +932,7 @@ def __init__( Default: None """ BaseLoss.__init__(self, target, batch_index) + assert vec.dim() == 1 self.vec = vec self.activation_fn = activation_fn self.move_channel_dim_to_final_dim = move_channel_dim_to_final_dim @@ -976,7 +973,8 @@ def __init__( """ Args: - vec (torch.Tensor): A 1D channel vector. + vec (torch.Tensor): A 1D channel vector with the same size as the + channel / feature dimension of ultimate_target. ultimate_target (nn.Module): The main target layer that we are visualizing targets from. This is normally the penultimate layer of the model. 
@@ -999,6 +997,7 @@ def __init__( BaseLoss.__init__(self, [ultimate_target, layer_target], batch_index) self.ultimate_target = ultimate_target self.layer_target = layer_target + assert vec.dim() == 1 self.vec = vec if isinstance(strength, (tuple, list)): assert len(strength) == 2 From 862ddce625ee419d4e0ca38f5f8791fc4ac517cf Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 22 May 2022 14:00:11 -0600 Subject: [PATCH 06/17] Add batch_index tests to new objectives --- tests/optim/core/test_loss.py | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 39d8ef4ee1..6ae0105f55 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -222,6 +222,16 @@ def test_l2mean_channel_index(self) -> None: expected = (CHANNEL_ACTIVATION_0_LOSS - constant) ** 2 self.assertAlmostEqual(output, expected, places=6) + def test_l2mean_batch_index(self) -> None: + raise unittest.SkipTest("Remove after PR merged") + model = torch.nn.Identity() + batch_index = 1 + loss = opt_loss.L2Mean(model, batch_index=batch_index) + + model_input = torch.arange(0, 5 * 4 * 5 * 5).view(5, 4, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(output.item(), 23034.25) + class TestVectorLoss(BaseTest): def test_vectorloss_init(self) -> None: @@ -246,6 +256,17 @@ def test_vectorloss_multiple_channels(self) -> None: output = get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) self.assertAlmostEqual(output, CHANNEL_ACTIVATION_1_LOSS * 2, places=6) + def test_vectorloss_batch_index(self) -> None: + raise unittest.SkipTest("Remove after PR merged") + model = torch.nn.Identity() + batch_index = 1 + vec = torch.tensor([0, 1, 0, 0]).float() + loss = opt_loss.VectorLoss(model, vec=vec, batch_index=batch_index) + + model_input = torch.arange(0, 5 * 4 * 5 * 5).view(5, 4, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(output.item(), 137.0) + class TestFacetLoss(BaseTest): def test_facetloss_init(self) -> None: @@ -355,6 +376,27 @@ def test_facetloss_2d_weights(self) -> None: expected = (CHANNEL_ACTIVATION_0_LOSS * 2) * 1.5 self.assertAlmostEqual(output, expected / 10.0, places=6) + def test_facetloss_batch_index(self) -> None: + raise unittest.SkipTest("Remove after PR merged") + batch_index = 1 + layer = torch.nn.Conv2d(2, 3, 1, bias=True) + layer.weight.data.fill_(0.1) # type: ignore + layer.bias.data.fill_(1) # type: ignore + model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) + + vec = torch.tensor([0, 1, 0]).float() + facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0].layer, + vec=vec, + facet_weights=facet_weights, + batch_index=batch_index, + ) + model_input = torch.arange(0, 5 * 3 * 5 * 5).view(5, 3, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertAlmostEqual(output.item(), 10.38000202178955, places=5) + class TestCompositeLoss(BaseTest): def test_negative(self) -> None: From 0e7d0f45c2fac21a6a793147f68dfd1a5f9f7eea Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 25 May 2022 14:47:42 -0600 Subject: [PATCH 07/17] Improve vector function --- captum/optim/_utils/image/common.py | 14 ++++++++------ tests/optim/utils/image/test_common.py | 13 +++++++++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/captum/optim/_utils/image/common.py b/captum/optim/_utils/image/common.py index 31af3169ef..1f2cced14f 
100644 --- a/captum/optim/_utils/image/common.py +++ b/captum/optim/_utils/image/common.py @@ -385,7 +385,7 @@ def _create_new_vector( Args: x (torch.Tensor): A set of 2d or 4d activations. - vec (torch.Tensor): A direction vector to use, with a compatible shape for + vec (torch.Tensor): A 1D direction vector to use, with a compatible shape for computing the matrix product of the activations. See torch.matmul for See torch.matmul for more details on compatible shapes: https://pytorch.org/docs/stable/generated/torch.matmul.html @@ -405,12 +405,14 @@ def _create_new_vector( stored vector. """ assert x.device == vec.device - assert x.dim() > 1 + assert x.dim() > 1 and vec.dim() == 1 if activation_fn: x = activation_fn(x) - if x.dim() > 2 and move_channel_dim_to_final_dim: - permute_vals = [0] + list(range(x.dim()))[2:] + [1] - x = x.permute(*permute_vals) - return torch.mean(x @ vec, [1, 2]) + if x.dim() > 2: + if move_channel_dim_to_final_dim: + permute_vals = [0] + list(range(x.dim()))[2:] + [1] + x = x.permute(*permute_vals) + mean_vals = list(range(1, x.dim() - 1)) + return torch.mean(x @ vec, mean_vals) else: return (x @ vec)[:, None] diff --git a/tests/optim/utils/image/test_common.py b/tests/optim/utils/image/test_common.py index fcece26683..09e1a7355c 100644 --- a/tests/optim/utils/image/test_common.py +++ b/tests/optim/utils/image/test_common.py @@ -550,3 +550,16 @@ def test_create_new_vector_no_activation_fn(self) -> None: vec = torch.tensor([1, 1, 1]).float() out = common._create_new_vector(x, vec, activation_fn=None) self.assertEqual(out.item(), 0.0) + + def test_create_new_vector_channels_last(self) -> None: + x = torch.arange(0, 4 * 5 * 5 * 3).view(4, 5, 5, 3).float() + vec = torch.tensor([0, 1, 0]).float() + out = common._create_new_vector(x, vec, move_channel_dim_to_final_dim=False) + self.assertEqual(out.tolist(), [37.0, 112.0, 187.0, 262.0]) + + def test_create_new_vector_dim_2(self) -> None: + x = torch.arange(0, 1 * 3).view(1, 3).float() + vec = torch.tensor([0, 1, 0]).float() + out = common._create_new_vector(x, vec) + self.assertEqual(list(out.shape), [1, 1]) + self.assertEqual(out.item(), 1.0) From 3b67bb047723497ae18afaf99bbf9e5dc67d55ba Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 28 May 2022 11:12:48 -0600 Subject: [PATCH 08/17] Improve the `FacetLoss` objective * Improve efficiency of the `FacetLoss` objective. --- captum/optim/_core/loss.py | 42 ++++++++++++-------------------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index cd52f02951..731eeb2346 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -1005,36 +1005,12 @@ def __init__( assert facet_weights.dim() == 4 or facet_weights.dim() == 2 self.facet_weights = facet_weights - def _get_strength(self, batch: int, device: torch.device) -> torch.Tensor: - """ - Calculate batch weighting. - - Args: - - batch (int): The size of the batch dimension to use. - device (torch.device): The device to use. - - Returns: - strength_t (torch.Tensor): A tensor containing the weights to multiply the - different batch dimensions by. 
- """ - if isinstance(self.strength, (tuple, list)): - strength_t = torch.linspace( - self.strength[0], - self.strength[1], - steps=batch, - device=device, - ) - else: - strength_t = torch.ones([1], device=device) * self.strength - return strength_t[:, None, None, None] - def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations_ultimate = targets_to_values[self.ultimate_target] - activations_ultimate = activations_ultimate - new_vec = _create_new_vector(activations_ultimate, self.vec)[ + activations_ultimate = activations_ultimate[ self.batch_index[0] : self.batch_index[1] ] + new_vec = _create_new_vector(activations_ultimate, self.vec) target_activations = targets_to_values[self.layer_target] layer_grad = torch.autograd.grad( @@ -1042,15 +1018,23 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: inputs=target_activations, grad_outputs=torch.ones_like(new_vec), retain_graph=True, - )[0] + )[0].detach()[self.batch_index[0] : self.batch_index[1]] layer = target_activations[self.batch_index[0] : self.batch_index[1]] - flat_attr = layer * torch.nn.functional.relu(layer_grad.detach()) + flat_attr = layer * torch.nn.functional.relu(layer_grad) if self.facet_weights.dim() == 2 and flat_attr.dim() == 4: flat_attr = torch.sum(flat_attr, dim=(2, 3)) if self.strength: - strength_t = self._get_strength(new_vec.shape[0], flat_attr.device) + if isinstance(self.strength, (tuple, list)): + strength_t = torch.linspace( + self.strength[0], + self.strength[1], + steps=flat_attr.shape[0], + device=flat_attr.device, + ).reshape(flat_attr.shape[0], *[1] * (flat_attr.dim() - 1)) + else: + strength_t = self.strength flat_attr = strength_t * flat_attr return torch.sum(flat_attr * self.facet_weights) From 4c51ef1c1f1ead191b370f589c61e478096d612f Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 28 May 2022 12:21:19 -0600 Subject: [PATCH 09/17] Add CLIP objectives to `__all__` --- captum/optim/_core/loss.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 731eeb2346..01894ae078 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -1110,6 +1110,9 @@ def default_loss_summarize(loss_value: torch.Tensor) -> torch.Tensor: "AngledNeuronDirection", "TensorDirection", "ActivationWeights", + "L2Mean", + "VectorLoss", + "FacetLoss", "sum_loss_list", "default_loss_summarize", ] From 31cb2a903330cb87e5dfbb76871a0138606d6a7e Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 28 May 2022 15:17:51 -0600 Subject: [PATCH 10/17] Fix mistake in FacetLoss docs --- captum/optim/_core/loss.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 01894ae078..6542e828b7 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -980,15 +980,16 @@ def __init__( the model. layer_target (nn.Module): A layer that we have facet_weights for. This target layer should be below the ultimate_target layer in the model. + facet_weights (torch.Tensor): Weighting that steers the objective + towards a particular theme or concept. These weight values should + come from linear probes trained on layer_target. strength (float, list of float, optional): A single float or list of floats to use for batch dimension weighting. If using a single value, then it will be applied to all batch dimensions equally. 
Otherwise a list of floats with a shape of: [start, end] should be used for torch.linspace to calculate the step values in between. Default is set to None for no weighting. - facet_weights (torch.Tensor): Weighting that steers the objective - towards a particular theme or concept. These weight values should - come from linear probes trained on layer_target. + Default: None batch_index (int, optional): The index of the activations to optimize if optimizing a batch of activations. If set to None, defaults to all activations in the batch. From 264a8ad563993c0d73dc772ba0f3d763882485d9 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 2 Jun 2022 10:33:25 -0600 Subject: [PATCH 11/17] Support non default input sizes in FacetLoss --- captum/optim/_core/loss.py | 14 +++++++++++++- tests/optim/core/test_loss.py | 28 +++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 6542e828b7..04457aaa30 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -1037,7 +1037,19 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: else: strength_t = self.strength flat_attr = strength_t * flat_attr - return torch.sum(flat_attr * self.facet_weights) + + if ( + self.facet_weights.dim() == 4 + and layer.dim() == 4 + and self.facet_weights.shape[2:] != layer.shape[2:] + ): + facet_weights = torch.nn.functional.interpolate( + self.facet_weights, size=layer.shape[2:] + ) + else: + facet_weights = self.facet_weights + + return torch.sum(flat_attr * facet_weights) def sum_loss_list( diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 6ae0105f55..ee8e34a033 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -289,7 +289,7 @@ def test_facetloss_single_channel(self) -> None: model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() - facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + facet_weights = torch.ones([1, 2, 6, 6]) * 1.5 loss = opt_loss.FacetLoss( ultimate_target=model[1], layer_target=model[0].layer, @@ -308,7 +308,7 @@ def test_facetloss_multi_channel(self) -> None: model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([1, 1, 1]).float() - facet_weights = torch.ones([1, 2, 1, 1]) * 2.0 + facet_weights = torch.ones([1, 2, 6, 6]) * 2.0 loss = opt_loss.FacetLoss( ultimate_target=model[1], layer_target=model[0].layer, @@ -325,7 +325,7 @@ def test_facetloss_strength(self) -> None: model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() - facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + facet_weights = torch.ones([1, 2, 6, 6]) * 1.5 strength = 0.5 loss = opt_loss.FacetLoss( ultimate_target=model[1], @@ -345,7 +345,7 @@ def test_facetloss_strength_batch(self) -> None: model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() - facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + facet_weights = torch.ones([1, 2, 6, 6]) * 1.5 strength = [0.1, 5.05] loss = opt_loss.FacetLoss( ultimate_target=model[1], @@ -385,7 +385,7 @@ def test_facetloss_batch_index(self) -> None: model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() - facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + facet_weights = torch.ones([1, 2, 5, 5]) * 1.5 loss = opt_loss.FacetLoss( ultimate_target=model[1], layer_target=model[0].layer, @@ -397,6 +397,24 @@ def 
test_facetloss_batch_index(self) -> None: output = get_loss_value(model, loss, model_input) self.assertAlmostEqual(output.item(), 10.38000202178955, places=5) + def test_facetloss_resize_4d(self) -> None: + layer = torch.nn.Conv2d(2, 3, 1, bias=True) + layer.weight.data.fill_(0.1) # type: ignore + layer.bias.data.fill_(1) # type: ignore + + model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) + + vec = torch.tensor([1, 1, 1]).float() + facet_weights = torch.ones([1, 2, 12, 12]) * 2.0 + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0].layer, + vec=vec, + facet_weights=facet_weights, + ) + output = get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) + self.assertAlmostEqual(output, 1.560000, places=6) + class TestCompositeLoss(BaseTest): def test_negative(self) -> None: From 2b665f2a74c64097dc3e4d8ad6acfc74eeb5e7c0 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 27 Jun 2022 17:51:53 -0600 Subject: [PATCH 12/17] Improve CLIP loss docs for Sphinx --- captum/optim/_core/loss.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 04457aaa30..252f569921 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -865,14 +865,14 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance. channel_index (int, optional): Optionally only target a specific channel. - If set to None, all channels with be used. - Default: None + If set to ``None``, all channels with be used. + Default: ``None`` constant (float, optional): Constant value to deduct from the activations. - Default: 0.5 + Default: ``0.5`` batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to None, defaults to all + optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. - Default: None + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.constant = constant @@ -920,16 +920,16 @@ def __init__( channel / feature dimension of the target layer instance. activation_fn (Callable, optional): An optional activation function to apply to the activations before computing the matrix product. If set - to None, then no activation function will be used. - Default: torch.nn.functional.relu + to ``None``, then no activation function will be used. + Default: ``torch.nn.functional.relu`` move_channel_dim_to_final_dim (bool, optional): Whether or not to move the channel dimension to the last dimension before computing the matrix product. - Default: True + Default: ``True`` batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to None, defaults to all + optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. - Default: None + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) assert vec.dim() == 1 @@ -979,21 +979,22 @@ def __init__( visualizing targets from. This is normally the penultimate layer of the model. layer_target (nn.Module): A layer that we have facet_weights for. This - target layer should be below the ultimate_target layer in the model. + target layer should be below the ``ultimate_target`` layer in the + model. facet_weights (torch.Tensor): Weighting that steers the objective towards a particular theme or concept. These weight values should - come from linear probes trained on layer_target. 
+ come from linear probes trained on ``layer_target``. strength (float, list of float, optional): A single float or list of floats to use for batch dimension weighting. If using a single value, then it will be applied to all batch dimensions equally. Otherwise a list of - floats with a shape of: [start, end] should be used for torch.linspace - to calculate the step values in between. Default is set to None for no - weighting. - Default: None + floats with a shape of: [start, end] should be used for + ``torch.linspace`` to calculate the step values in between. Default is + set to ``None`` for no weighting. + Default: ``None`` batch_index (int, optional): The index of the activations to optimize if - optimizing a batch of activations. If set to None, defaults to all + optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. - Default: None + Default: ``None`` """ BaseLoss.__init__(self, [ultimate_target, layer_target], batch_index) self.ultimate_target = ultimate_target From d3a2ccadb7e7dd843f4e703d0ee1ae313fc8b756 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 27 Jun 2022 19:49:48 -0600 Subject: [PATCH 13/17] Improve vector function docs for Sphinx --- captum/optim/_utils/image/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/captum/optim/_utils/image/common.py b/captum/optim/_utils/image/common.py index 1f2cced14f..54cdae4b0d 100644 --- a/captum/optim/_utils/image/common.py +++ b/captum/optim/_utils/image/common.py @@ -394,11 +394,11 @@ def _create_new_vector( activation_fn (Callable, optional): An optional activation function to apply to the activations before computing the matrix product. If set to None, then no activation function will be used. - Default: torch.nn.functional.relu + Default: ``torch.nn.functional.relu`` move_channel_dim_to_final_dim (bool, optional): Whether or not to move the channel dimension to the last dimension before computing the matrix product. - Default: True + Default: ``True`` Returns x (torch.Tensor): A vector created from the input activations and the From e80b42eae878e0ecbbd7921871e3cc8eae825eb2 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 28 Jun 2022 19:34:57 -0600 Subject: [PATCH 14/17] Fix spacing in docs --- captum/optim/_utils/image/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/captum/optim/_utils/image/common.py b/captum/optim/_utils/image/common.py index 54cdae4b0d..9e7553b251 100644 --- a/captum/optim/_utils/image/common.py +++ b/captum/optim/_utils/image/common.py @@ -389,7 +389,7 @@ def _create_new_vector( computing the matrix product of the activations. See torch.matmul for See torch.matmul for more details on compatible shapes: https://pytorch.org/docs/stable/generated/torch.matmul.html - By default, vec is expected to share the same size as the channel or + By default, ``vec`` is expected to share the same size as the channel or feature dimension of the activations. activation_fn (Callable, optional): An optional activation function to apply to the activations before computing the matrix product. If set @@ -401,7 +401,7 @@ def _create_new_vector( Default: ``True`` Returns - x (torch.Tensor): A vector created from the input activations and the + x (torch.Tensor): A vector created from the input activations and the stored vector. 
""" assert x.device == vec.device From 509accd805bd864c29abf92546116e010ffbe89a Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 14 Jul 2022 11:38:16 -0600 Subject: [PATCH 15/17] Improve loss docs --- captum/optim/_core/loss.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 252f569921..8c9011e8a2 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -924,7 +924,7 @@ def __init__( Default: ``torch.nn.functional.relu`` move_channel_dim_to_final_dim (bool, optional): Whether or not to move the channel dimension to the last dimension before computing the matrix - product. + product. Set to ``False`` if the using the channels last format. Default: ``True`` batch_index (int, optional): The index of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all @@ -988,8 +988,8 @@ def __init__( to use for batch dimension weighting. If using a single value, then it will be applied to all batch dimensions equally. Otherwise a list of floats with a shape of: [start, end] should be used for - ``torch.linspace`` to calculate the step values in between. Default is - set to ``None`` for no weighting. + :func:`torch.linspace` to calculate the step values in between. Default + is set to ``None`` for no weighting. Default: ``None`` batch_index (int, optional): The index of the activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all From 44260036a1649ca86f1eff8f1afe9f12f2764ccb Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 16 Jul 2022 10:52:42 -0600 Subject: [PATCH 16/17] Fix clip objective doc type formatting --- captum/optim/_core/loss.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 8c9011e8a2..5f10ddafcb 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -857,7 +857,7 @@ def __init__( target: torch.nn.Module, channel_index: Optional[int] = None, constant: float = 0.5, - batch_index: Optional[int] = None, + batch_index: Optional[Union[int, List[int]]] = None, ) -> None: """ Args: @@ -869,9 +869,10 @@ def __init__( Default: ``None`` constant (float, optional): Constant value to deduct from the activations. Default: ``0.5`` - batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to ``None``, defaults to all - activations in the batch. + batch_index (int or List[int], optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set + to ``None``, defaults to all activations in the batch. Index ranges + should be in the format of: [start, end]. Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) @@ -910,7 +911,7 @@ def __init__( vec: torch.Tensor, activation_fn: Optional[Callable] = torch.nn.functional.relu, move_channel_dim_to_final_dim: bool = True, - batch_index: Optional[int] = None, + batch_index: Optional[Union[int, List[int]]] = None, ) -> None: """ Args: @@ -926,9 +927,10 @@ def __init__( channel dimension to the last dimension before computing the matrix product. Set to ``False`` if the using the channels last format. Default: ``True`` - batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to ``None``, defaults to all - activations in the batch. 
+ batch_index (int or List[int], optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set + to ``None``, defaults to all activations in the batch. Index ranges + should be in the format of: [start, end]. Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) @@ -984,16 +986,17 @@ def __init__( facet_weights (torch.Tensor): Weighting that steers the objective towards a particular theme or concept. These weight values should come from linear probes trained on ``layer_target``. - strength (float, list of float, optional): A single float or list of floats + strength (float, List[float], optional): A single float or list of floats to use for batch dimension weighting. If using a single value, then it will be applied to all batch dimensions equally. Otherwise a list of floats with a shape of: [start, end] should be used for :func:`torch.linspace` to calculate the step values in between. Default is set to ``None`` for no weighting. Default: ``None`` - batch_index (int, optional): The index of the activations to optimize if - optimizing a batch of activations. If set to ``None``, defaults to all - activations in the batch. + batch_index (int or List[int], optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set + to ``None``, defaults to all activations in the batch. Index ranges + should be in the format of: [start, end]. Default: ``None`` """ BaseLoss.__init__(self, [ultimate_target, layer_target], batch_index) From 2480b6925aa3413aec60f1936aceeea67fbe5159 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 18 Jul 2022 15:18:00 -0600 Subject: [PATCH 17/17] Fix loss docstring type hint formatting --- captum/optim/_core/loss.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 5f10ddafcb..0cc6033fd4 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -869,7 +869,7 @@ def __init__( Default: ``None`` constant (float, optional): Constant value to deduct from the activations. Default: ``0.5`` - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -919,7 +919,7 @@ def __init__( target (nn.Module): A target layer instance. vec (torch.Tensor): A 1D channel vector with the same size as the channel / feature dimension of the target layer instance. - activation_fn (Callable, optional): An optional activation function to + activation_fn (callable, optional): An optional activation function to apply to the activations before computing the matrix product. If set to ``None``, then no activation function will be used. Default: ``torch.nn.functional.relu`` @@ -927,7 +927,7 @@ def __init__( channel dimension to the last dimension before computing the matrix product. Set to ``False`` if the using the channels last format. Default: ``True`` - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. 
@@ -986,14 +986,14 @@ def __init__( facet_weights (torch.Tensor): Weighting that steers the objective towards a particular theme or concept. These weight values should come from linear probes trained on ``layer_target``. - strength (float, List[float], optional): A single float or list of floats + strength (float, list of float, optional): A single float or list of floats to use for batch dimension weighting. If using a single value, then it will be applied to all batch dimensions equally. Otherwise a list of floats with a shape of: [start, end] should be used for :func:`torch.linspace` to calculate the step values in between. Default is set to ``None`` for no weighting. Default: ``None`` - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end].
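
As a rough usage sketch (not part of the diffs above): the three objectives added in this series — L2Mean, VectorLoss, and FacetLoss — are meant to be combined when reproducing the faceted feature visualization recipe from the multimodal-neurons article. Only the constructors, parameter names, and the sum_loss_list helper visible in the patches are taken from the PR; the toy backbone, the random probe_weights stand-in, and the import alias below are illustrative assumptions, and real facet weights would come from a linear probe trained on layer_target as the docstrings describe.

import torch
import captum.optim._core.loss as opt_loss  # module extended by this patch series

# Toy stand-in backbone: the last conv stands in for the penultimate
# ("ultimate_target") layer of a real model, the first conv is the lower
# layer we have probe weights for.
backbone = torch.nn.Sequential(
    torch.nn.Conv2d(3, 4, 3, padding=1),
    torch.nn.ReLU(),
    torch.nn.Conv2d(4, 8, 3, padding=1),
)

# 1D channel direction matching the 8 channels of the last conv layer.
direction = torch.zeros(8)
direction[2] = 1.0

# Optimize the matrix product of the last layer's activations with the vector.
vec_loss = opt_loss.VectorLoss(backbone[2], vec=direction)

# L2 penalty on the lower layer's deviation from 0.5, as in the CLIP
# feature-visualization code.
l2_mean = opt_loss.L2Mean(backbone[0], constant=0.5)

# Facet weights would normally come from a linear probe trained on the lower
# layer; a random 4D tensor (facet_weights.dim() == 4 is accepted) stands in.
probe_weights = torch.rand(1, 4, 1, 1)
facet = opt_loss.FacetLoss(
    vec=direction,
    ultimate_target=backbone[2],  # layer_target must sit below this layer
    layer_target=backbone[0],
    facet_weights=probe_weights,
    strength=[0.1, 5.05],  # linspace-style ramp of weights across the batch dim
)

# Combine the objectives with the module's existing sum_loss_list helper.
combined = opt_loss.sum_loss_list([vec_loss, l2_mean, facet])

Because FacetLoss calls torch.autograd.grad between the two captured activations, layer_target has to sit below ultimate_target in the same forward graph; the combined objective is then handed to the optimization loop used elsewhere in captum.optim, which is outside the scope of these diffs.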