5 changes: 4 additions & 1 deletion README.md
@@ -29,6 +29,9 @@
<a href="#">
<img src="https://img.shields.io/badge/License-MIT-efefef">
</a>
<a href="https://thib-s.github.io/orthogonium/">
<img alt="Documentation" src="https://img.shields.io/badge/Docs-here-0000ff">
</a>
</div>
<br>

@@ -39,7 +42,7 @@ build orthogonal layers, with a focus on convolutional layers. We noticed that
significant role in the final performance: a more efficient implementation
allows larger networks and more training steps within the same compute
budget. So our implementation differs from original papers in order to
be faster, to consume less memory or be more flexible.
be faster, to consume less memory or be more flexible. Feel free to read the [documentation](https://thib-s.github.io/orthogonium/)!

# 📃 What is included in this library?

5 changes: 5 additions & 0 deletions docs/api/activations.md
@@ -0,0 +1,5 @@
::: orthogonium.layers.custom_activations
rendering:
show_root_toc_entry: True
selection:
inherited_members: True
5 changes: 5 additions & 0 deletions docs/api/losses.md
@@ -0,0 +1,5 @@
::: orthogonium.losses
rendering:
show_root_toc_entry: True
selection:
inherited_members: True
2 changes: 2 additions & 0 deletions mkdocs.yml
@@ -7,6 +7,8 @@ nav:
- convolutions: api/conv.md
- linear layers: api/linear.md
- reparametrizers: api/reparametrizers.md
- activations: api/activations.md
- losses: api/losses.md
# - layers.conv.AOC module: api/aoc.md
# - layers.conv.adaptiveSOC module: api/adaptiveSOC.md
# - layers.conv.SLL module: api/sll.md
27 changes: 2 additions & 25 deletions orthogonium/layers/conv/AOC/fast_block_ortho_conv.py
@@ -156,7 +156,6 @@ def __init__(
out_channels,
kernel_size,
groups,
contiguous_optimization=False,
):
"""This module is used to generate orthogonal kernels for the BCOP layer. It takes
as input a matrix PQ of shape (groups, 2*kernel_size, c, c//2) and returns a kernel
@@ -167,9 +166,6 @@ def __init__(
out_channels (int): number of output channels
kernel_size (int): size of the kernel
groups (int): number of groups
contiguous_optimization (bool, optional): if True, the kernel will have twice the
number of channels. This is used to increase expressiveness, but at the price
of orthogonality (not Lipschitzness). Defaults to False.
"""
super(BCOPTrivializer, self).__init__()
self.kernel_size = kernel_size
@@ -178,9 +174,6 @@ def __init__(
self.in_channels = in_channels
self.min_channels = min(in_channels, out_channels)
self.max_channels = max(in_channels, out_channels)
if contiguous_optimization:
self.max_channels *= 2
self.contiguous_optimization = contiguous_optimization
self.transpose = out_channels < in_channels
self.num_kernels = 2 * kernel_size

@@ -249,12 +242,6 @@ def forward(self, PQ):
res = c11
for i in range(c22.shape[0]): # c22.shape[0] == 1 if k-1 is a power of two
res = fast_matrix_conv(res, c22[i], self.groups)
# if contiguous optimization is enabled, we constructed a conv with twice the number
# of channels, we need to remove the extra channels
if self.contiguous_optimization:
res = res[
: self.max_channels // 2, : self.min_channels // self.groups, :, :
]
# since it is less expensive to compute the transposed kernel when co < ci
# we transpose the kernel if needed
if self.transpose:
@@ -288,24 +275,15 @@ def attach_bcop_weight(
num_kernels = (
2 * kernel_size
) # the number of projectors needed to create the kernel
contiguous_optimization = ortho_params.contiguous_optimization
# register projectors matrices
layer.register_parameter(
weight_name,
torch.nn.Parameter(
torch.Tensor(
groups,
num_kernels,
(
2 * max_channels // groups
if contiguous_optimization
else max_channels // groups
),
(
max_channels // groups
if contiguous_optimization
else max_channels // (groups * 2)
),
(max_channels // groups),
(max_channels // (groups * 2)),
),
requires_grad=True,
),
@@ -343,7 +321,6 @@ def attach_bcop_weight(
out_channels,
kernel_size,
groups,
contiguous_optimization=contiguous_optimization,
),
unsafe=True,
)
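The `forward` method above assembles the final BCOP kernel by repeatedly merging small orthogonal factors with `fast_matrix_conv`. The identity this relies on is that two stacked (unstrided, unpadded) convolutions collapse into a single convolution whose kernel is the channel-contracted convolution of the two kernels. The sketch below only illustrates that identity: `compose_kernels` is a hypothetical helper written for this note (it ignores grouping and striding) and is not the library's `fast_matrix_conv`.

```python
import torch
import torch.nn.functional as F

def compose_kernels(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    # a: (c_mid, c_in, ka, ka) is applied first, b: (c_out, c_mid, kb, kb) second.
    # Returns the (c_out, c_in, ka + kb - 1, ka + kb - 1) kernel of the composition.
    kb = b.shape[-1]
    # Full 2D convolution of the two kernels, contracted over the middle channels:
    # treat `a` as a batch of c_in images with c_mid channels and convolve it with a
    # flipped `b` (F.conv2d computes a correlation, hence the flip).
    k = F.conv2d(a.transpose(0, 1), b.flip([-1, -2]), padding=kb - 1)
    return k.transpose(0, 1)

a = torch.randn(8, 4, 3, 3)    # c_in=4  -> c_mid=8
b = torch.randn(6, 8, 3, 3)    # c_mid=8 -> c_out=6
x = torch.randn(1, 4, 16, 16)

two_convs = F.conv2d(F.conv2d(x, a), b)          # apply the two kernels in sequence
one_conv = F.conv2d(x, compose_kernels(a, b))    # apply the merged 5x5 kernel once
print(torch.allclose(two_convs, one_conv, atol=1e-4))  # True (up to float32 error)
```

In the BCOP construction above, `2 * kernel_size` such factors (see `num_kernels` in the diff) are merged this way, which is why a full k x k orthogonal kernel can be built from small orthogonal blocks.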
104 changes: 54 additions & 50 deletions orthogonium/layers/conv/AOC/ortho_conv.py
@@ -27,32 +27,34 @@ def AdaptiveOrthoConv2d(
"""
Factory function to create an orthogonal convolutional layer, selecting the appropriate class based on kernel size and stride.

**Key Features:**
- Enforces orthogonality, preserving gradient norms.
- Supports native striding, dilation, grouped convolutions, and flexible padding.

**Behavior:**
- When kernel_size == stride, the layer is an `RKOConv2d`.
- When stride == 1, the layer is a `FastBlockConv2d`.
- Otherwise, the layer is a `BcopRkoConv2d`.

**Arguments:**
- `in_channels` (int): Number of input channels.
- `out_channels` (int): Number of output channels.
- `kernel_size` (_size_2_t): Size of the convolution kernel.
- `stride` (_size_2_t, optional): Stride of the convolution. Default is 1.
- `padding` (str or _size_2_t, optional): Padding mode or size. Default is "same".
- `dilation` (_size_2_t, optional): Dilation rate. Default is 1.
- `groups` (int, optional): Number of blocked connections from input to output channels. Default is 1.
- `bias` (bool, optional): Whether to include a learnable bias. Default is True.
- `padding_mode` (str, optional): Padding mode. Default is "circular".
- `ortho_params` (OrthoParams, optional): Parameters to control orthogonality. Default is `OrthoParams()`.

**Returns:**
- A configured instance of `nn.Conv2d` (one of `RKOConv2d`, `FastBlockConv2d`, or `BcopRkoConv2d`).

**Raises:**
- `ValueError`: If kernel_size < stride, as orthogonality cannot be enforced.
Key Features:
-------------
- Enforces orthogonality, preserving gradient norms.
- Supports native striding, dilation, grouped convolutions, and flexible padding.

Behavior:
---------
- When kernel_size == stride, the layer is an `RKOConv2d`.
- When stride == 1, the layer is a `FastBlockConv2d`.
- Otherwise, the layer is a `BcopRkoConv2d`.

Arguments:
in_channels (int): Number of input channels.
out_channels (int): Number of output channels.
kernel_size (_size_2_t): Size of the convolution kernel.
stride (_size_2_t, optional): Stride of the convolution. Default is 1.
padding (str or _size_2_t, optional): Padding mode or size. Default is "same".
dilation (_size_2_t, optional): Dilation rate. Default is 1.
groups (int, optional): Number of blocked connections from input to output channels. Default is 1.
bias (bool, optional): Whether to include a learnable bias. Default is True.
padding_mode (str, optional): Padding mode. Default is "circular".
ortho_params (OrthoParams, optional): Parameters to control orthogonality. Default is `OrthoParams()`.

Returns:
A configured instance of `nn.Conv2d` (one of `RKOConv2d`, `FastBlockConv2d`, or `BcopRkoConv2d`).

Raises:
`ValueError`: If kernel_size < stride, as orthogonality cannot be enforced.
"""

if kernel_size < stride:
@@ -95,30 +97,32 @@ def AdaptiveOrthoConvTranspose2d(
"""
Factory function to create an orthogonal convolutional transpose layer, adapting based on kernel size and stride.

**Key Features:**
- Ensures orthogonality in transpose convolutions for stable gradient propagation.
- Supports dilation, grouped operations, and efficient kernel construction.

**Behavior:**
- When kernel_size == stride, the layer is an `RkoConvTranspose2d`.
- When stride == 1, the layer is a `FastBlockConvTranspose2D`.
- Otherwise, the layer is a `BcopRkoConvTranspose2d`.

**Arguments:**
- `in_channels` (int): Number of input channels.
- `out_channels` (int): Number of output channels.
- `kernel_size` (_size_2_t): Size of the convolution kernel.
- `stride` (_size_2_t, optional): Stride of the transpose convolution. Default is 1.
- `padding` (_size_2_t, optional): Padding size. Default is 0.
- `output_padding` (_size_2_t, optional): Additional size for output. Default is 0.
- `groups` (int, optional): Number of groups. Default is 1.
- `bias` (bool, optional): Whether to include a learnable bias. Default is True.
- `dilation` (_size_2_t, optional): Dilation rate. Default is 1.
- `padding_mode` (str, optional): Padding mode. Default is "zeros".
- `ortho_params` (OrthoParams, optional): Parameters to control orthogonality. Default is `OrthoParams()`.

**Returns:**
- A configured instance of `nn.ConvTranspose2d` (one of `RkoConvTranspose2d`, `FastBlockConvTranspose2D`, or `BcopRkoConvTranspose2d`).
Key Features:
-------------
- Ensures orthogonality in transpose convolutions for stable gradient propagation.
- Supports dilation, grouped operations, and efficient kernel construction.

Behavior:
---------
- When kernel_size == stride, the layer is an `RkoConvTranspose2d`.
- When stride == 1, the layer is a `FastBlockConvTranspose2D`.
- Otherwise, the layer is a `BcopRkoConvTranspose2d`.

Arguments:
in_channels (int): Number of input channels.
out_channels (int): Number of output channels.
kernel_size (_size_2_t): Size of the convolution kernel.
stride (_size_2_t, optional): Stride of the transpose convolution. Default is 1.
padding (_size_2_t, optional): Padding size. Default is 0.
output_padding (_size_2_t, optional): Additional size for output. Default is 0.
groups (int, optional): Number of groups. Default is 1.
bias (bool, optional): Whether to include a learnable bias. Default is True.
dilation (_size_2_t, optional): Dilation rate. Default is 1.
padding_mode (str, optional): Padding mode. Default is "zeros".
ortho_params (OrthoParams, optional): Parameters to control orthogonality. Default is `OrthoParams()`.

Returns:
A configured instance of `nn.ConvTranspose2d` (one of `RkoConvTranspose2d`, `FastBlockConvTranspose2D`, or `BcopRkoConvTranspose2d`).

**Raises:**
- `ValueError`: If kernel_size < stride, as orthogonality cannot be enforced.
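As a reading aid for the two factories documented above, here is a minimal usage sketch. It assumes the module path shown in the diff header (`orthogonium/layers/conv/AOC/ortho_conv.py`) and only uses arguments listed in the docstrings; the norm check is approximate, since orthogonality holds up to the numerical accuracy of the underlying reparametrization.

```python
import torch
from orthogonium.layers.conv.AOC.ortho_conv import (
    AdaptiveOrthoConv2d,
    AdaptiveOrthoConvTranspose2d,
)

# stride == 1, so this dispatches to FastBlockConv2d (see "Behavior" above).
conv = AdaptiveOrthoConv2d(
    in_channels=16,
    out_channels=16,
    kernel_size=3,
    stride=1,
    padding="same",
    padding_mode="circular",
    bias=False,
)

x = torch.randn(2, 16, 32, 32)
y = conv(x)
# With circular padding, stride 1 and equal channel counts the layer acts as an
# orthogonal operator, so the input norm should be (approximately) preserved.
print(y.norm().item(), x.norm().item())

# kernel_size == stride, so this dispatches to RkoConvTranspose2d.
up = AdaptiveOrthoConvTranspose2d(16, 32, kernel_size=2, stride=2, bias=False)
print(up(x).shape)  # torch.Size([2, 32, 64, 64])
```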
26 changes: 7 additions & 19 deletions orthogonium/layers/conv/adaptiveSOC/fast_skew_ortho_conv.py
@@ -138,14 +138,17 @@ def attach_soc_weight(
weight_name (str): name of the weight
kernel_shape (tuple): shape of the kernel (out_channels, in_channels/groups, kernel_size, kernel_size)
groups (int): number of groups
bjorck_params (BjorckParams, optional): parameters of the Bjorck orthogonalization. Defaults to BjorckParams().
exp_params (ExpParams): parameters for the exponential algorithm.

Returns:
torch.Tensor: a handle to the attached weight
"""
out_channels, in_channels, kernel_size, k2 = kernel_shape
in_channels *= groups # compute the real number of input channels
assert kernel_size == k2, "only square kernels are supported for the moment"
assert (
kernel_size == k2
), "only square kernels are supported (to compute skew symmetric kernels)"
assert kernel_size % 2 == 1, "kernel size must be odd"
max_channels = max(in_channels, out_channels)
layer.register_parameter(
weight_name,
@@ -238,8 +241,6 @@ def __init__(
raise ValueError(
"kernel size must be smaller than stride. The set of orthonal convolutions is empty in this setting."
)
if (in_channels % groups != 0) and (out_channels % groups != 0):
raise ValueError("in_channels and out_channels must be divisible by groups")
self.padding = padding
self.stride = stride
self.kernel_size = kernel_size
@@ -252,11 +253,6 @@ def __init__(
groups,
exp_params=exp_params,
)
if bias:
self.bias = nn.Parameter(torch.Tensor(out_channels))
nn.init.zeros_(self.bias)
else:
self.register_parameter("bias", None)

def singular_values(self):
"""Compute the singular values of the convolutional layer using the FFT+SVD method.
@@ -341,8 +337,6 @@ def __init__(
raise ValueError(
"kernel size must be smaller than stride. The set of orthonal convolutions is empty in this setting."
)
if (in_channels % groups != 0) and (out_channels % groups != 0):
raise ValueError("in_channels and out_channels must be divisible by groups")
if ((self.max_channels // groups) < 2) and (kernel_size != stride):
raise ValueError("inner conv must have at least 2 channels")
if out_channels * (stride**2) < in_channels:
@@ -367,12 +361,6 @@ def __init__(
exp_params=exp_params,
)

if bias:
self.bias = nn.Parameter(torch.Tensor(out_channels))
nn.init.zeros_(self.bias)
else:
self.register_parameter("bias", None)

def singular_values(self):
if self.padding_mode != "circular":
print(
@@ -387,8 +375,8 @@ def singular_values(self):
self.groups,
self.in_channels // self.groups,
self.out_channels // self.groups,
self.kernel_size,
self.kernel_size,
self.weight.shape[-2],
self.weight.shape[-1],
)
.numpy(),
self._input_shape,
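The `singular_values` methods above hand the kernel to an FFT-based routine when padding is circular (note the switch from `self.kernel_size` to `self.weight.shape[-2]`/`[-1]`, presumably because the exponential-based construction can store a kernel larger than the requested `kernel_size`). Below is a minimal NumPy sketch of the underlying idea, in the spirit of Sedghi et al.'s *The Singular Values of Convolutional Layers*: a stride-1 circular convolution is block-diagonalised by the 2D DFT, so its singular values are the union, over all frequencies, of the singular values of a small `c_out x c_in` matrix. This is an illustration only, not the library's exact helper, and it ignores striding and grouping.

```python
import numpy as np

def circular_conv_singular_values(kernel: np.ndarray, input_shape: tuple) -> np.ndarray:
    """All singular values of a stride-1 circular 2D convolution.

    kernel: (c_out, c_in, k, k) convolution kernel
    input_shape: (h, w) spatial size of the input feature map
    """
    h, w = input_shape
    # 2D FFT of the zero-padded kernel over the spatial dimensions.
    transforms = np.fft.fft2(kernel, s=(h, w), axes=(-2, -1))   # (c_out, c_in, h, w)
    # At each frequency (u, v) the convolution acts as the matrix transforms[:, :, u, v].
    transforms = np.transpose(transforms, (2, 3, 0, 1))         # (h, w, c_out, c_in)
    return np.linalg.svd(transforms, compute_uv=False)          # (h, w, min(c_out, c_in))

kernel = np.random.randn(4, 4, 3, 3)
svs = circular_conv_singular_values(kernel, (16, 16))
print(svs.min(), svs.max())  # for an orthogonal layer both should be close to 1
```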
2 changes: 1 addition & 1 deletion orthogonium/layers/conv/adaptiveSOC/ortho_conv.py
@@ -40,7 +40,7 @@ def AdaptiveSOCConv2d(
)
if kernel_size == stride:
convclass = RKOConv2d
elif (stride == 1) or (in_channels >= out_channels):
elif stride == 1:
convclass = FastSOC
else:
convclass = SOCRkoConv2d
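The one-line change above drops the `in_channels >= out_channels` shortcut, so `AdaptiveSOCConv2d` now dispatches purely on kernel size and stride, mirroring the AOC factory. A plain restatement of the resulting rule, for illustration only and using the class names visible in the diff:

```python
def adaptive_soc_dispatch(kernel_size: int, stride: int) -> str:
    # Mirrors the selection logic shown in the diff above.
    if kernel_size < stride:
        raise ValueError("kernel size must be at least as large as the stride")
    if kernel_size == stride:
        return "RKOConv2d"
    if stride == 1:
        return "FastSOC"          # no longer depends on the in/out channel ordering
    return "SOCRkoConv2d"

print(adaptive_soc_dispatch(3, 1))  # FastSOC
print(adaptive_soc_dispatch(4, 2))  # SOCRkoConv2d
print(adaptive_soc_dispatch(2, 2))  # RKOConv2d
```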