Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions src/anomalib/models/image/dinomaly/torch_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,9 @@ def get_encoder_decoder_outputs(self, x: torch.Tensor) -> tuple[list[torch.Tenso
- en: List of fused encoder features reshaped to spatial dimensions
- de: List of fused decoder features reshaped to spatial dimensions
"""
h_patches = x.shape[2] // self.encoder.patch_size
w_patches = x.shape[3] // self.encoder.patch_size
x = self.encoder.prepare_tokens(x)

encoder_features = []
decoder_features = []

Expand Down Expand Up @@ -237,8 +238,8 @@ def get_encoder_decoder_outputs(self, x: torch.Tensor) -> tuple[list[torch.Tenso
de = [self._fuse_feature([decoder_features[idx] for idx in idxs]) for idxs in self.fuse_layer_decoder]

# Process features for spatial output
en = self._process_features_for_spatial_output(en, side)
de = self._process_features_for_spatial_output(de, side)
en = self._process_features_for_spatial_output(en, h_patches, w_patches)
de = self._process_features_for_spatial_output(de, h_patches, w_patches)
return en, de

def forward(self, batch: torch.Tensor, global_step: int | None = None) -> torch.Tensor | InferenceBatch:
Expand All @@ -262,7 +263,7 @@ def forward(self, batch: torch.Tensor, global_step: int | None = None) -> torch.

"""
en, de = self.get_encoder_decoder_outputs(batch)
image_size = batch.shape[2]
image_size = (batch.shape[2], batch.shape[3])

if self.training:
if global_step is None:
Expand Down Expand Up @@ -376,7 +377,7 @@ def _get_architecture_config(encoder_name: str, target_layers: list[int] | None)
def _process_features_for_spatial_output(
self,
features: list[torch.Tensor],
side: int,
h_patches: int, w_patches: int
) -> list[torch.Tensor]:
"""Process features for spatial output by removing tokens and reshaping.

Expand All @@ -393,7 +394,7 @@ def _process_features_for_spatial_output(

# Reshape to spatial dimensions
batch_size = features[0].shape[0]
return [f.permute(0, 2, 1).reshape([batch_size, -1, side, side]).contiguous() for f in features]
return [f.permute(0, 2, 1).reshape([batch_size, -1, h_patches, w_patches]).contiguous() for f in features]


class DecoderViTBlock(nn.Module):
Expand Down
Loading
Loading