Skip to content

Commit 9779324

Browse files
committed
Making vgg16 backbone same as paper.
1 parent 80da3b9 commit 9779324

File tree

1 file changed

+58
-31
lines changed
  • torchvision/models/detection

1 file changed

+58
-31
lines changed

torchvision/models/detection/ssd.py

Lines changed: 58 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def forward(self, x):
8787
return output
8888

8989

90-
def vgg16_mfm_backbone(pretrained, trainable_layers=3):
90+
def _vgg16_mfm_backbone(pretrained, trainable_layers=3):
9191
backbone = vgg.vgg16(pretrained=pretrained).features
9292

9393
# Gather the indices of maxpools. These are the locations of output blocks.
@@ -102,50 +102,77 @@ def vgg16_mfm_backbone(pretrained, trainable_layers=3):
102102
for parameter in b.parameters():
103103
parameter.requires_grad_(False)
104104

105+
# Patch ceil_mode for all maxpool layers of the backbone to get the same outputs as Fig. 2 of the SSD paper
106+
for layer in backbone:
107+
if isinstance(layer, nn.MaxPool2d):
108+
layer.ceil_mode = True
109+
105110
# Multiple Feature map definition - page 4, Fig 2 of SSD paper
111+
def build_feature_map_block(layers, out_channels):
112+
block = nn.Sequential(*layers)
113+
block.out_channels = out_channels
114+
return block
115+
106116
feature_maps = nn.ModuleList([
107117
# Conv4_3 map
108-
nn.Sequential(
109-
*backbone[:23], # until conv4_3
118+
build_feature_map_block(
119+
backbone[:23], # until conv4_3
120+
# TODO: add L2 normalization + scaling?
121+
512
110122
),
111123
# FC7 map
112-
nn.Sequential(
113-
*backbone[23:], # until maxpool5 # TODO: replace maxpool 5 as in the paper?
114-
nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, padding=1), # FC6
115-
nn.ReLU(inplace=True),
116-
nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=1), # FC7
117-
nn.ReLU(inplace=True)
124+
build_feature_map_block(
125+
(
126+
*backbone[23:-1], # until conv5_3
127+
nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True), # modified maxpool5
128+
nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, padding=6, dilation=6), # FC6 with atrous
129+
nn.ReLU(inplace=True),
130+
nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=1), # FC7
131+
nn.ReLU(inplace=True)
132+
),
133+
1024
118134
),
119135
# Conv8_2 map
120-
nn.Sequential(
121-
nn.Conv2d(1024, 256, kernel_size=1),
122-
nn.ReLU(inplace=True),
123-
nn.Conv2d(256, 512, kernel_size=3, padding=1, stride=2),
124-
nn.ReLU(inplace=True),
136+
build_feature_map_block(
137+
(
138+
nn.Conv2d(1024, 256, kernel_size=1),
139+
nn.ReLU(inplace=True),
140+
nn.Conv2d(256, 512, kernel_size=3, padding=1, stride=2),
141+
nn.ReLU(inplace=True),
142+
),
143+
512,
125144
),
126145
# Conv9_2 map
127-
nn.Sequential(
128-
nn.Conv2d(512, 128, kernel_size=1),
129-
nn.ReLU(inplace=True),
130-
nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2),
131-
nn.ReLU(inplace=True),
146+
build_feature_map_block(
147+
(
148+
nn.Conv2d(512, 128, kernel_size=1),
149+
nn.ReLU(inplace=True),
150+
nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2),
151+
nn.ReLU(inplace=True),
152+
),
153+
256,
132154
),
133155
# Conv10_2 map
134-
nn.Sequential(
135-
nn.Conv2d(256, 128, kernel_size=1),
136-
nn.ReLU(inplace=True),
137-
nn.Conv2d(128, 256, kernel_size=3, padding=1),
138-
nn.ReLU(inplace=True),
156+
build_feature_map_block(
157+
(
158+
nn.Conv2d(256, 128, kernel_size=1),
159+
nn.ReLU(inplace=True),
160+
nn.Conv2d(128, 256, kernel_size=3),
161+
nn.ReLU(inplace=True),
162+
),
163+
256,
139164
),
140165
# Conv11_2 map
141-
nn.Sequential(
142-
nn.Conv2d(256, 128, kernel_size=1),
143-
nn.ReLU(inplace=True),
144-
nn.Conv2d(128, 256, kernel_size=3, padding=1),
145-
nn.ReLU(inplace=True),
166+
build_feature_map_block(
167+
(
168+
nn.Conv2d(256, 128, kernel_size=1),
169+
nn.ReLU(inplace=True),
170+
nn.Conv2d(128, 256, kernel_size=3),
171+
nn.ReLU(inplace=True),
172+
),
173+
256,
146174
),
147175
])
148-
# TODO: keep track of block output sizes in a variable. Perhaps define a new block class that has it as attribute?
149176

150177
return MultiFeatureMap(feature_maps)
151178

@@ -159,7 +186,7 @@ def ssd_vgg16(pretrained=False, progress=True,
159186
# no need to download the backbone if pretrained is set
160187
pretrained_backbone = False
161188

162-
backbone = vgg16_mfm_backbone(pretrained_backbone, trainable_layers=trainable_backbone_layers)
189+
backbone = _vgg16_mfm_backbone(pretrained_backbone, trainable_layers=trainable_backbone_layers)
163190
model = SSD(backbone, num_classes, **kwargs)
164191
if pretrained:
165192
pass # TODO: load pre-trained COCO weights

0 commit comments

Comments
 (0)