Commit e4bc4e0

resolve merge conflicts

2 parents: 61b4f05 + 058f4bd

65 files changed: +1,874 −61 lines

docs/source/datasets.rst (4 additions, 0 deletions)

```diff
@@ -38,14 +38,17 @@ You can also create your own datasets using the provided :ref:`base classes <bas
     Cityscapes
     CocoCaptions
     CocoDetection
+    DTD
     EMNIST
     FakeData
     FashionMNIST
+    FER2013
     Flickr8k
     Flickr30k
     FlyingChairs
     FlyingThings3D
     Food101
+    GTSRB
     HD1K
     HMDB51
     ImageNet
@@ -59,6 +62,7 @@ You can also create your own datasets using the provided :ref:`base classes <bas
     LSUN
     MNIST
     Omniglot
+    OxfordIIITPet
     PhotoTour
     Places365
     QMNIST
```
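The four entries added above (DTD, FER2013, GTSRB, OxfordIIITPet) document datasets introduced by this commit. For orientation, here is a minimal usage sketch for one of them; the `split`/`partition`/`download` arguments are assumptions read off the constructor that the new tests below exercise:

```python
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Describable Textures Dataset (DTD): 47 texture classes, 10 train/val/test folds.
dtd = datasets.DTD(
    root="data",                      # files are expected under data/dtd/dtd/
    split="train",                    # "train", "val", or "test"
    partition=1,                      # which of the 10 cross-validation folds to use
    transform=transforms.ToTensor(),
    download=True,
)
image, label = dtd[0]                 # transformed image and integer class index
```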

references/classification/README.md (38 additions, 6 deletions)

````diff
@@ -145,25 +145,57 @@ Here `$MODEL` is one of `regnet_x_32gf`, `regnet_y_16gf` and `regnet_y_32gf`.
 
 ### Vision Transformer
 
-#### Base models
+#### vit_b_16
 ```
 torchrun --nproc_per_node=8 train.py\
-    --model $MODEL --epochs 300 --batch-size 64 --opt adamw --lr 0.003 --wd 0.3\
+    --model vit_b_16 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
     --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
     --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment ra\
     --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
 ```
-Here `$MODEL` is one of `vit_b_16` and `vit_b_32`.
 
-#### Large models
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
+and `--batch_size 64`.
+
+#### vit_b_32
 ```
 torchrun --nproc_per_node=8 train.py\
-    --model $MODEL --epochs 300 --batch-size 16 --opt adamw --lr 0.003 --wd 0.3\
+    --model vit_b_32 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
+    --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
+    --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment imagenet\
+    --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
+```
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
+and `--batch_size 256`.
+
+#### vit_l_16
+```
+torchrun --nproc_per_node=8 train.py\
+    --model vit_l_16 --epochs 600 --batch-size 128 --lr 0.5 --lr-scheduler cosineannealinglr\
+    --lr-warmup-method linear --lr-warmup-epochs 5 --label-smoothing 0.1 --mixup-alpha 0.2\
+    --auto-augment ta_wide --random-erase 0.1 --weight-decay 0.00002 --norm-weight-decay 0.0\
+    --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema --val-resize-size 232
+```
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
+and `--batch_size 64`.
+
+#### vit_l_32
+```
+torchrun --nproc_per_node=8 train.py\
+    --model vit_l_32 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
     --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
     --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment ra\
     --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
 ```
-Here `$MODEL` is one of `vit_l_16` and `vit_l_32`.
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
+and `--batch_size 64`.
 
 ## Mixed precision training
 Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [torch.cuda.amp](https://pytorch.org/docs/stable/amp.html?highlight=amp#module-torch.cuda.amp).
````
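One note on the multi-node remarks above: with `torchrun`, each process receives its own `--batch_size`, so the effective global batch size is the per-GPU value times the total GPU count. The recipes are consistent on this point, e.g. 64 GPUs × 64 = 4096 for `vit_b_16`, matching the single-node 8 × 512, and 16 GPUs × 64 = 1024 for `vit_l_16`, matching 8 × 128.

As for what the `--amp` flag changes inside the training loop, here is a minimal, self-contained sketch of the standard `torch.cuda.amp` pattern (illustrative only; `train.py` wires this up internally, and the toy model and data below are placeholders):

```python
import torch
import torch.nn.functional as F

# Toy stand-ins for the real model and data pipeline.
model = torch.nn.Linear(224, 10).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = torch.cuda.amp.GradScaler()  # rescales the loss so fp16 gradients don't underflow

inputs = torch.randn(8, 224, device="cuda")
targets = torch.randint(0, 10, (8,), device="cuda")

optimizer.zero_grad()
with torch.cuda.amp.autocast():  # eligible ops run in reduced precision
    loss = F.cross_entropy(model(inputs), targets)
scaler.scale(loss).backward()  # backward pass on the scaled loss
scaler.step(optimizer)         # unscales gradients, then steps
scaler.update()                # adjusts the scale factor for the next iteration
```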

test/test_datasets.py (213 additions, 1 deletion)

```diff
@@ -1,5 +1,6 @@
 import bz2
 import contextlib
+import csv
 import io
 import itertools
 import json
@@ -853,7 +854,7 @@ def _annotation_file_name(self, fold, train):
 
     def _create_annotation_file(self, root, name, video_files):
         with open(pathlib.Path(root) / name, "w") as fh:
-            fh.writelines(f"{file}\n" for file in sorted(video_files))
+            fh.writelines(f"{str(file).replace(os.sep, '/')}\n" for file in sorted(video_files))
 
 
 class LSUNTestCase(datasets_utils.ImageDatasetTestCase):
@@ -2251,5 +2252,216 @@ def inject_fake_data(self, tmpdir: str, config):
         return num_samples
 
 
+class DTDTestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.DTD
+    FEATURE_TYPES = (PIL.Image.Image, int)
+
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
+        split=("train", "test", "val"),
+        # There is no need to test the whole matrix here, since each fold is treated exactly the same
+        partition=(1, 5, 10),
+    )
+
+    def inject_fake_data(self, tmpdir: str, config):
+        data_folder = pathlib.Path(tmpdir) / "dtd" / "dtd"
+
+        num_images_per_class = 3
+        image_folder = data_folder / "images"
+        image_files = []
+        for cls in ("banded", "marbled", "zigzagged"):
+            image_files.extend(
+                datasets_utils.create_image_folder(
+                    image_folder,
+                    cls,
+                    file_name_fn=lambda idx: f"{cls}_{idx:04d}.jpg",
+                    num_examples=num_images_per_class,
+                )
+            )
+
+        meta_folder = data_folder / "labels"
+        meta_folder.mkdir()
+        image_ids = [str(path.relative_to(path.parents[1])).replace(os.sep, "/") for path in image_files]
+        image_ids_in_config = random.choices(image_ids, k=len(image_files) // 2)
+        with open(meta_folder / f"{config['split']}{config['partition']}.txt", "w") as file:
+            file.write("\n".join(image_ids_in_config) + "\n")
+
+        return len(image_ids_in_config)
+
+
+class FER2013TestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.FER2013
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
+
+    FEATURE_TYPES = (PIL.Image.Image, (int, type(None)))
+
+    def inject_fake_data(self, tmpdir, config):
+        base_folder = os.path.join(tmpdir, "fer2013")
+        os.makedirs(base_folder)
+
+        num_samples = 5
+        with open(os.path.join(base_folder, f"{config['split']}.csv"), "w", newline="") as file:
+            writer = csv.DictWriter(
+                file,
+                fieldnames=("emotion", "pixels") if config["split"] == "train" else ("pixels",),
+                quoting=csv.QUOTE_NONNUMERIC,
+                quotechar='"',
+            )
+            writer.writeheader()
+            for _ in range(num_samples):
+                row = dict(
+                    pixels=" ".join(
+                        str(pixel) for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist()
+                    )
+                )
+                if config["split"] == "train":
+                    row["emotion"] = str(int(torch.randint(0, 7, ())))
+
+                writer.writerow(row)
+
+        return num_samples
+
+
+class GTSRBTestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.GTSRB
+    FEATURE_TYPES = (PIL.Image.Image, int)
+
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False))
+
+    def inject_fake_data(self, tmpdir: str, config):
+        root_folder = os.path.join(tmpdir, "GTSRB")
+        os.makedirs(root_folder, exist_ok=True)
+
+        # Train data
+        train_folder = os.path.join(root_folder, "Training")
+        os.makedirs(train_folder, exist_ok=True)
+
+        num_examples = 3
+        classes = ("00000", "00042", "00012")
+        for class_idx in classes:
+            datasets_utils.create_image_folder(
+                train_folder,
+                name=class_idx,
+                file_name_fn=lambda image_idx: f"{class_idx}_{image_idx:05d}.ppm",
+                num_examples=num_examples,
+            )
+
+        total_number_of_examples = num_examples * len(classes)
+        # Test data
+        test_folder = os.path.join(root_folder, "Final_Test", "Images")
+        os.makedirs(test_folder, exist_ok=True)
+
+        with open(os.path.join(root_folder, "GT-final_test.csv"), "w") as csv_file:
+            csv_file.write("Filename;Width;Height;Roi.X1;Roi.Y1;Roi.X2;Roi.Y2;ClassId\n")
+
+            for _ in range(total_number_of_examples):
+                image_file = datasets_utils.create_random_string(5, string.digits) + ".ppm"
+                datasets_utils.create_image_file(test_folder, image_file)
+                row = [
+                    image_file,
+                    torch.randint(1, 100, size=()).item(),
+                    torch.randint(1, 100, size=()).item(),
+                    torch.randint(1, 100, size=()).item(),
+                    torch.randint(1, 100, size=()).item(),
+                    torch.randint(1, 100, size=()).item(),
+                    torch.randint(1, 100, size=()).item(),
+                    torch.randint(0, 43, size=()).item(),
+                ]
+                csv_file.write(";".join(map(str, row)) + "\n")
+
+        return total_number_of_examples
+
+
+class CLEVRClassificationTestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.CLEVRClassification
+    FEATURE_TYPES = (PIL.Image.Image, (int, type(None)))
+
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test"))
+
+    def inject_fake_data(self, tmpdir, config):
+        data_folder = pathlib.Path(tmpdir) / "clevr" / "CLEVR_v1.0"
+
+        images_folder = data_folder / "images"
+        image_files = datasets_utils.create_image_folder(
+            images_folder, config["split"], lambda idx: f"CLEVR_{config['split']}_{idx:06d}.png", num_examples=5
+        )
+
+        scenes_folder = data_folder / "scenes"
+        scenes_folder.mkdir()
+        if config["split"] != "test":
+            with open(scenes_folder / f"CLEVR_{config['split']}_scenes.json", "w") as file:
+                json.dump(
+                    dict(
+                        info=dict(),
+                        scenes=[
+                            dict(image_filename=image_file.name, objects=[dict()] * int(torch.randint(10, ())))
+                            for image_file in image_files
+                        ],
+                    ),
+                    file,
+                )
+
+        return len(image_files)
+
+
+class OxfordIIITPetTestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.OxfordIIITPet
+    FEATURE_TYPES = (PIL.Image.Image, (int, PIL.Image.Image, tuple, type(None)))
+
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
+        split=("trainval", "test"),
+        target_types=("category", "segmentation", ["category", "segmentation"], []),
+    )
+
+    def inject_fake_data(self, tmpdir, config):
+        base_folder = os.path.join(tmpdir, "oxford-iiit-pet")
+
+        classification_anns_meta = (
+            dict(cls="Abyssinian", label=0, species="cat"),
+            dict(cls="Keeshond", label=18, species="dog"),
+            dict(cls="Yorkshire Terrier", label=37, species="dog"),
+        )
+        split_and_classification_anns = [
+            self._meta_to_split_and_classification_ann(meta, idx)
+            for meta, idx in itertools.product(classification_anns_meta, (1, 2, 10))
+        ]
+        image_ids, *_ = zip(*split_and_classification_anns)
+
+        image_files = datasets_utils.create_image_folder(
+            base_folder, "images", file_name_fn=lambda idx: f"{image_ids[idx]}.jpg", num_examples=len(image_ids)
+        )
+
+        anns_folder = os.path.join(base_folder, "annotations")
+        os.makedirs(anns_folder)
+        split_and_classification_anns_in_split = random.choices(split_and_classification_anns, k=len(image_ids) // 2)
+        with open(os.path.join(anns_folder, f"{config['split']}.txt"), "w", newline="") as file:
+            writer = csv.writer(file, delimiter=" ")
+            for split_and_classification_ann in split_and_classification_anns_in_split:
+                writer.writerow(split_and_classification_ann)
+
+        segmentation_files = datasets_utils.create_image_folder(
+            anns_folder, "trimaps", file_name_fn=lambda idx: f"{image_ids[idx]}.png", num_examples=len(image_ids)
+        )
+
+        # The dataset has some rogue files
+        for path in image_files[:2]:
+            path.with_suffix(".mat").touch()
+        for path in segmentation_files:
+            path.with_name(f".{path.name}").touch()
+
+        return len(split_and_classification_anns_in_split)
+
+    def _meta_to_split_and_classification_ann(self, meta, idx):
+        image_id = "_".join(
+            [
+                *[(str.title if meta["species"] == "cat" else str.lower)(part) for part in meta["cls"].split()],
+                str(idx),
+            ]
+        )
+        class_id = str(meta["label"] + 1)
+        species = "1" if meta["species"] == "cat" else "2"
+        breed_id = "-1"
+        return (image_id, class_id, species, breed_id)
+
+
 if __name__ == "__main__":
     unittest.main()
```
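For readers unfamiliar with the test harness: `combinations_grid` (from `test/datasets_utils.py`) expands keyword arguments into one config dict per combination, and each config is then passed to `inject_fake_data`. A minimal re-implementation of that idea, assuming this behavior:

```python
import itertools

def combinations_grid(**kwargs):
    # {"split": ("train", "test"), "partition": (1, 5)} ->
    # [{"split": "train", "partition": 1}, {"split": "train", "partition": 5}, ...]
    return [dict(zip(kwargs, values)) for values in itertools.product(*kwargs.values())]

configs = combinations_grid(split=("train", "test", "val"), partition=(1, 5, 10))
assert len(configs) == 9
assert configs[0] == {"split": "train", "partition": 1}
```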

test/test_image.py (4 additions, 2 deletions)

```diff
@@ -478,7 +478,8 @@ def test_write_jpeg_reference(img_path, tmpdir):
     assert_equal(torch_bytes, pil_bytes)
 
 
-@pytest.mark.skipif(IS_WINDOWS, reason=("this test fails on windows because PIL uses libjpeg-turbo on windows"))
+# TODO: Remove the skip. See https://github.com/pytorch/vision/issues/5162.
+@pytest.mark.skip("this test fails because PIL uses libjpeg-turbo")
 @pytest.mark.parametrize(
     "img_path",
     [pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path)) for jpeg_path in get_images(ENCODE_JPEG, ".jpg")],
@@ -497,7 +498,8 @@ def test_encode_jpeg(img_path):
     assert_equal(encoded_jpeg_torch, encoded_jpeg_pil)
 
 
-@pytest.mark.skipif(IS_WINDOWS, reason=("this test fails on windows because PIL uses libjpeg-turbo on windows"))
+# TODO: Remove the skip. See https://github.com/pytorch/vision/issues/5162.
+@pytest.mark.skip("this test fails because PIL uses libjpeg-turbo")
 @pytest.mark.parametrize(
     "img_path",
     [pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path)) for jpeg_path in get_images(ENCODE_JPEG, ".jpg")],
```
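The change above swaps a conditional skip for an unconditional one: `pytest.mark.skipif` skips only while its predicate holds, whereas `pytest.mark.skip` always skips. For reference, standard pytest behavior:

```python
import sys

import pytest

@pytest.mark.skip(reason="always skipped, on every platform")
def test_never_runs():
    raise AssertionError("unreachable")

@pytest.mark.skipif(sys.platform == "win32", reason="skipped only on Windows")
def test_runs_elsewhere():
    assert True
```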

test/test_prototype_models.py (19 additions, 1 deletion)

```diff
@@ -94,29 +94,47 @@ def test_naming_conventions(model_fn):
     + TM.get_models_from_module(models.video)
     + TM.get_models_from_module(models.optical_flow),
 )
+@run_if_test_with_prototype
 def test_schema_meta_validation(model_fn):
     classification_fields = ["size", "categories", "acc@1", "acc@5"]
     defaults = {
-        "all": ["interpolation", "recipe"],
+        "all": ["task", "architecture", "publication_year", "interpolation", "recipe", "num_params"],
         "models": classification_fields,
         "detection": ["categories", "map"],
         "quantization": classification_fields + ["backend", "quantization", "unquantized"],
         "segmentation": ["categories", "mIoU", "acc"],
         "video": classification_fields,
         "optical_flow": [],
     }
+    model_name = model_fn.__name__
     module_name = model_fn.__module__.split(".")[-2]
     fields = set(defaults["all"] + defaults[module_name])
 
     weights_enum = _get_model_weights(model_fn)
+    if len(weights_enum) == 0:
+        pytest.skip(f"Model '{model_name}' doesn't have any pre-trained weights.")
 
     problematic_weights = {}
+    incorrect_params = []
     for w in weights_enum:
         missing_fields = fields - set(w.meta.keys())
         if missing_fields:
             problematic_weights[w] = missing_fields
+        if w == weights_enum.default:
+            if module_name == "quantization":
+                # parameters() count doesn't work well with quantization, so we check against the non-quantized weights
+                unquantized_w = w.meta.get("unquantized")
+                if unquantized_w is not None and w.meta.get("num_params") != unquantized_w.meta.get("num_params"):
+                    incorrect_params.append(w)
+            else:
+                if w.meta.get("num_params") != sum(p.numel() for p in model_fn(weights=w).parameters()):
+                    incorrect_params.append(w)
+        else:
+            if w.meta.get("num_params") != weights_enum.default.meta.get("num_params"):
+                incorrect_params.append(w)
 
     assert not problematic_weights
+    assert not incorrect_params
 
 
 @pytest.mark.parametrize("model_fn", TM.get_models_from_module(models))
```
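The new `num_params` validation boils down to comparing recorded metadata against a live parameter count. A standalone sketch of that check (illustrative; the ~11.7M figure for `resnet18` is the commonly cited one):

```python
import torchvision.models as models

model = models.resnet18()
num_params = sum(p.numel() for p in model.parameters())  # same expression the test uses
print(num_params)  # ~11.7 million for resnet18
# A weights entry's meta["num_params"] is expected to equal this live count.
```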
