Commit f248e70

Merge branch 'main' into datasets/oxford-pets
2 parents 323108a + f01b533

File tree

107 files changed (+3109, -1015 lines)


.circleci/config.yml

Lines changed: 109 additions & 929 deletions
Some generated files are not rendered by default.

.circleci/regenerate.py

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
 from jinja2 import select_autoescape
 
 
-PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9"]
+PYTHON_VERSIONS = ["3.7", "3.8", "3.9"]
 
 RC_PATTERN = r"/v[0-9]+(\.[0-9]+)*-rc[0-9]+/"
 

README.rst

Lines changed: 3 additions & 1 deletion
@@ -21,7 +21,9 @@ supported Python versions.
 +--------------------------+--------------------------+---------------------------------+
 | ``torch``                | ``torchvision``          | ``python``                      |
 +==========================+==========================+=================================+
-| ``main`` / ``nightly``   | ``main`` / ``nightly``   | ``>=3.6``, ``<=3.9``            |
+| ``main`` / ``nightly``   | ``main`` / ``nightly``   | ``>=3.7``, ``<=3.9``            |
++--------------------------+--------------------------+---------------------------------+
+| ``1.10.1``               | ``0.11.2``               | ``>=3.6``, ``<=3.9``            |
 +--------------------------+--------------------------+---------------------------------+
 | ``1.10.0``               | ``0.11.1``               | ``>=3.6``, ``<=3.9``            |
 +--------------------------+--------------------------+---------------------------------+

docs/source/datasets.rst

Lines changed: 4 additions & 0 deletions
@@ -38,13 +38,17 @@ You can also create your own datasets using the provided :ref:`base classes <bas
     Cityscapes
     CocoCaptions
     CocoDetection
+    DTD
     EMNIST
     FakeData
     FashionMNIST
+    FER2013
     Flickr8k
     Flickr30k
     FlyingChairs
     FlyingThings3D
+    Food101
+    GTSRB
     HD1K
     HMDB51
     ImageNet

packaging/windows/internal/cuda_install.bat

Lines changed: 4 additions & 0 deletions
@@ -212,6 +212,10 @@ if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" (
 echo Installing CUDA toolkit...
 7z x %CUDA_SETUP_FILE% -o"%SRC_DIR%\temp_build\cuda"
 pushd "%SRC_DIR%\temp_build\cuda"
+sc config wuauserv start= disabled
+sc stop wuauserv
+sc query wuauserv
+
 start /wait setup.exe -s %ARGS% -loglevel:6 -log:"%cd%/cuda_install_logs"
 echo %errorlevel%

references/classification/README.md

Lines changed: 38 additions & 6 deletions
@@ -145,25 +145,57 @@ Here `$MODEL` is one of `regnet_x_32gf`, `regnet_y_16gf` and `regnet_y_32gf`.
 
 ### Vision Transformer
 
-#### Base models
+#### vit_b_16
 ```
 torchrun --nproc_per_node=8 train.py\
-    --model $MODEL --epochs 300 --batch-size 64 --opt adamw --lr 0.003 --wd 0.3\
+    --model vit_b_16 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
     --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
     --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment ra\
    --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
 ```
-Here `$MODEL` is one of `vit_b_16` and `vit_b_32`.
 
-#### Large models
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
+and `--batch-size 64`.
+
+#### vit_b_32
 ```
 torchrun --nproc_per_node=8 train.py\
-    --model $MODEL --epochs 300 --batch-size 16 --opt adamw --lr 0.003 --wd 0.3\
+    --model vit_b_32 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
+    --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
+    --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment imagenet\
+    --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
+```
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
+and `--batch-size 256`.
+
+#### vit_l_16
+```
+torchrun --nproc_per_node=8 train.py\
+    --model vit_l_16 --epochs 600 --batch-size 128 --lr 0.5 --lr-scheduler cosineannealinglr\
+    --lr-warmup-method linear --lr-warmup-epochs 5 --label-smoothing 0.1 --mixup-alpha 0.2\
+    --auto-augment ta_wide --random-erase 0.1 --weight-decay 0.00002 --norm-weight-decay 0.0\
+    --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema --val-resize-size 232
+```
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
+and `--batch-size 64`.
+
+#### vit_l_32
+```
+torchrun --nproc_per_node=8 train.py\
+    --model vit_l_32 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
     --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
     --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment ra\
     --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
 ```
-Here `$MODEL` is one of `vit_l_16` and `vit_l_32`.
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
+and `--batch-size 64`.
 
 ## Mixed precision training
 Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [torch.cuda.amp](https://pytorch.org/docs/stable/amp.html?highlight=amp#module-torch.cuda.amp).
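
The node/GPU notes in the README diff above determine the effective global batch size: the per-process `--batch-size` times the total number of processes. A quick arithmetic check (plain Python, not part of the repo):

```python
# Effective global batch size for the vit_b_16 recipe above:
# 8 nodes x 8 GPUs per node, each process using --batch-size 64.
nodes, gpus_per_node, batch_per_gpu = 8, 8, 64
print(nodes * gpus_per_node * batch_per_gpu)  # 4096 images per optimizer step
```

The same arithmetic gives 4096 for `vit_b_32` (16 GPUs x 256), 1024 for `vit_l_16` (16 GPUs x 64), and 4096 for `vit_l_32` (64 GPUs x 64).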

references/classification/sampler.py

Lines changed: 3 additions & 3 deletions
@@ -36,10 +36,10 @@ def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True, seed=0,
         self.repetitions = repetitions
 
     def __iter__(self):
-        # Deterministically shuffle based on epoch
-        g = torch.Generator()
-        g.manual_seed(self.seed + self.epoch)
         if self.shuffle:
+            # Deterministically shuffle based on epoch
+            g = torch.Generator()
+            g.manual_seed(self.seed + self.epoch)
             indices = torch.randperm(len(self.dataset), generator=g).tolist()
         else:
             indices = list(range(len(self.dataset)))
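
The moved lines change no behavior when shuffling: the generator is used only in the shuffle branch, so it is now created there instead of unconditionally. A minimal standalone sketch of the same epoch-seeded shuffle (`epoch_indices` is a hypothetical helper, not part of the repo):

```python
import torch

# Deterministic, epoch-dependent shuffling as in RASampler.__iter__ above:
# seeding with seed + epoch makes every process draw the same permutation
# for a given epoch, while different epochs yield different permutations.
def epoch_indices(num_samples, epoch, seed=0, shuffle=True):
    if shuffle:
        g = torch.Generator()
        g.manual_seed(seed + epoch)  # same (seed, epoch) -> same permutation
        return torch.randperm(num_samples, generator=g).tolist()
    # no generator is needed when not shuffling, hence the move above
    return list(range(num_samples))

assert epoch_indices(8, epoch=1) == epoch_indices(8, epoch=1)  # reproducible
```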

setup.py

Lines changed: 53 additions & 0 deletions
@@ -427,6 +427,59 @@ def get_extensions():
             )
         )
 
+    # Locating video codec
+    # CUDA_HOME should be set to the cuda root directory.
+    # TORCHVISION_INCLUDE and TORCHVISION_LIBRARY should include the location to
+    # video codec header files and libraries respectively.
+    video_codec_found = (
+        extension is CUDAExtension
+        and CUDA_HOME is not None
+        and any([os.path.exists(os.path.join(folder, "cuviddec.h")) for folder in vision_include])
+        and any([os.path.exists(os.path.join(folder, "nvcuvid.h")) for folder in vision_include])
+        and any([os.path.exists(os.path.join(folder, "libnvcuvid.so")) for folder in library_dirs])
+    )
+
+    print(f"video codec found: {video_codec_found}")
+
+    if (
+        video_codec_found
+        and has_ffmpeg
+        and any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir])
+    ):
+        gpu_decoder_path = os.path.join(extensions_dir, "io", "decoder", "gpu")
+        gpu_decoder_src = glob.glob(os.path.join(gpu_decoder_path, "*.cpp"))
+        cuda_libs = os.path.join(CUDA_HOME, "lib64")
+        cuda_inc = os.path.join(CUDA_HOME, "include")
+
+        ext_modules.append(
+            extension(
+                "torchvision.Decoder",
+                gpu_decoder_src,
+                include_dirs=include_dirs + [gpu_decoder_path] + [cuda_inc] + ffmpeg_include_dir,
+                library_dirs=ffmpeg_library_dir + library_dirs + [cuda_libs],
+                libraries=[
+                    "avcodec",
+                    "avformat",
+                    "avutil",
+                    "swresample",
+                    "swscale",
+                    "nvcuvid",
+                    "cuda",
+                    "cudart",
+                    "z",
+                    "pthread",
+                    "dl",
+                ],
+                extra_compile_args=extra_compile_args,
+            )
+        )
+    else:
+        print(
+            "The installed version of ffmpeg is missing the header file 'bsf.h' which is "
+            "required for GPU video decoding. Please install the latest ffmpeg from conda-forge channel:"
+            " `conda install -c conda-forge ffmpeg`."
+        )
+
     return ext_modules
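
The probe above only checks for files on disk, so it can be run standalone to see whether a source build would enable GPU decoding. A hedged sketch, with the caveat that `setup.py` obtains `CUDA_HOME` from `torch.utils.cpp_extension` while this sketch reads it from the environment:

```python
import os

# Reproduce the video-codec probe: TORCHVISION_INCLUDE / TORCHVISION_LIBRARY
# are the pathsep-separated search paths named in the diff comments above;
# the SDK must provide cuviddec.h, nvcuvid.h, and libnvcuvid.so.
vision_include = os.environ.get("TORCHVISION_INCLUDE", "").split(os.pathsep)
library_dirs = os.environ.get("TORCHVISION_LIBRARY", "").split(os.pathsep)

def _found(name, folders):
    return any(os.path.exists(os.path.join(f, name)) for f in folders if f)

video_codec_found = (
    os.environ.get("CUDA_HOME") is not None
    and _found("cuviddec.h", vision_include)
    and _found("nvcuvid.h", vision_include)
    and _found("libnvcuvid.so", library_dirs)
)
print(f"video codec found: {video_codec_found}")
```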

test/test_datasets.py

Lines changed: 189 additions & 1 deletion
@@ -854,7 +854,7 @@ def _annotation_file_name(self, fold, train):
 
     def _create_annotation_file(self, root, name, video_files):
         with open(pathlib.Path(root) / name, "w") as fh:
-            fh.writelines(f"{file}\n" for file in sorted(video_files))
+            fh.writelines(f"{str(file).replace(os.sep, '/')}\n" for file in sorted(video_files))
 
 
 class LSUNTestCase(datasets_utils.ImageDatasetTestCase):
@@ -2169,6 +2169,194 @@ def inject_fake_data(self, tmpdir, config):
         return num_sequences * (num_examples_per_sequence - 1)
 
 
+class Food101TestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.Food101
+    FEATURE_TYPES = (PIL.Image.Image, int)
+
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
+
+    def inject_fake_data(self, tmpdir: str, config):
+        root_folder = pathlib.Path(tmpdir) / "food-101"
+        image_folder = root_folder / "images"
+        meta_folder = root_folder / "meta"
+
+        image_folder.mkdir(parents=True)
+        meta_folder.mkdir()
+
+        num_images_per_class = 5
+
+        metadata = {}
+        n_samples_per_class = 3 if config["split"] == "train" else 2
+        sampled_classes = ("apple_pie", "crab_cakes", "gyoza")
+        for cls in sampled_classes:
+            im_fnames = datasets_utils.create_image_folder(
+                image_folder,
+                cls,
+                file_name_fn=lambda idx: f"{idx}.jpg",
+                num_examples=num_images_per_class,
+            )
+            metadata[cls] = [
+                "/".join(fname.relative_to(image_folder).with_suffix("").parts)
+                for fname in random.choices(im_fnames, k=n_samples_per_class)
+            ]
+
+        with open(meta_folder / f"{config['split']}.json", "w") as file:
+            file.write(json.dumps(metadata))
+
+        return len(sampled_classes * n_samples_per_class)
+
+
+class DTDTestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.DTD
+    FEATURE_TYPES = (PIL.Image.Image, int)
+
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
+        split=("train", "test", "val"),
+        # There is no need to test the whole matrix here, since each fold is treated exactly the same
+        partition=(1, 5, 10),
+    )
+
+    def inject_fake_data(self, tmpdir: str, config):
+        data_folder = pathlib.Path(tmpdir) / "dtd" / "dtd"
+
+        num_images_per_class = 3
+        image_folder = data_folder / "images"
+        image_files = []
+        for cls in ("banded", "marbled", "zigzagged"):
+            image_files.extend(
+                datasets_utils.create_image_folder(
+                    image_folder,
+                    cls,
+                    file_name_fn=lambda idx: f"{cls}_{idx:04d}.jpg",
+                    num_examples=num_images_per_class,
+                )
+            )
+
+        meta_folder = data_folder / "labels"
+        meta_folder.mkdir()
+        image_ids = [str(path.relative_to(path.parents[1])).replace(os.sep, "/") for path in image_files]
+        image_ids_in_config = random.choices(image_ids, k=len(image_files) // 2)
+        with open(meta_folder / f"{config['split']}{config['partition']}.txt", "w") as file:
+            file.write("\n".join(image_ids_in_config) + "\n")
+
+        return len(image_ids_in_config)
+
+
+class FER2013TestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.FER2013
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
+
+    FEATURE_TYPES = (PIL.Image.Image, (int, type(None)))
+
+    def inject_fake_data(self, tmpdir, config):
+        base_folder = os.path.join(tmpdir, "fer2013")
+        os.makedirs(base_folder)
+
+        num_samples = 5
+        with open(os.path.join(base_folder, f"{config['split']}.csv"), "w", newline="") as file:
+            writer = csv.DictWriter(
+                file,
+                fieldnames=("emotion", "pixels") if config["split"] == "train" else ("pixels",),
+                quoting=csv.QUOTE_NONNUMERIC,
+                quotechar='"',
+            )
+            writer.writeheader()
+            for _ in range(num_samples):
+                row = dict(
+                    pixels=" ".join(
+                        str(pixel) for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist()
+                    )
+                )
+                if config["split"] == "train":
+                    row["emotion"] = str(int(torch.randint(0, 7, ())))
+
+                writer.writerow(row)
+
+        return num_samples
+
+
+class GTSRBTestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.GTSRB
+    FEATURE_TYPES = (PIL.Image.Image, int)
+
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False))
+
+    def inject_fake_data(self, tmpdir: str, config):
+        root_folder = os.path.join(tmpdir, "GTSRB")
+        os.makedirs(root_folder, exist_ok=True)
+
+        # Train data
+        train_folder = os.path.join(root_folder, "Training")
+        os.makedirs(train_folder, exist_ok=True)
+
+        num_examples = 3
+        classes = ("00000", "00042", "00012")
+        for class_idx in classes:
+            datasets_utils.create_image_folder(
+                train_folder,
+                name=class_idx,
+                file_name_fn=lambda image_idx: f"{class_idx}_{image_idx:05d}.ppm",
+                num_examples=num_examples,
+            )
+
+        total_number_of_examples = num_examples * len(classes)
+        # Test data
+        test_folder = os.path.join(root_folder, "Final_Test", "Images")
+        os.makedirs(test_folder, exist_ok=True)
+
+        with open(os.path.join(root_folder, "GT-final_test.csv"), "w") as csv_file:
+            csv_file.write("Filename;Width;Height;Roi.X1;Roi.Y1;Roi.X2;Roi.Y2;ClassId\n")
+
+            for _ in range(total_number_of_examples):
+                image_file = datasets_utils.create_random_string(5, string.digits) + ".ppm"
+                datasets_utils.create_image_file(test_folder, image_file)
+                row = [
+                    image_file,
+                    torch.randint(1, 100, size=()).item(),
+                    torch.randint(1, 100, size=()).item(),
+                    torch.randint(1, 100, size=()).item(),
+                    torch.randint(1, 100, size=()).item(),
+                    torch.randint(1, 100, size=()).item(),
+                    torch.randint(1, 100, size=()).item(),
+                    torch.randint(0, 43, size=()).item(),
+                ]
+                csv_file.write(";".join(map(str, row)) + "\n")
+
+        return total_number_of_examples
+
+
+class CLEVRClassificationTestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.CLEVRClassification
+    FEATURE_TYPES = (PIL.Image.Image, (int, type(None)))
+
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test"))
+
+    def inject_fake_data(self, tmpdir, config):
+        data_folder = pathlib.Path(tmpdir) / "clevr" / "CLEVR_v1.0"
+
+        images_folder = data_folder / "images"
+        image_files = datasets_utils.create_image_folder(
+            images_folder, config["split"], lambda idx: f"CLEVR_{config['split']}_{idx:06d}.png", num_examples=5
+        )
+
+        scenes_folder = data_folder / "scenes"
+        scenes_folder.mkdir()
+        if config["split"] != "test":
+            with open(scenes_folder / f"CLEVR_{config['split']}_scenes.json", "w") as file:
+                json.dump(
+                    dict(
+                        info=dict(),
+                        scenes=[
+                            dict(image_filename=image_file.name, objects=[dict()] * int(torch.randint(10, ())))
+                            for image_file in image_files
+                        ],
+                    ),
+                    file,
+                )
+
+        return len(image_files)
+
+
 class OxfordIIITPetTestCase(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.OxfordIIITPet
     FEATURE_TYPES = (PIL.Image.Image, (int, PIL.Image.Image, tuple, type(None)))
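
For context, the new test cases exercise datasets added in this merge. A hedged usage sketch (the constructor arguments mirror the test configs above; `download=True` and the exact signatures are assumptions about the public API):

```python
import torchvision.datasets as datasets

# Food101: split-based config; per FEATURE_TYPES above, samples are
# (PIL.Image.Image, int) pairs.
food = datasets.Food101(root="data", split="train", download=True)
image, label = food[0]

# DTD: split plus a partition (fold) argument, as exercised above.
dtd = datasets.DTD(root="data", split="val", partition=1, download=True)
print(len(food), len(dtd))
```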
