Skip to content

Commit 7d70aab

Browse files
committed
Merge branch 'main' into datasets/fer2013
2 parents 68982c6 + 5c9c835 commit 7d70aab

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+2199
-993
lines changed

.circleci/config.yml

Lines changed: 109 additions & 929 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.circleci/regenerate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from jinja2 import select_autoescape
2222

2323

24-
PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9"]
24+
PYTHON_VERSIONS = ["3.7", "3.8", "3.9"]
2525

2626
RC_PATTERN = r"/v[0-9]+(\.[0-9]+)*-rc[0-9]+/"
2727

README.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ supported Python versions.
2121
+--------------------------+--------------------------+---------------------------------+
2222
| ``torch`` | ``torchvision`` | ``python`` |
2323
+==========================+==========================+=================================+
24-
| ``main`` / ``nightly`` | ``main`` / ``nightly`` | ``>=3.6``, ``<=3.9`` |
24+
| ``main`` / ``nightly`` | ``main`` / ``nightly`` | ``>=3.7``, ``<=3.9`` |
25+
+--------------------------+--------------------------+---------------------------------+
26+
| ``1.10.1`` | ``0.11.2`` | ``>=3.6``, ``<=3.9`` |
2527
+--------------------------+--------------------------+---------------------------------+
2628
| ``1.10.0`` | ``0.11.1`` | ``>=3.6``, ``<=3.9`` |
2729
+--------------------------+--------------------------+---------------------------------+

docs/source/datasets.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ You can also create your own datasets using the provided :ref:`base classes <bas
3838
Cityscapes
3939
CocoCaptions
4040
CocoDetection
41+
DTD
4142
EMNIST
4243
FakeData
4344
FashionMNIST
@@ -46,6 +47,7 @@ You can also create your own datasets using the provided :ref:`base classes <bas
4647
Flickr30k
4748
FlyingChairs
4849
FlyingThings3D
50+
Food101
4951
HD1K
5052
HMDB51
5153
ImageNet

packaging/windows/internal/cuda_install.bat

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,10 @@ if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" (
212212
echo Installing CUDA toolkit...
213213
7z x %CUDA_SETUP_FILE% -o"%SRC_DIR%\temp_build\cuda"
214214
pushd "%SRC_DIR%\temp_build\cuda"
215+
sc config wuauserv start= disabled
216+
sc stop wuauserv
217+
sc query wuauserv
218+
215219
start /wait setup.exe -s %ARGS% -loglevel:6 -log:"%cd%/cuda_install_logs"
216220
echo %errorlevel%
217221

references/classification/README.md

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -145,25 +145,57 @@ Here `$MODEL` is one of `regnet_x_32gf`, `regnet_y_16gf` and `regnet_y_32gf`.
145145

146146
### Vision Transformer
147147

148-
#### Base models
148+
#### vit_b_16
149149
```
150150
torchrun --nproc_per_node=8 train.py\
151-
--model $MODEL --epochs 300 --batch-size 64 --opt adamw --lr 0.003 --wd 0.3\
151+
--model vit_b_16 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
152152
--lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
153153
--lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment ra\
154154
--clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
155155
```
156-
Here `$MODEL` is one of `vit_b_16` and `vit_b_32`.
157156

158-
#### Large models
157+
Note that the above command corresponds to training on a single node with 8 GPUs.
158+
For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
159+
and `--batch-size 64`.
160+
161+
#### vit_b_32
159162
```
160163
torchrun --nproc_per_node=8 train.py\
161-
--model $MODEL --epochs 300 --batch-size 16 --opt adamw --lr 0.003 --wd 0.3\
164+
--model vit_b_32 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
165+
--lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
166+
--lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment imagenet\
167+
--clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
168+
```
169+
170+
Note that the above command corresponds to training on a single node with 8 GPUs.
171+
For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
172+
and `--batch-size 256`.
173+
174+
#### vit_l_16
175+
```
176+
torchrun --nproc_per_node=8 train.py\
177+
--model vit_l_16 --epochs 600 --batch-size 128 --lr 0.5 --lr-scheduler cosineannealinglr\
178+
--lr-warmup-method linear --lr-warmup-epochs 5 --label-smoothing 0.1 --mixup-alpha 0.2\
179+
--auto-augment ta_wide --random-erase 0.1 --weight-decay 0.00002 --norm-weight-decay 0.0\
180+
--clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema --val-resize-size 232
181+
```
182+
183+
Note that the above command corresponds to training on a single node with 8 GPUs.
184+
For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
185+
and `--batch-size 64`.
186+
187+
#### vit_l_32
188+
```
189+
torchrun --nproc_per_node=8 train.py\
190+
--model vit_l_32 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
162191
--lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
163192
--lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment ra\
164193
--clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
165194
```
166-
Here `$MODEL` is one of `vit_l_16` and `vit_l_32`.
195+
196+
Note that the above command corresponds to training on a single node with 8 GPUs.
197+
For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
198+
and `--batch-size 64`.
167199

168200
## Mixed precision training
169201
Automatic Mixed Precision (AMP) training on GPU for PyTorch can be enabled with the [torch.cuda.amp](https://pytorch.org/docs/stable/amp.html?highlight=amp#module-torch.cuda.amp) module.

references/classification/sampler.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True, seed=0,
3636
self.repetitions = repetitions
3737

3838
def __iter__(self):
39-
# Deterministically shuffle based on epoch
40-
g = torch.Generator()
41-
g.manual_seed(self.seed + self.epoch)
4239
if self.shuffle:
40+
# Deterministically shuffle based on epoch
41+
g = torch.Generator()
42+
g.manual_seed(self.seed + self.epoch)
4343
indices = torch.randperm(len(self.dataset), generator=g).tolist()
4444
else:
4545
indices = list(range(len(self.dataset)))

setup.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,59 @@ def get_extensions():
427427
)
428428
)
429429

430+
# Locating video codec
431+
# CUDA_HOME should be set to the cuda root directory.
432+
# TORCHVISION_INCLUDE and TORCHVISION_LIBRARY should include the location to
433+
# video codec header files and libraries respectively.
434+
video_codec_found = (
435+
extension is CUDAExtension
436+
and CUDA_HOME is not None
437+
and any([os.path.exists(os.path.join(folder, "cuviddec.h")) for folder in vision_include])
438+
and any([os.path.exists(os.path.join(folder, "nvcuvid.h")) for folder in vision_include])
439+
and any([os.path.exists(os.path.join(folder, "libnvcuvid.so")) for folder in library_dirs])
440+
)
441+
442+
print(f"video codec found: {video_codec_found}")
443+
444+
if (
445+
video_codec_found
446+
and has_ffmpeg
447+
and any([os.path.exists(os.path.join(folder, "libavcodec", "bsf.h")) for folder in ffmpeg_include_dir])
448+
):
449+
gpu_decoder_path = os.path.join(extensions_dir, "io", "decoder", "gpu")
450+
gpu_decoder_src = glob.glob(os.path.join(gpu_decoder_path, "*.cpp"))
451+
cuda_libs = os.path.join(CUDA_HOME, "lib64")
452+
cuda_inc = os.path.join(CUDA_HOME, "include")
453+
454+
ext_modules.append(
455+
extension(
456+
"torchvision.Decoder",
457+
gpu_decoder_src,
458+
include_dirs=include_dirs + [gpu_decoder_path] + [cuda_inc] + ffmpeg_include_dir,
459+
library_dirs=ffmpeg_library_dir + library_dirs + [cuda_libs],
460+
libraries=[
461+
"avcodec",
462+
"avformat",
463+
"avutil",
464+
"swresample",
465+
"swscale",
466+
"nvcuvid",
467+
"cuda",
468+
"cudart",
469+
"z",
470+
"pthread",
471+
"dl",
472+
],
473+
extra_compile_args=extra_compile_args,
474+
)
475+
)
476+
else:
477+
print(
478+
"The installed version of ffmpeg is missing the header file 'bsf.h' which is "
479+
"required for GPU video decoding. Please install the latest ffmpeg from conda-forge channel:"
480+
" `conda install -c conda-forge ffmpeg`."
481+
)
482+
430483
return ext_modules
431484

432485

test/test_datasets.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2169,6 +2169,79 @@ def inject_fake_data(self, tmpdir, config):
21692169
return num_sequences * (num_examples_per_sequence - 1)
21702170

21712171

2172+
class Food101TestCase(datasets_utils.ImageDatasetTestCase):
2173+
DATASET_CLASS = datasets.Food101
2174+
FEATURE_TYPES = (PIL.Image.Image, int)
2175+
2176+
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
2177+
2178+
def inject_fake_data(self, tmpdir: str, config):
2179+
root_folder = pathlib.Path(tmpdir) / "food-101"
2180+
image_folder = root_folder / "images"
2181+
meta_folder = root_folder / "meta"
2182+
2183+
image_folder.mkdir(parents=True)
2184+
meta_folder.mkdir()
2185+
2186+
num_images_per_class = 5
2187+
2188+
metadata = {}
2189+
n_samples_per_class = 3 if config["split"] == "train" else 2
2190+
sampled_classes = ("apple_pie", "crab_cakes", "gyoza")
2191+
for cls in sampled_classes:
2192+
im_fnames = datasets_utils.create_image_folder(
2193+
image_folder,
2194+
cls,
2195+
file_name_fn=lambda idx: f"{idx}.jpg",
2196+
num_examples=num_images_per_class,
2197+
)
2198+
metadata[cls] = [
2199+
"/".join(fname.relative_to(image_folder).with_suffix("").parts)
2200+
for fname in random.choices(im_fnames, k=n_samples_per_class)
2201+
]
2202+
2203+
with open(meta_folder / f"{config['split']}.json", "w") as file:
2204+
file.write(json.dumps(metadata))
2205+
2206+
return len(sampled_classes * n_samples_per_class)
2207+
2208+
2209+
class DTDTestCase(datasets_utils.ImageDatasetTestCase):
2210+
DATASET_CLASS = datasets.DTD
2211+
FEATURE_TYPES = (PIL.Image.Image, int)
2212+
2213+
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
2214+
split=("train", "test", "val"),
2215+
# There is no need to test the whole matrix here, since each fold is treated exactly the same
2216+
partition=(1, 5, 10),
2217+
)
2218+
2219+
def inject_fake_data(self, tmpdir: str, config):
2220+
data_folder = pathlib.Path(tmpdir) / "dtd" / "dtd"
2221+
2222+
num_images_per_class = 3
2223+
image_folder = data_folder / "images"
2224+
image_files = []
2225+
for cls in ("banded", "marbled", "zigzagged"):
2226+
image_files.extend(
2227+
datasets_utils.create_image_folder(
2228+
image_folder,
2229+
cls,
2230+
file_name_fn=lambda idx: f"{cls}_{idx:04d}.jpg",
2231+
num_examples=num_images_per_class,
2232+
)
2233+
)
2234+
2235+
meta_folder = data_folder / "labels"
2236+
meta_folder.mkdir()
2237+
image_ids = [str(path.relative_to(path.parents[1])).replace(os.sep, "/") for path in image_files]
2238+
image_ids_in_config = random.choices(image_ids, k=len(image_files) // 2)
2239+
with open(meta_folder / f"{config['split']}{config['partition']}.txt", "w") as file:
2240+
file.write("\n".join(image_ids_in_config) + "\n")
2241+
2242+
return len(image_ids_in_config)
2243+
2244+
21722245
class FER2013TestCase(datasets_utils.ImageDatasetTestCase):
21732246
DATASET_CLASS = datasets.FER2013
21742247
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))

test/test_video_gpu_decoder.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import os
2+
3+
import pytest
4+
import torch
5+
from torchvision.io import _HAS_VIDEO_DECODER, VideoReader
6+
7+
try:
8+
import av
9+
except ImportError:
10+
av = None
11+
12+
VIDEO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "videos")
13+
14+
test_videos = [
15+
"RATRACE_wave_f_nm_np1_fr_goo_37.avi",
16+
"TrumanShow_wave_f_nm_np1_fr_med_26.avi",
17+
"v_SoccerJuggling_g23_c01.avi",
18+
"v_SoccerJuggling_g24_c01.avi",
19+
"R6llTwEh07w.mp4",
20+
"SOX5yA1l24A.mp4",
21+
"WUzgd7C1pWA.mp4",
22+
]
23+
24+
25+
@pytest.mark.skipif(_HAS_VIDEO_DECODER is False, reason="Didn't compile with support for gpu decoder")
26+
class TestVideoGPUDecoder:
27+
@pytest.mark.skipif(av is None, reason="PyAV unavailable")
28+
def test_frame_reading(self):
29+
for test_video in test_videos:
30+
full_path = os.path.join(VIDEO_DIR, test_video)
31+
decoder = VideoReader(full_path, device="cuda:0")
32+
with av.open(full_path) as container:
33+
for av_frame in container.decode(container.streams.video[0]):
34+
av_frames = torch.tensor(av_frame.to_ndarray().flatten())
35+
vision_frames = next(decoder)["data"]
36+
mean_delta = torch.mean(torch.abs(av_frames.float() - decoder._reformat(vision_frames).float()))
37+
assert mean_delta < 0.1
38+
39+
40+
if __name__ == "__main__":
41+
pytest.main([__file__])

0 commit comments

Comments
 (0)