pytorch · NicolasHug · Jan 10, 2022 · Dec 30, 2021 · Dec 30, 2021 · Dec 31, 2021
diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst
@@ -38,6 +38,7 @@ You can also create your own datasets using the provided :ref:`base classes <bas
     Cityscapes
     CocoCaptions
     CocoDetection
+    Country211
     DTD
     EMNIST
     FakeData

diff --git a/test/test_datasets.py b/test/test_datasets.py
@@ -2463,5 +2463,29 @@ def _meta_to_split_and_classification_ann(self, meta, idx):
         return (image_id, class_id, species, breed_id)
 
 
+class Country211TestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.Country211
+    FEATURE_TYPES = (PIL.Image.Image, int)
+
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "valid", "test"))
+
+    def inject_fake_data(self, tmpdir: str, config):
+        """Populate a fake ``country211/<split>`` folder and return how many examples were created."""
+        split_dir = pathlib.Path(tmpdir, "country211", config["split"])
+        split_dir.mkdir(parents=True, exist_ok=True)
+
+        num_created = 0
+        # One class folder per fake country code, each requested with five ``<idx>.jpg`` examples.
+        for country_code in ("AD", "BS", "GR"):
+            created = datasets_utils.create_image_folder(
+                split_dir,
+                name=country_code,
+                file_name_fn=lambda idx: f"{idx}.jpg",
+                num_examples=5,
+            )
+            num_created += len(created)
+        return num_created
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py
@@ -5,6 +5,7 @@
 from .cityscapes import Cityscapes
 from .clevr import CLEVRClassification
 from .coco import CocoCaptions, CocoDetection
+from .country211 import Country211
 from .dtd import DTD
 from .fakedata import FakeData
 from .fer2013 import FER2013
@@ -91,4 +92,5 @@
     "GTSRB",
     "CLEVRClassification",
     "OxfordIIITPet",
+    "Country211",
 )
diff --git a/torchvision/datasets/country211.py b/torchvision/datasets/country211.py
@@ -0,0 +1,56 @@
+from pathlib import Path
+from typing import Callable, Optional
+
+from .folder import ImageFolder
+from .utils import verify_str_arg, download_and_extract_archive
+
+
+class Country211(ImageFolder):
+    """`The Country211 Data Set <https://github.com/openai/CLIP/blob/main/data/country211.md>`_.
+
+    Built by filtering the YFCC100m dataset for images whose GPS coordinates correspond to an
+    ISO-3166 country code, then balanced by sampling 150 train images, 50 validation images,
+    and 100 test images for each country.
+
+    Args:
+        root (string): Root directory of the dataset.
+        split (string, optional): The dataset split, supports ``"train"`` (default), ``"valid"`` and ``"test"``.
+        transform (callable, optional): A function/transform that takes in a PIL image and returns a transformed
+            version. E.g, ``transforms.RandomCrop``.
+        target_transform (callable, optional): A function/transform that takes in the target and transforms it.
+        download (bool, optional): If True (default), download and extract the archive unless
+            ``root/country211`` already exists.
+    """
+
+    _URL = "https://openaipublic.azureedge.net/clip/data/country211.tgz"
+    _MD5 = "84988d7644798601126c29e9877aab6a"
+
+    def __init__(
+        self,
+        root: str,
+        split: str = "train",
+        transform: Optional[Callable] = None,
+        target_transform: Optional[Callable] = None,
+        download: bool = True,
+    ) -> None:
+        self._split = verify_str_arg(split, "split", ("train", "valid", "test"))
+
+        root_path = Path(root).expanduser()
+        self.root = str(root_path)  # read by ``_download`` before the base class is initialized
+        self._base_folder = root_path / "country211"
+        if download:
+            self._download()
+        if not self._check_exists():
+            raise RuntimeError("Dataset not found. You can use download=True to download it")
+
+        split_folder = self._base_folder / self._split
+        super().__init__(str(split_folder), transform=transform, target_transform=target_transform)
+        # The base class was initialized with the split folder; set ``root`` to the dataset root afterwards.
+        self.root = str(root_path)
+
+    def _check_exists(self) -> bool:
+        return self._base_folder.is_dir()  # only the base folder is checked, not the split folder
+
+    def _download(self) -> None:
+        if not self._check_exists():  # nothing to do when the base folder is already present
+            download_and_extract_archive(self._URL, download_root=self.root, md5=self._MD5)