-
Notifications
You must be signed in to change notification settings - Fork 7.1k
Add vggface2 dataset #2910
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Add vggface2 dataset #2910
Changes from all commits
Commits
Show all changes
23 commits
Select commit
Hold shift + click to select a range
53e0ffd
add vggface dataset class
jgbradley1 5e75f3c
fix flake8 errors
jgbradley1 79a8b49
Merge branch 'master' into add-vggface2-dataset
jgbradley1 dc36580
add standard dataset arguments
jgbradley1 ab31f88
fix code formatting and standardize dataset class
jgbradley1 9d3590d
add dataset citation
jgbradley1 0ea263a
more formatting fixes
jgbradley1 cdfc612
Merge branch 'master' into add-vggface2-dataset
jgbradley1 7bb168e
docstring update
jgbradley1 d7da9f6
Merge branch 'master' into add-vggface2-dataset
jgbradley1 7510edf
formatting update
jgbradley1 170eff1
remove unused variable
jgbradley1 d8d5e02
use double quoted strings
jgbradley1 58dfd04
add vggface2 unit test
jgbradley1 f464212
Merge branch 'master' into add-vggface2-dataset
jgbradley1 baf22b7
add pandas check
jgbradley1 1bb70df
Merge branch 'add-vggface2-dataset' of github.com:jgbradley1/vision i…
jgbradley1 1256a35
add docstring to vggface fakedata generator
jgbradley1 941682b
fix docstring indentation
jgbradley1 a392514
use local variable scope and fixed minor docstring formatting
jgbradley1 7089156
minor style fixes
jgbradley1 a363104
Merge branch 'master' into add-vggface2-dataset
jgbradley1 7a03699
Merge branch 'master' into add-vggface2-dataset
jgbradley1 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
from functools import partial | ||
from PIL import Image | ||
import os | ||
import torch | ||
from typing import Any, Callable, Dict, List, Optional, Tuple, Union | ||
from .utils import check_integrity, extract_archive, verify_str_arg | ||
from .vision import VisionDataset | ||
|
||
|
||
class VGGFace2(VisionDataset):
    """`VGGFace2 <http://zeus.robots.ox.ac.uk/vgg_face2/>`_ Dataset.

    Citation:
        @inproceedings{Cao18,
            author = "Cao, Q. and Shen, L. and Xie, W. and Parkhi, O. M. and Zisserman, A.",
            title = "VGGFace2: A dataset for recognising faces across pose and age",
            booktitle = "International Conference on Automatic Face and Gesture Recognition",
            year = "2018"}

    Args:
        root (string): Root directory of the VGGFace2 Dataset.
            Expects the following folder structure if download=False:
            <root>
                └── vggface2
                    ├── bb_landmark.tar.gz (or 'bb_landmark' if uncompressed)
                    ├── vggface2_train.tar.gz (or 'train' if uncompressed)
                    ├── vggface2_test.tar.gz (or 'test' if uncompressed)
                    ├── train_list.txt
                    └── test_list.txt
        split (string): The dataset split to use. One of {``train``, ``test``}.
            Defaults to ``train``.
        target_type (string or list): The type of target to use. One of
            {``class_id``, ``image_id``, ``face_id``, ``bbox``, ``landmarks``, ``""``}.
            Can also be a list to output a tuple with all specified target types.
            The targets represent:
                ``class_id`` (string)
                ``image_id`` (string)
                ``face_id`` (string)
                ``bbox`` (torch.tensor shape=(4,) dtype=int): bounding box (x, y, width, height)
                ``landmarks`` (torch.tensor shape=(10,) dtype=float): values that
                    represent five points (P1X, P1Y, P2X, P2Y, P3X, P3Y, P4X, P4Y, P5X, P5Y)
            Defaults to ``bbox``. If empty, ``None`` will be returned as target.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): The dataset is not publicly downloadable, so
            passing ``True`` always raises a ``RuntimeError``. The argument exists
            only for interface consistency with the other datasets.
    """

    BASE_FOLDER = "vggface2"
    FILE_LIST = [
        # Filename                 MD5 Hash                            Uncompressed filename
        ("vggface2_train.tar.gz", "88813c6b15de58afc8fa75ea83361d7f", "train"),
        ("vggface2_test.tar.gz", "bb7a323824d1004e14e00c23974facd3", "test"),
        ("bb_landmark.tar.gz", "26f7ba288a782862d137348a1cb97540", "bb_landmark"),
    ]

    def __init__(
        self,
        root: str,
        split: str = "train",
        target_type: Union[List[str], str] = "bbox",
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        # pandas is an optional dependency; fail with an actionable message
        # instead of a bare ImportError.
        try:
            import pandas
        except ImportError:
            raise RuntimeError("The pandas package is required to use the VGGFace2 "
                               "dataset. Please install it with `pip install pandas`.")
        super(VGGFace2, self).__init__(root=os.path.join(root, self.BASE_FOLDER),
                                       transform=transform,
                                       target_transform=target_transform)

        # stay consistent with other datasets and check for a download option
        if download:
            msg = ("The dataset is not publicly accessible. You must login and "
                   "download the archives externally and place them in the root "
                   "directory.")
            raise RuntimeError(msg)

        # check arguments
        self.split = verify_str_arg(split, "split", ("train", "test"))
        self.img_info: List[Dict[str, object]] = []

        if isinstance(target_type, list):
            self.target_type = target_type
        else:
            self.target_type = [target_type]
        self.target_type = [verify_str_arg(t, "target_type",
                            ("class_id", "image_id", "face_id", "bbox", "landmarks", ""))
                            for t in self.target_type]

        if not self.target_type and self.target_transform is not None:
            raise RuntimeError("target_transform is specified but target_type is empty")

        image_list_file = "train_list.txt" if self.split == "train" else "test_list.txt"
        image_list_file = os.path.join(self.root, image_list_file)

        # prepare dataset: extract each archive unless its extracted directory
        # already exists; fail early with a clear message if the archive is absent
        for (filename, _, extracted_dir) in self.FILE_LIST:
            extracted_dir_path = os.path.join(self.root, extracted_dir)
            if not os.path.isdir(extracted_dir_path):
                archive = os.path.join(self.root, filename)
                if not check_integrity(archive):
                    msg = ("{} is not found in the root directory. You must download "
                           "it externally and place it in {}.".format(filename, self.root))
                    raise RuntimeError(msg)
                extract_archive(archive)

        # process dataset: bounding boxes and landmarks for both splits are
        # concatenated so either split's images can be looked up by the
        # "<class>/<image>_<face>" key
        fn = partial(os.path.join, self.root, self.FILE_LIST[2][2])
        bbox_frames = [pandas.read_csv(fn("loose_bb_train.csv"), index_col=0),
                       pandas.read_csv(fn("loose_bb_test.csv"), index_col=0)]
        self.bbox = pandas.concat(bbox_frames)
        landmark_frames = [pandas.read_csv(fn("loose_landmark_train.csv"), index_col=0),
                           pandas.read_csv(fn("loose_landmark_test.csv"), index_col=0)]
        self.landmarks = pandas.concat(landmark_frames)

        with open(image_list_file, "r") as f:
            for img_file in f:
                img_file = img_file.strip()
                img_filename, _ = os.path.splitext(img_file)  # e.g. "n004332/0317_01"
                class_id, image_face_id = img_filename.split("/")  # e.g. ["n004332", "0317_01"]
                class_id = class_id[1:]  # drop the leading "n" of the class directory name
                image_id, face_id = image_face_id.split("_")
                img_filepath = os.path.join(self.root, self.split, img_file)
                self.img_info.append({
                    "img_path": img_filepath,
                    "class_id": class_id,
                    "image_id": image_id,
                    "face_id": face_id,
                    "bbox": torch.tensor(self.bbox.loc[img_filename].values),
                    "landmarks": torch.tensor(self.landmarks.loc[img_filename].values),
                })

    def __len__(self) -> int:
        """Return the number of images in the selected split."""
        return len(self.img_info)

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """Return ``(image, target)`` for the image at ``index``.

        ``target`` is ``None`` when ``target_type`` is empty or contains ``""``;
        otherwise a single value or a tuple following the order of ``target_type``.
        """
        # prepare image
        # NOTE(review): image is returned in its on-disk mode; most torchvision
        # datasets call .convert("RGB") here — confirm whether that is desired.
        img = Image.open(self.img_info[index]["img_path"])
        if self.transform:
            img = self.transform(img)

        # prepare target
        target: Any = []
        for t in self.target_type:
            if t == "":
                target = None
                break
            target.append(self.img_info[index][t])
        if target:
            target = tuple(target) if len(target) > 1 else target[0]
            if self.target_transform is not None:
                target = self.target_transform(target)
        else:
            # empty target_type: match the documented contract and return None
            target = None

        return img, target

    def extra_repr(self) -> str:
        """Extra lines for ``repr()``: the configured target types and split."""
        lines = ["Target type: {target_type}", "Split: {split}"]
        return "\n".join(lines).format(**self.__dict__)
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.