|
1 | 1 | import bz2
|
2 | 2 | import contextlib
|
| 3 | +import csv |
3 | 4 | import io
|
4 | 5 | import itertools
|
5 | 6 | import json
|
@@ -853,7 +854,7 @@ def _annotation_file_name(self, fold, train):
|
853 | 854 |
|
854 | 855 | def _create_annotation_file(self, root, name, video_files):
|
855 | 856 | with open(pathlib.Path(root) / name, "w") as fh:
|
856 |
| - fh.writelines(f"{file}\n" for file in sorted(video_files)) |
| 857 | + fh.writelines(f"{str(file).replace(os.sep, '/')}\n" for file in sorted(video_files)) |
857 | 858 |
|
858 | 859 |
|
859 | 860 | class LSUNTestCase(datasets_utils.ImageDatasetTestCase):
|
@@ -2251,5 +2252,216 @@ def inject_fake_data(self, tmpdir: str, config):
|
2251 | 2252 | return num_samples
|
2252 | 2253 |
|
2253 | 2254 |
|
class DTDTestCase(datasets_utils.ImageDatasetTestCase):
    """Fake-data test case for the DTD (Describable Textures) dataset."""

    DATASET_CLASS = datasets.DTD
    FEATURE_TYPES = (PIL.Image.Image, int)

    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
        split=("train", "test", "val"),
        # Every fold is handled identically, so testing a few representative
        # partitions is enough — no need for the full matrix.
        partition=(1, 5, 10),
    )

    def inject_fake_data(self, tmpdir: str, config):
        # The archive extracts to <root>/dtd/dtd.
        data_folder = pathlib.Path(tmpdir) / "dtd" / "dtd"

        images_per_class = 3
        image_root = data_folder / "images"
        all_images = []
        for texture in ("banded", "marbled", "zigzagged"):
            created = datasets_utils.create_image_folder(
                image_root,
                texture,
                file_name_fn=lambda idx: f"{texture}_{idx:04d}.jpg",
                num_examples=images_per_class,
            )
            all_images.extend(created)

        labels_folder = data_folder / "labels"
        labels_folder.mkdir()
        # Annotation files reference images relative to the images folder,
        # always with POSIX separators.
        ids = [str(path.relative_to(path.parents[1])).replace(os.sep, "/") for path in all_images]
        selected = random.choices(ids, k=len(all_images) // 2)
        with open(labels_folder / f"{config['split']}{config['partition']}.txt", "w") as fh:
            fh.write("\n".join(selected) + "\n")

        return len(selected)
| 2289 | + |
| 2290 | + |
class FER2013TestCase(datasets_utils.ImageDatasetTestCase):
    """Fake-data test case for FER2013; the test split ships without labels."""

    DATASET_CLASS = datasets.FER2013
    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))

    # The emotion label is None on the test split, hence the (int, NoneType) union.
    FEATURE_TYPES = (PIL.Image.Image, (int, type(None)))

    def inject_fake_data(self, tmpdir, config):
        base_folder = os.path.join(tmpdir, "fer2013")
        os.makedirs(base_folder)

        num_samples = 5
        is_train = config["split"] == "train"
        with open(os.path.join(base_folder, f"{config['split']}.csv"), "w", newline="") as file:
            writer = csv.DictWriter(
                file,
                fieldnames=("emotion", "pixels") if is_train else ("pixels",),
                quoting=csv.QUOTE_NONNUMERIC,
                quotechar='"',
            )
            writer.writeheader()
            for _ in range(num_samples):
                # A 48x48 image flattened into a space-separated pixel string,
                # matching the real CSV format.
                pixels = datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist()
                row = {"pixels": " ".join(str(pixel) for pixel in pixels)}
                if is_train:
                    row["emotion"] = str(int(torch.randint(0, 7, ())))
                writer.writerow(row)

        return num_samples
| 2322 | + |
| 2323 | + |
class GTSRBTestCase(datasets_utils.ImageDatasetTestCase):
    """Fake-data test case for the GTSRB traffic-sign dataset."""

    DATASET_CLASS = datasets.GTSRB
    FEATURE_TYPES = (PIL.Image.Image, int)

    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False))

    def inject_fake_data(self, tmpdir: str, config):
        root_folder = os.path.join(tmpdir, "GTSRB")
        os.makedirs(root_folder, exist_ok=True)

        # Training layout: one sub-folder per class id.
        train_folder = os.path.join(root_folder, "Training")
        os.makedirs(train_folder, exist_ok=True)

        num_examples = 3
        classes = ("00000", "00042", "00012")
        for class_idx in classes:
            datasets_utils.create_image_folder(
                train_folder,
                name=class_idx,
                file_name_fn=lambda image_idx: f"{class_idx}_{image_idx:05d}.ppm",
                num_examples=num_examples,
            )

        total_number_of_examples = num_examples * len(classes)

        # Test layout: a flat image folder plus a semicolon-separated
        # ground-truth CSV in the dataset root.
        test_folder = os.path.join(root_folder, "Final_Test", "Images")
        os.makedirs(test_folder, exist_ok=True)

        with open(os.path.join(root_folder, "GT-final_test.csv"), "w") as csv_file:
            csv_file.write("Filename;Width;Height;Roi.X1;Roi.Y1;Roi.X2;Roi.Y2;ClassId\n")

            for _ in range(total_number_of_examples):
                image_file = datasets_utils.create_random_string(5, string.digits) + ".ppm"
                datasets_utils.create_image_file(test_folder, image_file)
                # Filename, six random size/ROI columns, then the class id (0-42).
                geometry = [torch.randint(1, 100, size=()).item() for _ in range(6)]
                row = [image_file, *geometry, torch.randint(0, 43, size=()).item()]
                csv_file.write(";".join(map(str, row)) + "\n")

        return total_number_of_examples
| 2372 | + |
| 2373 | + |
class CLEVRClassificationTestCase(datasets_utils.ImageDatasetTestCase):
    """Fake-data test case for CLEVR classification.

    The label is the object count from the scenes file; the test split has
    no scenes file, so its target is None.
    """

    DATASET_CLASS = datasets.CLEVRClassification
    FEATURE_TYPES = (PIL.Image.Image, (int, type(None)))

    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test"))

    def inject_fake_data(self, tmpdir, config):
        split = config["split"]
        data_folder = pathlib.Path(tmpdir) / "clevr" / "CLEVR_v1.0"

        image_files = datasets_utils.create_image_folder(
            data_folder / "images", split, lambda idx: f"CLEVR_{split}_{idx:06d}.png", num_examples=5
        )

        scenes_folder = data_folder / "scenes"
        scenes_folder.mkdir()
        if split != "test":
            # Each scene lists its image and a random number of (empty) objects.
            scenes = [
                dict(image_filename=image_file.name, objects=[dict()] * int(torch.randint(10, ())))
                for image_file in image_files
            ]
            with open(scenes_folder / f"CLEVR_{split}_scenes.json", "w") as file:
                json.dump(dict(info=dict(), scenes=scenes), file)

        return len(image_files)
| 2404 | + |
| 2405 | + |
class OxfordIIITPetTestCase(datasets_utils.ImageDatasetTestCase):
    """Fake-data test case for Oxford-IIIT Pet with category/segmentation targets."""

    DATASET_CLASS = datasets.OxfordIIITPet
    FEATURE_TYPES = (PIL.Image.Image, (int, PIL.Image.Image, tuple, type(None)))

    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
        split=("trainval", "test"),
        target_types=("category", "segmentation", ["category", "segmentation"], []),
    )

    def inject_fake_data(self, tmpdir, config):
        base_folder = os.path.join(tmpdir, "oxford-iiit-pet")

        classification_anns_meta = (
            dict(cls="Abyssinian", label=0, species="cat"),
            dict(cls="Keeshond", label=18, species="dog"),
            dict(cls="Yorkshire Terrier", label=37, species="dog"),
        )
        split_and_classification_anns = [
            self._meta_to_split_and_classification_ann(meta, idx)
            for meta, idx in itertools.product(classification_anns_meta, (1, 2, 10))
        ]
        image_ids = [ann[0] for ann in split_and_classification_anns]

        image_files = datasets_utils.create_image_folder(
            base_folder, "images", file_name_fn=lambda idx: f"{image_ids[idx]}.jpg", num_examples=len(image_ids)
        )

        anns_folder = os.path.join(base_folder, "annotations")
        os.makedirs(anns_folder)
        # Only half of the annotations end up in the requested split file.
        anns_in_split = random.choices(split_and_classification_anns, k=len(image_ids) // 2)
        with open(os.path.join(anns_folder, f"{config['split']}.txt"), "w", newline="") as file:
            writer = csv.writer(file, delimiter=" ")
            writer.writerows(anns_in_split)

        segmentation_files = datasets_utils.create_image_folder(
            anns_folder, "trimaps", file_name_fn=lambda idx: f"{image_ids[idx]}.png", num_examples=len(image_ids)
        )

        # The real dataset contains stray files that the loader must ignore.
        for path in image_files[:2]:
            path.with_suffix(".mat").touch()
        for path in segmentation_files:
            path.with_name(f".{path.name}").touch()

        return len(anns_in_split)

    def _meta_to_split_and_classification_ann(self, meta, idx):
        # Cat breeds use Title_Case in the image id, dog breeds lower_case.
        case = str.title if meta["species"] == "cat" else str.lower
        image_id = "_".join([*(case(part) for part in meta["cls"].split()), str(idx)])
        class_id = str(meta["label"] + 1)
        species = "1" if meta["species"] == "cat" else "2"
        breed_id = "-1"
        return (image_id, class_id, species, breed_id)
| 2464 | + |
| 2465 | + |
2254 | 2466 | if __name__ == "__main__":
|
2255 | 2467 | unittest.main()
|
0 commit comments