From 27579af185fabdf9c7ced9397f2356f31e0110ec Mon Sep 17 00:00:00 2001 From: Jirka Date: Mon, 11 Oct 2021 14:16:45 +0200 Subject: [PATCH 1/2] pre-commit black --- .pre-commit-config.yaml | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0024d0243d2..a773c7c1e47 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,18 @@ repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: check-docstring-first + - id: check-toml + - id: check-yaml + exclude: packaging/.* + - id: end-of-file-fixer + + - repo: https://github.com/psf/black + rev: 21.9b0 + hooks: + - id: black + - repo: https://github.com/omnilib/ufmt rev: v1.3.0 hooks: @@ -6,16 +20,9 @@ repos: additional_dependencies: - black == 21.9b0 - usort == 0.6.4 + - repo: https://gitlab.com/pycqa/flake8 rev: 3.9.2 hooks: - id: flake8 args: [--config=setup.cfg] - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 - hooks: - - id: check-docstring-first - - id: check-toml - - id: check-yaml - exclude: packaging/.* - - id: end-of-file-fixer From f7dd9ad89c96790b30d3b5ecd2a494f1fb69234b Mon Sep 17 00:00:00 2001 From: Jirka Date: Mon, 11 Oct 2021 14:18:36 +0200 Subject: [PATCH 2/2] apply --- gallery/plot_repurposing_annotations.py | 1 + gallery/plot_scripted_tensor_transforms.py | 17 +- gallery/plot_transforms.py | 17 +- gallery/plot_video_api.py | 76 ++++----- gallery/plot_visualization_utils.py | 177 +++++++++++++++------ 5 files changed, 186 insertions(+), 102 deletions(-) diff --git a/gallery/plot_repurposing_annotations.py b/gallery/plot_repurposing_annotations.py index fb4835496c3..da56525a899 100644 --- a/gallery/plot_repurposing_annotations.py +++ b/gallery/plot_repurposing_annotations.py @@ -161,6 +161,7 @@ def show(imgs): # Here is an example where we re-purpose the dataset from the # `PenFudan Detection Tutorial `_. 
+ class SegmentationToDetectionDataset(torch.utils.data.Dataset): def __init__(self, root, transforms): self.root = root diff --git a/gallery/plot_scripted_tensor_transforms.py b/gallery/plot_scripted_tensor_transforms.py index 6f3cc22073e..63c052b7a4b 100644 --- a/gallery/plot_scripted_tensor_transforms.py +++ b/gallery/plot_scripted_tensor_transforms.py @@ -33,14 +33,14 @@ from torchvision.io import read_image -plt.rcParams["savefig.bbox"] = 'tight' +plt.rcParams["savefig.bbox"] = "tight" torch.manual_seed(1) def show(imgs): fix, axs = plt.subplots(ncols=len(imgs), squeeze=False) for i, img in enumerate(imgs): - img = T.ToPILImage()(img.to('cpu')) + img = T.ToPILImage()(img.to("cpu")) axs[0, i].imshow(np.asarray(img)) axs[0, i].set(xticklabels=[], yticklabels=[], xticks=[], yticks=[]) @@ -49,8 +49,8 @@ def show(imgs): # The :func:`~torchvision.io.read_image` function allows to read an image and # directly load it as a tensor -dog1 = read_image(str(Path('assets') / 'dog1.jpg')) -dog2 = read_image(str(Path('assets') / 'dog2.jpg')) +dog1 = read_image(str(Path("assets") / "dog1.jpg")) +dog2 = read_image(str(Path("assets") / "dog2.jpg")) show([dog1, dog2]) #################################### @@ -68,7 +68,7 @@ def show(imgs): T.RandomHorizontalFlip(p=0.3), ) -device = 'cuda' if torch.cuda.is_available() else 'cpu' +device = "cuda" if torch.cuda.is_available() else "cpu" dog1 = dog1.to(device) dog2 = dog2.to(device) @@ -89,15 +89,14 @@ def show(imgs): class Predictor(nn.Module): - def __init__(self): super().__init__() self.resnet18 = resnet18(pretrained=True, progress=False).eval() self.transforms = nn.Sequential( - T.Resize([256, ]), # We use single int value inside a list due to torchscript type restrictions + T.Resize((256,)), # We use single int value inside a list due to torchscript type restrictions T.CenterCrop(224), T.ConvertImageDtype(torch.float), - T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), ) def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -125,7 +124,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: import json -with open(Path('assets') / 'imagenet_class_index.json', 'r') as labels_file: +with open(Path("assets") / "imagenet_class_index.json", "r") as labels_file: labels = json.load(labels_file) for i, (pred, pred_scripted) in enumerate(zip(res, res_scripted)): diff --git a/gallery/plot_transforms.py b/gallery/plot_transforms.py index ab0cb892b16..229b7ebb5fa 100644 --- a/gallery/plot_transforms.py +++ b/gallery/plot_transforms.py @@ -18,8 +18,8 @@ import torchvision.transforms as T -plt.rcParams["savefig.bbox"] = 'tight' -orig_img = Image.open(Path('assets') / 'astronaut.jpg') +plt.rcParams["savefig.bbox"] = "tight" +orig_img = Image.open(Path("assets") / "astronaut.jpg") # if you change the seed, make sure that the randomly-applied transforms # properly show that the image can be both transformed and *not* transformed! 
torch.manual_seed(0) @@ -41,7 +41,7 @@ def plot(imgs, with_orig=True, row_title=None, **imshow_kwargs): ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[]) if with_orig: - axs[0, 0].set(title='Original image') + axs[0, 0].set(title="Original image") axs[0, 0].title.set_size(8) if row_title is not None: for row_idx in range(num_rows): @@ -93,7 +93,7 @@ def plot(imgs, with_orig=True, row_title=None, **imshow_kwargs): # (see also :func:`~torchvision.transforms.functional.to_grayscale`) # converts an image to grayscale gray_img = T.Grayscale()(orig_img) -plot([gray_img], cmap='gray') +plot([gray_img], cmap="gray") #################################### # Random transforms @@ -105,7 +105,7 @@ def plot(imgs, with_orig=True, row_title=None, **imshow_kwargs): # ~~~~~~~~~~~ # The :class:`~torchvision.transforms.ColorJitter` transform # randomly changes the brightness, saturation, and other properties of an image. -jitter = T.ColorJitter(brightness=.5, hue=.3) +jitter = T.ColorJitter(brightness=0.5, hue=0.3) jitted_imgs = [jitter(orig_img) for _ in range(4)] plot(jitted_imgs) @@ -240,11 +240,8 @@ def plot(imgs, with_orig=True, row_title=None, **imshow_kwargs): # See :class:`~torchvision.transforms.AutoAugmentPolicy` for the available policies. policies = [T.AutoAugmentPolicy.CIFAR10, T.AutoAugmentPolicy.IMAGENET, T.AutoAugmentPolicy.SVHN] augmenters = [T.AutoAugment(policy) for policy in policies] -imgs = [ - [augmenter(orig_img) for _ in range(4)] - for augmenter in augmenters -] -row_title = [str(policy).split('.')[-1] for policy in policies] +imgs = [[augmenter(orig_img) for _ in range(4)] for augmenter in augmenters] +row_title = [str(policy).split(".")[-1] for policy in policies] plot(imgs, row_title=row_title) #################################### diff --git a/gallery/plot_video_api.py b/gallery/plot_video_api.py index fe296d67be0..0c47fa27497 100644 --- a/gallery/plot_video_api.py +++ b/gallery/plot_video_api.py @@ -35,9 +35,7 @@ # Download the sample video download_url( - "https://github.com/pytorch/vision/blob/main/test/assets/videos/WUzgd7C1pWA.mp4?raw=true", - ".", - "WUzgd7C1pWA.mp4" + "https://github.com/pytorch/vision/blob/main/test/assets/videos/WUzgd7C1pWA.mp4?raw=true", ".", "WUzgd7C1pWA.mp4" ) video_path = "./WUzgd7C1pWA.mp4" @@ -75,12 +73,12 @@ frames = [] # we are going to save the frames here. ptss = [] # pts is a presentation timestamp in seconds (float) of each frame for frame in video: - frames.append(frame['data']) - ptss.append(frame['pts']) + frames.append(frame["data"]) + ptss.append(frame["pts"]) print("PTS for first five frames ", ptss[:5]) print("Total number of frames: ", len(frames)) -approx_nf = metadata['audio']['duration'][0] * metadata['audio']['framerate'][0] +approx_nf = metadata["audio"]["duration"][0] * metadata["audio"]["framerate"][0] print("Approx total number of datapoints we can expect: ", approx_nf) print("Read data size: ", frames[0].size(0) * len(frames)) @@ -96,6 +94,7 @@ import itertools + video.set_current_stream("video") frames = [] # we are going to save the frames here. @@ -116,11 +115,11 @@ frames = [] # we are going to save the frames here. 
video = video.seek(2) -for frame in itertools.takewhile(lambda x: x['pts'] <= 5, video): - frames.append(frame['data']) +for frame in itertools.takewhile(lambda x: x["pts"] <= 5, video): + frames.append(frame["data"]) print("Total number of frames: ", len(frames)) -approx_nf = (5 - 2) * video.get_metadata()['video']['fps'][0] +approx_nf = (5 - 2) * video.get_metadata()["video"]["fps"][0] print("We can expect approx: ", approx_nf) print("Tensor size: ", frames[0].size()) @@ -136,8 +135,7 @@ def example_read_video(video_object, start=0, end=None, read_video=True, read_au end = float("inf") if end < start: raise ValueError( - "end time should be larger than start time, got " - "start time={} and end time={}".format(start, end) + "end time should be larger than start time, got " "start time={} and end time={}".format(start, end) ) video_frames = torch.empty(0) @@ -145,9 +143,9 @@ def example_read_video(video_object, start=0, end=None, read_video=True, read_au if read_video: video_object.set_current_stream("video") frames = [] - for frame in itertools.takewhile(lambda x: x['pts'] <= end, video_object.seek(start)): - frames.append(frame['data']) - video_pts.append(frame['pts']) + for frame in itertools.takewhile(lambda x: x["pts"] <= end, video_object.seek(start)): + frames.append(frame["data"]) + video_pts.append(frame["pts"]) if len(frames) > 0: video_frames = torch.stack(frames, 0) @@ -156,9 +154,9 @@ def example_read_video(video_object, start=0, end=None, read_video=True, read_au if read_audio: video_object.set_current_stream("audio") frames = [] - for frame in itertools.takewhile(lambda x: x['pts'] <= end, video_object.seek(start)): - frames.append(frame['data']) - video_pts.append(frame['pts']) + for frame in itertools.takewhile(lambda x: x["pts"] <= end, video_object.seek(start)): + frames.append(frame["data"]) + video_pts.append(frame["pts"]) if len(frames) > 0: audio_frames = torch.cat(frames, 0) @@ -179,6 +177,7 @@ def example_read_video(video_object, start=0, end=None, read_video=True, read_au #################################### # Make sample dataset import os + os.makedirs("./dataset", exist_ok=True) os.makedirs("./dataset/1", exist_ok=True) os.makedirs("./dataset/2", exist_ok=True) @@ -186,29 +185,31 @@ def example_read_video(video_object, start=0, end=None, read_video=True, read_au #################################### # Download the videos from torchvision.datasets.utils import download_url + download_url( "https://github.com/pytorch/vision/blob/main/test/assets/videos/WUzgd7C1pWA.mp4?raw=true", - "./dataset/1", "WUzgd7C1pWA.mp4" + "./dataset/1", + "WUzgd7C1pWA.mp4", ) download_url( "https://github.com/pytorch/vision/blob/main/test/assets/videos/RATRACE_wave_f_nm_np1_fr_goo_37.avi?raw=true", "./dataset/1", - "RATRACE_wave_f_nm_np1_fr_goo_37.avi" + "RATRACE_wave_f_nm_np1_fr_goo_37.avi", ) download_url( "https://github.com/pytorch/vision/blob/main/test/assets/videos/SOX5yA1l24A.mp4?raw=true", "./dataset/2", - "SOX5yA1l24A.mp4" + "SOX5yA1l24A.mp4", ) download_url( "https://github.com/pytorch/vision/blob/main/test/assets/videos/v_SoccerJuggling_g23_c01.avi?raw=true", "./dataset/2", - "v_SoccerJuggling_g23_c01.avi" + "v_SoccerJuggling_g23_c01.avi", ) download_url( "https://github.com/pytorch/vision/blob/main/test/assets/videos/v_SoccerJuggling_g24_c01.avi?raw=true", "./dataset/2", - "v_SoccerJuggling_g24_c01.avi" + "v_SoccerJuggling_g24_c01.avi", ) #################################### @@ 
-231,6 +232,7 @@ def get_samples(root, extensions=(".mp4", ".avi")): _, class_to_idx = _find_classes(root) return make_dataset(root, class_to_idx, extensions=extensions) + #################################### # We are going to define the dataset and some basic arguments. # We assume the structure of the FolderDataset, and add the following parameters: @@ -269,23 +271,19 @@ def __iter__(self): video_frames = [] # video frame buffer # Seek and return frames - max_seek = metadata["video"]['duration'][0] - (self.clip_len / metadata["video"]['fps'][0]) - start = random.uniform(0., max_seek) + max_seek = metadata["video"]["duration"][0] - (self.clip_len / metadata["video"]["fps"][0]) + start = random.uniform(0.0, max_seek) for frame in itertools.islice(vid.seek(start), self.clip_len): - video_frames.append(self.frame_transform(frame['data'])) - current_pts = frame['pts'] + video_frames.append(self.frame_transform(frame["data"])) + current_pts = frame["pts"] # Stack it into a tensor video = torch.stack(video_frames, 0) if self.video_transform: video = self.video_transform(video) - output = { - 'path': path, - 'video': video, - 'target': target, - 'start': start, - 'end': current_pts} + output = {"path": path, "video": video, "target": target, "start": start, "end": current_pts} yield output + #################################### # Given a path of videos in a folder structure, i.e: # @@ -310,14 +308,15 @@ def __iter__(self): #################################### from torch.utils.data import DataLoader + loader = DataLoader(dataset, batch_size=12) -data = {"video": [], 'start': [], 'end': [], 'tensorsize': []} +data = {"video": [], "start": [], "end": [], "tensorsize": []} for batch in loader: - for i in range(len(batch['path'])): - data['video'].append(batch['path'][i]) - data['start'].append(batch['start'][i].item()) - data['end'].append(batch['end'][i].item()) - data['tensorsize'].append(batch['video'][i].size()) + for i in range(len(batch["path"])): + data["video"].append(batch["path"][i]) + data["start"].append(batch["start"][i].item()) + data["end"].append(batch["end"][i].item()) + data["tensorsize"].append(batch["video"][i].size()) print(data) #################################### @@ -337,5 +336,6 @@ def __iter__(self): # Cleanup the video and dataset: import os import shutil + os.remove("./WUzgd7C1pWA.mp4") shutil.rmtree("./dataset") diff --git a/gallery/plot_visualization_utils.py b/gallery/plot_visualization_utils.py index 628319e52d5..767b6527562 100644 --- a/gallery/plot_visualization_utils.py +++ b/gallery/plot_visualization_utils.py @@ -16,7 +16,7 @@ import torchvision.transforms.functional as F -plt.rcParams["savefig.bbox"] = 'tight' +plt.rcParams["savefig.bbox"] = "tight" def show(imgs): @@ -41,8 +41,8 @@ def show(imgs): from torchvision.io import read_image from pathlib import Path -dog1_int = read_image(str(Path('assets') / 'dog1.jpg')) -dog2_int = read_image(str(Path('assets') / 'dog2.jpg')) +dog1_int = read_image(str(Path("assets") / "dog1.jpg")) +dog2_int = read_image(str(Path("assets") / "dog2.jpg")) grid = make_grid([dog1_int, dog2_int, dog1_int, dog2_int]) show(grid) @@ -90,9 +90,9 @@ def show(imgs): # Let's plot the boxes detected by our model. We will only plot the boxes with a # score greater than a given threshold. 
-score_threshold = .8 +score_threshold = 0.8 dogs_with_boxes = [ - draw_bounding_boxes(dog_int, boxes=output['boxes'][output['scores'] > score_threshold], width=4) + draw_bounding_boxes(dog_int, boxes=output["boxes"][output["scores"] > score_threshold], width=4) for dog_int, output in zip(batch_int, outputs) ] show(dogs_with_boxes) @@ -127,7 +127,7 @@ def show(imgs): model = model.eval() normalized_batch = F.normalize(batch, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) -output = model(normalized_batch)['out'] +output = model(normalized_batch)["out"] print(output.shape, output.min().item(), output.max().item()) ##################################### @@ -141,18 +141,34 @@ def show(imgs): # boat class: sem_classes = [ - '__background__', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', - 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', - 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' + "__background__", + "aeroplane", + "bicycle", + "bird", + "boat", + "bottle", + "bus", + "car", + "cat", + "chair", + "cow", + "diningtable", + "dog", + "horse", + "motorbike", + "person", + "pottedplant", + "sheep", + "sofa", + "train", + "tvmonitor", ] sem_class_to_idx = {cls: idx for (idx, cls) in enumerate(sem_classes)} normalized_masks = torch.nn.functional.softmax(output, dim=1) dog_and_boat_masks = [ - normalized_masks[img_idx, sem_class_to_idx[cls]] - for img_idx in range(batch.shape[0]) - for cls in ('dog', 'boat') + normalized_masks[img_idx, sem_class_to_idx[cls]] for img_idx in range(batch.shape[0]) for cls in ("dog", "boat") ] show(dog_and_boat_masks) @@ -167,7 +183,7 @@ def show(imgs): # 1]``. To get boolean masks, we can do the following: class_dim = 1 -boolean_dog_masks = (normalized_masks.argmax(class_dim) == sem_class_to_idx['dog']) +boolean_dog_masks = normalized_masks.argmax(class_dim) == sem_class_to_idx["dog"] print(f"shape = {boolean_dog_masks.shape}, dtype = {boolean_dog_masks.dtype}") show([m.float() for m in boolean_dog_masks]) @@ -189,8 +205,7 @@ def show(imgs): from torchvision.utils import draw_segmentation_masks dogs_with_masks = [ - draw_segmentation_masks(img, masks=mask, alpha=0.7) - for img, mask in zip(batch_int, boolean_dog_masks) + draw_segmentation_masks(img, masks=mask, alpha=0.7) for img, mask in zip(batch_int, boolean_dog_masks) ] show(dogs_with_masks) @@ -211,7 +226,7 @@ def show(imgs): print(f"dog1_masks shape = {dog1_masks.shape}, dtype = {dog1_masks.dtype}") print(f"dog1_all_classes_masks = {dog1_all_classes_masks.shape}, dtype = {dog1_all_classes_masks.dtype}") -dog_with_all_masks = draw_segmentation_masks(dog1_int, masks=dog1_all_classes_masks, alpha=.6) +dog_with_all_masks = draw_segmentation_masks(dog1_int, masks=dog1_all_classes_masks, alpha=0.6) show(dog_with_all_masks) ##################################### @@ -235,8 +250,7 @@ def show(imgs): all_classes_masks = all_classes_masks.swapaxes(0, 1) dogs_with_masks = [ - draw_segmentation_masks(img, masks=mask, alpha=.6) - for img, mask in zip(batch_int, all_classes_masks) + draw_segmentation_masks(img, masks=mask, alpha=0.6) for img, mask in zip(batch_int, all_classes_masks) ] show(dogs_with_masks) @@ -263,6 +277,7 @@ def show(imgs): # :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn`. from torchvision.models.detection import maskrcnn_resnet50_fpn + model = maskrcnn_resnet50_fpn(pretrained=True, progress=False) model = model.eval() @@ -288,9 +303,8 @@ def show(imgs): # models. 
dog1_output = output[0] -dog1_masks = dog1_output['masks'] -print(f"shape = {dog1_masks.shape}, dtype = {dog1_masks.dtype}, " - f"min = {dog1_masks.min()}, max = {dog1_masks.max()}") +dog1_masks = dog1_output["masks"] +print(f"shape = {dog1_masks.shape}, dtype = {dog1_masks.dtype}, " f"min = {dog1_masks.min()}, max = {dog1_masks.max()}") ##################################### # Here the masks corresponds to probabilities indicating, for each pixel, how @@ -299,24 +313,103 @@ def show(imgs): # Let's see which labels were predicted for the instances of the first image. inst_classes = [ - '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', - 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', - 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', - 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', - 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', - 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', - 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', - 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', - 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', - 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush' + "__background__", + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "N/A", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "N/A", + "backpack", + "umbrella", + "N/A", + "N/A", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "N/A", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "N/A", + "dining table", + "N/A", + "N/A", + "toilet", + "N/A", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "N/A", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", ] inst_class_to_idx = {cls: idx for (idx, cls) in enumerate(inst_classes)} print("For the first dog, the following instances were detected:") -print([inst_classes[label] for label in dog1_output['labels']]) +print([inst_classes[label] for label in dog1_output["labels"]]) ##################################### # Interestingly, the model detects two persons in the image. Let's go ahead and @@ -328,7 +421,7 @@ def show(imgs): # (one could also choose a different threshold). proba_threshold = 0.5 -dog1_bool_masks = dog1_output['masks'] > proba_threshold +dog1_bool_masks = dog1_output["masks"] > proba_threshold print(f"shape = {dog1_bool_masks.shape}, dtype = {dog1_bool_masks.dtype}") # There's an extra dimension (1) to the masks. We need to remove it @@ -341,7 +434,7 @@ def show(imgs): # with people. 
Looking more closely at the scores will help us plotting more # relevant masks: -print(dog1_output['scores']) +print(dog1_output["scores"]) ##################################### # Clearly the model is more confident about the dog detection than it is about @@ -349,17 +442,11 @@ def show(imgs): # for only those that have a good score. Let's use a score threshold of .75 # here, and also plot the masks of the second dog. -score_threshold = .75 +score_threshold = 0.75 -boolean_masks = [ - out['masks'][out['scores'] > score_threshold] > proba_threshold - for out in output -] +boolean_masks = [out["masks"][out["scores"] > score_threshold] > proba_threshold for out in output] -dogs_with_masks = [ - draw_segmentation_masks(img, mask.squeeze(1)) - for img, mask in zip(batch_int, boolean_masks) -] +dogs_with_masks = [draw_segmentation_masks(img, mask.squeeze(1)) for img, mask in zip(batch_int, boolean_masks)] show(dogs_with_masks) #####################################
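
The bulk of the second commit is the mechanical output of the black 21.9b0 hook added in the first commit: string quotes are normalized to double quotes, bare float literals gain a leading zero (.75 becomes 0.75), short multi-line constructs are collapsed while over-long ones are exploded one element per line with a trailing comma, and blank lines around imports and definitions are normalized. With the updated .pre-commit-config.yaml in place, running ``pre-commit run --all-files`` should re-apply the same formatting; only a few spots, such as the ``T.Resize`` argument in plot_scripted_tensor_transforms.py, look hand-adjusted on top of that.

Below is a minimal sketch of checking a single snippet against the same formatter through black's Python API. It assumes black 21.9b0 is installed; the line-length value is an illustrative assumption, since the project's actual black configuration is not visible in this diff.

import black

# Post-patch form of a line from gallery/plot_scripted_tensor_transforms.py.
new = 'plt.rcParams["savefig.bbox"] = "tight"\n'
# Pre-patch form of the same line, using single quotes.
old = "plt.rcParams['savefig.bbox'] = 'tight'\n"

# line_length=120 is an assumed setting, not taken from this patch.
mode = black.Mode(line_length=120)

print(black.format_str(new, mode=mode) == new)   # True: already black-formatted
print(black.format_str(old, mode=mode), end="")  # plt.rcParams["savefig.bbox"] = "tight"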