From 315a0ff7aade9b89c95d72156f87d93d74bb8c8d Mon Sep 17 00:00:00 2001 From: David Date: Tue, 8 Aug 2017 20:58:44 +0800 Subject: [PATCH 1/5] [add] A PairRandomCrop for both input and target. When we crop images in Semantic Segmentation and other related work, we want to crop both the input image and its target image. However, RandomCrop generates a new random position each time it is called, whereas PairRandomCrop uses the same position twice. --- README.rst | 10 +++++++++ torchvision/transforms.py | 45 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/README.rst b/README.rst index c6de582fb08..40f5abc78da 100644 --- a/README.rst +++ b/README.rst @@ -332,6 +332,16 @@ integer, in which case the target will be of a square shape (size, size) If ``padding`` is non-zero, then the image is first zero-padded on each side with ``padding`` pixels. +``PairRandomCrop(size, padding=0)`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Crops the given PIL.Image at a random location to have a region of the +given size for both input image and its target image. size can be a +tuple (target\_height, target\_width) or an integer, in which case the +target will be of a square shape (size, size). +If ``padding`` is non-zero, then the image is first zero-padded on each +side with ``padding`` pixels. + ``RandomHorizontalFlip()`` ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/torchvision/transforms.py b/torchvision/transforms.py index 6d649ab18fa..f0b3045ed57 100644 --- a/torchvision/transforms.py +++ b/torchvision/transforms.py @@ -310,6 +310,51 @@ def __call__(self, img): y1 = random.randint(0, h - th) return img.crop((x1, y1, x1 + tw, y1 + th)) +class PairRandomCrop(object): + """Crop the given PIL.Image at a random location for both input and target. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. 
+ padding (int or sequence, optional): Optional padding on each border + of the image. Default is 0, i.e. no padding. If a sequence of length + 4 is provided, it is used to pad left, top, right, bottom borders + respectively. + """ + image_crop_position = {} + + def __init__(self, size, padding=0): + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + self.size = size + self.padding = padding + + def __call__(self, img): + """ + Args: + img (PIL.Image): Image to be cropped. + Returns: + PIL.Image: Cropped image. + """ + if self.padding > 0: + img = ImageOps.expand(img, border=self.padding, fill=0) + + w, h = img.size + th, tw = self.size + if w == tw and h == th: + return img + + pid = os.getpid() + if pid in self.image_crop_position: + x1, y1 = self.image_crop_position.pop(pid) + else: + x1 = random.randint(0, w - tw) + y1 = random.randint(0, h - th) + self.image_crop_position[pid] = (x1, y1) + return img.crop((x1, y1, x1 + tw, y1 + th)) + class RandomHorizontalFlip(object): """Horizontally flip the given PIL.Image randomly with a probability of 0.5.""" From dac43f27a40348d3452dec74ea7263cd5fca8a8e Mon Sep 17 00:00:00 2001 From: David Date: Tue, 8 Aug 2017 21:13:11 +0800 Subject: [PATCH 2/5] [fix] import os module. 
--- torchvision/transforms.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/torchvision/transforms.py b/torchvision/transforms.py index f0b3045ed57..4dc76824324 100644 --- a/torchvision/transforms.py +++ b/torchvision/transforms.py @@ -1,7 +1,16 @@ +#!/usr/local/bin/python3 +# coding: UTF-8 +# Author: David +# Email: youchen.du@gmail.com +# Created: 2017-08-08 21:13 +# Last modified: 2017-08-08 21:13 +# Filename: transforms.py +# Description: from __future__ import division import torch import math import random +import os from PIL import Image, ImageOps try: import accimage From b19d9242c9f5fafc42075ec36b19f50238b6aabd Mon Sep 17 00:00:00 2001 From: David Date: Tue, 8 Aug 2017 21:14:57 +0800 Subject: [PATCH 3/5] [fix] pep8 style fix. --- torchvision/transforms.py | 1 + 1 file changed, 1 insertion(+) diff --git a/torchvision/transforms.py b/torchvision/transforms.py index 4dc76824324..018797669ad 100644 --- a/torchvision/transforms.py +++ b/torchvision/transforms.py @@ -319,6 +319,7 @@ def __call__(self, img): y1 = random.randint(0, h - th) return img.crop((x1, y1, x1 + tw, y1 + th)) + class PairRandomCrop(object): """Crop the given PIL.Image at a random location for both input and target. From 5ee7a8832327a9e0eea7a9ba9f2f3c66b97794fe Mon Sep 17 00:00:00 2001 From: David Date: Sun, 13 Aug 2017 18:54:13 +0800 Subject: [PATCH 4/5] [remove] remove personal info section. 
--- torchvision/transforms.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/torchvision/transforms.py b/torchvision/transforms.py index 018797669ad..8460c50aa1b 100644 --- a/torchvision/transforms.py +++ b/torchvision/transforms.py @@ -1,11 +1,3 @@ -#!/usr/local/bin/python3 -# coding: UTF-8 -# Author: David -# Email: youchen.du@gmail.com -# Created: 2017-08-08 21:13 -# Last modified: 2017-08-08 21:13 -# Filename: transforms.py -# Description: from __future__ import division import torch import math From 15ea3572b5f81a6fb5aa066e6f1d85f0019f7bf6 Mon Sep 17 00:00:00 2001 From: David Date: Tue, 15 Aug 2017 22:29:06 +0800 Subject: [PATCH 5/5] [update] no dict needed actually. --- torchvision/transforms.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/torchvision/transforms.py b/torchvision/transforms.py index 8460c50aa1b..d948bae6175 100644 --- a/torchvision/transforms.py +++ b/torchvision/transforms.py @@ -2,7 +2,6 @@ import torch import math import random -import os from PIL import Image, ImageOps try: import accimage @@ -324,7 +323,7 @@ class PairRandomCrop(object): 4 is provided, it is used to pad left, top, right, bottom borders respectively. """ - image_crop_position = {} + last_position = None def __init__(self, size, padding=0): if isinstance(size, numbers.Number): @@ -348,13 +347,12 @@ def __call__(self, img): if w == tw and h == th: return img - pid = os.getpid() - if pid in self.image_crop_position: - x1, y1 = self.image_crop_position.pop(pid) + if self.last_position is not None: + (x1, y1), self.last_position = self.last_position, None else: x1 = random.randint(0, w - tw) y1 = random.randint(0, h - th) - self.image_crop_position[pid] = (x1, y1) + self.last_position = (x1, y1) return img.crop((x1, y1, x1 + tw, y1 + th))