Skip to content

Commit 8a4786a

Browse files
vishwakftw authored and fmassa committed
Add functional transforms to docs (#499)
1 parent 47214f0 commit 8a4786a

File tree

2 files changed

+68
-48
lines changed

2 files changed

+68
-48
lines changed

docs/source/transforms.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,9 @@ Generic Transforms
7474

7575
.. autoclass:: Lambda
7676

77+
78+
Functional Transforms
79+
---------------------
80+
81+
.. automodule:: torchvision.transforms.functional
82+
:members:

torchvision/transforms/functional.py

Lines changed: 62 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def to_tensor(pic):
8888
def to_pil_image(pic, mode=None):
8989
"""Convert a tensor or an ndarray to PIL Image.
9090
91-
See :class:`~torchvision.transforms.ToPIlImage` for more details.
91+
See :class:`~torchvision.transforms.ToPILImage` for more details.
9292
9393
Args:
9494
pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
@@ -151,7 +151,7 @@ def to_pil_image(pic, mode=None):
151151
def normalize(tensor, mean, std):
152152
"""Normalize a tensor image with mean and standard deviation.
153153
154-
See ``Normalize`` for more details.
154+
See :class:`~torchvision.transforms.Normalize` for more details.
155155
156156
Args:
157157
tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
@@ -170,15 +170,15 @@ def normalize(tensor, mean, std):
170170

171171

172172
def resize(img, size, interpolation=Image.BILINEAR):
173-
"""Resize the input PIL Image to the given size.
173+
r"""Resize the input PIL Image to the given size.
174174
175175
Args:
176176
img (PIL Image): Image to be resized.
177177
size (sequence or int): Desired output size. If size is a sequence like
178178
(h, w), the output size will be matched to this. If size is an int,
179179
the smaller edge of the image will be matched to this number maintaining
180180
the aspect ratio, i.e., if height > width, then the image will be rescaled to
181-
(size * height / width, size)
181+
:math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`
182182
interpolation (int, optional): Desired interpolation. Default is
183183
``PIL.Image.BILINEAR``
184184
@@ -213,7 +213,7 @@ def scale(*args, **kwargs):
213213

214214

215215
def pad(img, padding, fill=0, padding_mode='constant'):
216-
"""Pad the given PIL Image on all sides with specified padding mode and fill value.
216+
r"""Pad the given PIL Image on all sides with specified padding mode and fill value.
217217
218218
Args:
219219
img (PIL Image): Image to be padded.
@@ -226,14 +226,20 @@ def pad(img, padding, fill=0, padding_mode='constant'):
226226
length 3, it is used to fill R, G, B channels respectively.
227227
This value is only used when the padding_mode is constant
228228
padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.
229-
constant: pads with a constant value, this value is specified with fill
230-
edge: pads with the last value on the edge of the image
231-
reflect: pads with reflection of image (without repeating the last value on the edge)
232-
padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
233-
will result in [3, 2, 1, 2, 3, 4, 3, 2]
234-
symmetric: pads with reflection of image (repeating the last value on the edge)
235-
padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
236-
will result in [2, 1, 1, 2, 3, 4, 4, 3]
229+
230+
- constant: pads with a constant value, this value is specified with fill
231+
232+
- edge: pads with the last value on the edge of the image
233+
234+
- reflect: pads with reflection of image (without repeating the last value on the edge)
235+
236+
padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
237+
will result in [3, 2, 1, 2, 3, 4, 3, 2]
238+
239+
- symmetric: pads with reflection of image (repeating the last value on the edge)
240+
241+
padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
242+
will result in [2, 1, 1, 2, 3, 4, 4, 3]
237243
238244
Returns:
239245
PIL Image: Padded image.
@@ -312,7 +318,7 @@ def center_crop(img, output_size):
312318
def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR):
313319
"""Crop the given PIL Image and resize it to desired size.
314320
315-
Notably used in RandomResizedCrop.
321+
Notably used in :class:`~torchvision.transforms.RandomResizedCrop`.
316322
317323
Args:
318324
img (PIL Image): Image to be cropped.
@@ -373,9 +379,10 @@ def five_crop(img, size):
373379
size (sequence or int): Desired output size of the crop. If size is an
374380
int instead of sequence like (h, w), a square crop (size, size) is
375381
made.
382+
376383
Returns:
377-
tuple: tuple (tl, tr, bl, br, center) corresponding top left,
378-
top right, bottom left, bottom right and center crop.
384+
tuple: tuple (tl, tr, bl, br, center)
385+
Corresponding top left, top right, bottom left, bottom right and center crop.
379386
"""
380387
if isinstance(size, numbers.Number):
381388
size = (int(size), int(size))
@@ -396,24 +403,23 @@ def five_crop(img, size):
396403

397404

398405
def ten_crop(img, size, vertical_flip=False):
399-
"""Crop the given PIL Image into four corners and the central crop plus the
400-
flipped version of these (horizontal flipping is used by default).
406+
r"""Crop the given PIL Image into four corners and the central crop plus the
407+
flipped version of these (horizontal flipping is used by default).
401408
402409
.. Note::
403410
This transform returns a tuple of images and there may be a
404411
mismatch in the number of inputs and targets your ``Dataset`` returns.
405412
406-
Args:
407-
size (sequence or int): Desired output size of the crop. If size is an
408-
int instead of sequence like (h, w), a square crop (size, size) is
409-
made.
410-
vertical_flip (bool): Use vertical flipping instead of horizontal
411-
412-
Returns:
413-
tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip,
414-
br_flip, center_flip) corresponding top left, top right,
415-
bottom left, bottom right and center crop and same for the
416-
flipped image.
413+
Args:
414+
size (sequence or int): Desired output size of the crop. If size is an
415+
int instead of sequence like (h, w), a square crop (size, size) is
416+
made.
417+
vertical_flip (bool): Use vertical flipping instead of horizontal
418+
419+
Returns:
420+
tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip)
421+
Corresponding top left, top right, bottom left, bottom right and center crop
422+
and same for the flipped image.
417423
"""
418424
if isinstance(size, numbers.Number):
419425
size = (int(size), int(size))
@@ -501,7 +507,9 @@ def adjust_hue(img, hue_factor):
501507
`hue_factor` is the amount of shift in H channel and must be in the
502508
interval `[-0.5, 0.5]`.
503509
504-
See https://en.wikipedia.org/wiki/Hue for more details on Hue.
510+
See `Hue`_ for more details.
511+
512+
.. _Hue: https://en.wikipedia.org/wiki/Hue
505513
506514
Args:
507515
img (PIL Image): PIL Image to be adjusted.
@@ -537,20 +545,23 @@ def adjust_hue(img, hue_factor):
537545

538546

539547
def adjust_gamma(img, gamma, gain=1):
540-
"""Perform gamma correction on an image.
548+
r"""Perform gamma correction on an image.
541549
542550
Also known as Power Law Transform. Intensities in RGB mode are adjusted
543551
based on the following equation:
544552
545-
I_out = 255 * gain * ((I_in / 255) ** gamma)
553+
.. math::
554+
I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma}
555+
556+
See `Gamma Correction`_ for more details.
546557
547-
See https://en.wikipedia.org/wiki/Gamma_correction for more details.
558+
.. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction
548559
549560
Args:
550561
img (PIL Image): PIL Image to be adjusted.
551-
gamma (float): Non negative real number. gamma larger than 1 make the
552-
shadows darker, while gamma smaller than 1 make dark regions
553-
lighter.
562+
gamma (float): Non-negative real number, same as :math:`\gamma` in the equation.
563+
A gamma larger than 1 makes the shadows darker,
564+
while a gamma smaller than 1 makes dark regions lighter.
554565
gain (float): The constant multiplier.
555566
"""
556567
if not _is_pil_image(img):
@@ -575,18 +586,20 @@ def rotate(img, angle, resample=False, expand=False, center=None):
575586
576587
Args:
577588
img (PIL Image): PIL Image to be rotated.
578-
angle ({float, int}): Rotation angle in degrees, counter-clockwise.
579-
resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
580-
An optional resampling filter.
581-
See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
582-
If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
589+
angle (float or int): Rotation angle in degrees, counter-clockwise.
590+
resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
591+
An optional resampling filter. See `filters`_ for more information.
592+
If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
583593
expand (bool, optional): Optional expansion flag.
584594
If true, expands the output image to make it large enough to hold the entire rotated image.
585595
If false or omitted, make the output image the same size as the input image.
586596
Note that the expand flag assumes rotation around the center and no translation.
587597
center (2-tuple, optional): Optional center of rotation.
588598
Origin is the upper left corner.
589599
Default is the center of the image.
600+
601+
.. _filters: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
602+
590603
"""
591604

592605
if not _is_pil_image(img):
@@ -635,14 +648,14 @@ def affine(img, angle, translate, scale, shear, resample=0, fillcolor=None):
635648
636649
Args:
637650
img (PIL Image): PIL Image to be rotated.
638-
angle ({float, int}): rotation angle in degrees between -180 and 180, clockwise direction.
651+
angle (float or int): rotation angle in degrees between -180 and 180, clockwise direction.
639652
translate (list or tuple of integers): horizontal and vertical translations (post-rotation translation)
640653
scale (float): overall scale
641654
shear (float): shear angle value in degrees between -180 to 180, clockwise direction.
642-
resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
655+
resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
643656
An optional resampling filter.
644-
See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
645-
If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
657+
See `filters`_ for more information.
658+
If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
646659
fillcolor (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0)
647660
"""
648661
if not _is_pil_image(img):
@@ -667,9 +680,10 @@ def to_grayscale(img, num_output_channels=1):
667680
img (PIL Image): Image to be converted to grayscale.
668681
669682
Returns:
670-
PIL Image: Grayscale version of the image.
671-
if num_output_channels == 1 : returned image is single channel
672-
if num_output_channels == 3 : returned image is 3 channel with r == g == b
683+
PIL Image: Grayscale version of the image.
684+
if num_output_channels = 1 : returned image is single channel
685+
686+
if num_output_channels = 3 : returned image is 3 channel with r = g = b
673687
"""
674688
if not _is_pil_image(img):
675689
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

0 commit comments

Comments
 (0)