diff --git a/docs/src/projective/gallery.md b/docs/src/projective/gallery.md index b85d8de..80afe0f 100644 --- a/docs/src/projective/gallery.md +++ b/docs/src/projective/gallery.md @@ -135,14 +135,14 @@ tfms = [ showgrid([apply(tfm, (image, bbox)) for tfm in tfms]; ncol=6, npad=8) ``` -## [`FlipX`](@ref), [`FlipY`](@ref), [`Reflect`](@ref) +## [`FlipX`](@ref), [`FlipY`](@ref), [`FlipZ`](@ref), [`Reflect`](@ref) Flip the data on the horizontally and vertically, respectively. More generally, reflect around an angle from the x-axis. ```@example deps tfms = [ - FlipX(), - FlipY(), + FlipX{2}(), + FlipY{2}(), Reflect(30), ] showgrid([apply(tfm, (image, bbox)) for tfm in tfms]; ncol=6, npad=8) diff --git a/docs/src/projective/intro.md b/docs/src/projective/intro.md index f96e4e0..86cceac 100644 --- a/docs/src/projective/intro.md +++ b/docs/src/projective/intro.md @@ -7,7 +7,7 @@ We can break down most augmentation used in practive into a single (possibly sto As an example, consider an image augmentation pipeline: A random horizontal flip, followed by a random resized crop. The latter resizes and crops (irregularly sized) images to a common size without distorting the aspect ratio. ```julia -Maybe(FlipX()) |> RandomResizeCrop((h, w)) +Maybe(FlipX{2}()) |> RandomResizeCrop((h, w)) ``` Let's pull apart the steps involved. diff --git a/docs/src/ref.md b/docs/src/ref.md index 4924a93..433e042 100644 --- a/docs/src/ref.md +++ b/docs/src/ref.md @@ -6,8 +6,10 @@ BoundingBox CenterCrop CenterResizeCrop Crop +FlipDim FlipX FlipY +FlipZ Image Keypoints MaskBinary diff --git a/docs/src/transformations.md b/docs/src/transformations.md index 11a8d3d..13e154b 100644 --- a/docs/src/transformations.md +++ b/docs/src/transformations.md @@ -6,7 +6,7 @@ using DataAugmentation Using transformations is easy. Simply `compose` them: ```@example tsm -tfm = Rotate(10) |> ScaleRatio((0.7,0.1,1.2)) |> FlipX() |> Crop((128, 128)) +tfm = Rotate(10) |> ScaleRatio((0.7,0.1,1.2)) |> FlipX{2}() |> Crop((128, 128)) ``` # Projective transformations @@ -26,8 +26,10 @@ Projective transformations include: Affine transformations are a subgroup of projective transformations that can be composed very efficiently: composing two affine transformations results in another affine transformation. Affine transformations can represent translation, scaling, reflection and rotation. Available `Transform`s are: ```@docs; canonical=false +FlipDim FlipX FlipY +FlipZ Reflect Rotate RotateX @@ -73,7 +75,7 @@ Let's say we have an image classification dataset. For most datasets, horizontal ```@example using DataAugmentation, TestImages item = Image(testimage("lighthouse")) -tfm = Maybe(FlipX()) +tfm = Maybe(FlipX{2}()) titems = [apply(tfm, item) for _ in 1:8] showgrid(titems; ncol = 4, npad = 16) ``` diff --git a/src/DataAugmentation.jl b/src/DataAugmentation.jl index 4b1c1e5..2f307f3 100644 --- a/src/DataAugmentation.jl +++ b/src/DataAugmentation.jl @@ -84,8 +84,10 @@ export Item, apply, Reflect, WarpAffine, + FlipDim, FlipX, FlipY, + FlipZ, PinOrigin, AdjustBrightness, AdjustContrast, diff --git a/src/projective/affine.jl b/src/projective/affine.jl index 0c77695..e53798c 100644 --- a/src/projective/affine.jl +++ b/src/projective/affine.jl @@ -43,7 +43,7 @@ struct ScaleKeepAspect{N} <: ProjectiveTransform end -function getprojection(scale::ScaleKeepAspect{N}, bounds; randstate = nothing) where N +function getprojection(scale::ScaleKeepAspect{N}, bounds::Bounds{N}; randstate = nothing) where N # If no scaling needs to be done, return a noop transform scale.minlengths == length.(bounds.rs) && return IdentityTransformation() @@ -51,7 +51,7 @@ function getprojection(scale::ScaleKeepAspect{N}, bounds; randstate = nothing) w ratio = maximum((scale.minlengths .+ 1) ./ length.(bounds.rs)) upperleft = SVector{N, Float32}(minimum.(bounds.rs)) .- 0.5 P = scaleprojection(Tuple(ratio for _ in 1:N)) - if upperleft != SVector(0, 0) + if any(upperleft .!= 0) P = P ∘ Translation((Float32.(P(upperleft)) .+ 0.5f0)) end return P @@ -79,11 +79,11 @@ struct ScaleFixed{N} <: ProjectiveTransform end -function getprojection(scale::ScaleFixed, bounds; randstate = nothing) +function getprojection(scale::ScaleFixed, bounds::Bounds{N}; randstate = nothing) where N ratios = (scale.sizes .+ 1) ./ length.(bounds.rs) - upperleft = SVector{2, Float32}(minimum.(bounds.rs)) .- 1 + upperleft = SVector{N, Float32}(minimum.(bounds.rs)) .- 1 P = scaleprojection(ratios) - if upperleft != SVector(0, 0) + if any(upperleft .!= 0) P = P ∘ Translation(-upperleft) end return P @@ -92,7 +92,7 @@ end function projectionbounds(tfm::ScaleFixed{N}, P, bounds::Bounds{N}; randstate = nothing) where N bounds_ = transformbounds(bounds, P) - return offsetcropbounds(tfm.sizes, bounds_, (1., 1.)) + return offsetcropbounds(tfm.sizes, bounds_, ntuple(_ -> 1., N)) end """ @@ -230,7 +230,7 @@ struct Reflect <: ProjectiveTransform end -function getprojection(tfm::Reflect, bounds; randstate = getrandstate(tfm)) +function getprojection(tfm::Reflect, bounds::Bounds{2}; randstate = getrandstate(tfm)) r = tfm.γ / 360 * 2pi return centered(LinearMap(reflectionmatrix(r)), bounds) end @@ -241,26 +241,73 @@ end Transform `P` so that is applied around the center of `bounds` instead of the origin """ -function centered(P, bounds::Bounds{2}) +function centered(P, bounds::Bounds{N}) where N upperleft = minimum.(bounds.rs) bottomright = maximum.(bounds.rs) - midpoint = SVector{2, Float32}((bottomright .- upperleft) ./ 2) .+ SVector{2, Float32}(.5, .5) + midpoint = SVector{N, Float32}((bottomright .- upperleft) ./ 2) .+ .5f0 return recenter(P, midpoint) end + +function reflectionmatrix(r) + A = SMatrix{2, 2, Float32}(cos(2r), sin(2r), sin(2r), -cos(2r)) + return round.(A; digits = 12) +end + + +""" + FlipDim{N}(dim) + +Reflect `N` dimensional data along the axis of dimension `dim`. Must satisfy 1 <= `dim` <= `N`. + +## Examples + +```julia +tfm = FlipDim{2}(1) +``` +""" +struct FlipDim{N} <: ProjectiveTransform + dim::Int + FlipDim{N}(dim) where N = 1 <= dim <= N ? new{N}(dim) : error("invalid dimension") +end + """ -Reflect(180) + FlipX{N}() + +Flip `N` dimensional data along the x-axis. 2D images use (r, c) = (y, x) +convention such that x-axis flips occur along the second dimension. For N >= 3, +x-axis flips occur along the first dimension. """ -FlipX() = Reflect(180) +struct FlipX{N} + FlipX{N}() where N = FlipDim{N}(N==2 ? 2 : 1) +end + """ -Reflect(90) + FlipY{N}() + +Flip `N` dimensional data along the y-axis. 2D images use (r, c) = (y, x) +convention such that y-axis flips occur along the first dimension. For N >= 3, +y-axis flips occur along the second dimension. """ -FlipY() = Reflect(90) +struct FlipY{N} + FlipY{N}() where N = FlipDim{N}(N==2 ? 1 : 2) +end -function reflectionmatrix(r) - A = SMatrix{2, 2, Float32}(cos(2r), sin(2r), sin(2r), -cos(2r)) - return round.(A; digits = 12) +""" + FlipZ{N}() + +Flip `N` dimensional data along the z-axis. +""" +struct FlipZ{N} + FlipZ{N}() where N = FlipDim{N}(3) +end + +function getprojection(tfm::FlipDim{N}, bounds::Bounds{N}; randstate = nothing) where N + arr = 1I(N) + arr[tfm.dim, tfm.dim] = -1 + M = SMatrix{N, N, Float32}(arr) + return DataAugmentation.centered(LinearMap(M), bounds) end @@ -281,8 +328,8 @@ at one. """ struct PinOrigin <: ProjectiveTransform end -function getprojection(::PinOrigin, bounds; randstate = nothing) - p = (-SVector{2, Float32}(minimum.(bounds.rs))) .+ 1 +function getprojection(::PinOrigin, bounds::Bounds{N}; randstate = nothing) where N + p = (-SVector{N, Float32}(minimum.(bounds.rs))) .+ 1 P = Translation(p) return P end diff --git a/src/projective/compose.jl b/src/projective/compose.jl index 4562b30..48b62cf 100644 --- a/src/projective/compose.jl +++ b/src/projective/compose.jl @@ -26,6 +26,9 @@ compose(composed::ComposedProjectiveTransform, tfm::ProjectiveTransform) = compose(tfm::ProjectiveTransform, composed::ComposedProjectiveTransform) = ComposedProjectiveTransform(tfm, composed.tfms...) +compose(composed1::ComposedProjectiveTransform, composed2::ComposedProjectiveTransform) = + ComposedProjectiveTransform(composed1.tfms..., composed2.tfms...) + # The random state is collected from the transformations that make up the # `ComposedProjectiveTransform`: diff --git a/test/projective/affine.jl b/test/projective/affine.jl index 1d51365..8c10825 100644 --- a/test/projective/affine.jl +++ b/test/projective/affine.jl @@ -189,16 +189,60 @@ include("../imports.jl") @test_nowarn apply!(buffer, tfm, image2) end - @testset ExtendedTestSet "`RandomCrop` correct indices" begin - # Flipping and cropping should be the same as reverse-indexing - # the flipped dimension - tfm = FlipX() |> RandomCrop((64, 64)) |> PinOrigin() - img = rand(RGB, 64, 64) + + @testset ExtendedTestSet "FlipX 2D correct indices" begin + tfm = FlipX{2}() |> RandomCrop((10,10)) |> PinOrigin() + img = rand(RGB, 10, 10) + item = Image(img) + @test_nowarn titem = apply(tfm, item) + titem = apply(tfm, item) + @test itemdata(titem) == img[:, end:-1:1] + end + + @testset ExtendedTestSet "FlipY 2D correct indices" begin + tfm = FlipY{2}() |> RandomCrop((10,10)) |> PinOrigin() + img = rand(RGB, 10, 10) + item = Image(img) + @test_nowarn titem = apply(tfm, item) + titem = apply(tfm, item) + @test itemdata(titem) == img[end:-1:1, :] + end + + + @testset ExtendedTestSet "FlipX 3D correct indices" begin + tfm = FlipX{3}() |> RandomCrop((10,10,10)) |> PinOrigin() + img = rand(RGB, 10, 10, 10) item = Image(img) + @test_nowarn titem = apply(tfm, item) titem = apply(tfm, item) - timg = itemdata(titem) - rimg = img[:, end:-1:1] - @test titem.data == rimg + @test itemdata(titem) == img[end:-1:1, :, :] + end + + @testset ExtendedTestSet "FlipY 3D correct indices" begin + tfm = FlipY{3}() |> RandomCrop((10,10,10)) |> PinOrigin() + img = rand(RGB, 10, 10, 10) + item = Image(img) + @test_nowarn titem = apply(tfm, item) + titem = apply(tfm, item) + @test itemdata(titem) == img[:, end:-1:1, :] + end + + @testset ExtendedTestSet "FlipZ 3D correct indices" begin + tfm = FlipZ{3}() |> RandomCrop((10,10,10)) |> PinOrigin() + img = rand(RGB, 10, 10, 10) + item = Image(img) + @test_nowarn titem = apply(tfm, item) + titem = apply(tfm, item) + @test itemdata(titem) == img[:, :, end:-1:1] + end + + @testset ExtendedTestSet "Double flip is identity" begin + tfm = FlipZ{3}() |> FlipZ{3}() |> RandomCrop((10,10,10)) |> PinOrigin() + img = rand(RGB, 10, 10, 10) + item = Image(img) + @test_nowarn titem = apply(tfm, item) + titem = apply(tfm, item) + @test itemdata(titem) == img end end @@ -207,8 +251,8 @@ end @testset ExtendedTestSet "2D" begin tfms = compose( Rotate(10), - FlipX(), - FlipY(), + FlipX{2}(), + FlipY{2}(), ScaleRatio((.8, .8)), WarpAffine(0.1), Zoom((1., 1.2)), @@ -227,9 +271,14 @@ end ) tfms = compose( + FlipX{3}(), + FlipY{3}(), + FlipZ{3}(), + ScaleFixed((30, 40, 50)), Rotate(10, 20, 30), ScaleRatio((.8, .8, .8)), ScaleKeepAspect((12, 10, 10)), + Zoom((1., 1.2)), RandomCrop((10, 10, 10)) ) testprojective(tfms, items)