diff --git a/include/matx/transforms/fft.h b/include/matx/transforms/fft.h index 3bb4af4a5..c4223337b 100644 --- a/include/matx/transforms/fft.h +++ b/include/matx/transforms/fft.h @@ -235,8 +235,8 @@ template class matxFFTPlan_t { else if (fft_rank == 2) { if (params.transform_type == CUFFT_C2R || params.transform_type == CUFFT_Z2D) { - params.n[0] = o.Size(RANK-1); - params.n[1] = o.Size(RANK-2); + params.n[1] = o.Size(RANK-1); + params.n[0] = o.Size(RANK-2); } else { params.n[1] = i.Size(RANK-1); @@ -244,14 +244,14 @@ template class matxFFTPlan_t { } params.batch = (RANK == 2) ? 1 : i.Size(RANK - 3); - params.inembed[1] = o.Size(RANK-1); - params.onembed[1] = i.Size(RANK-1); + params.inembed[1] = i.Size(RANK-1); + params.onembed[1] = o.Size(RANK-1); params.istride = i.Stride(RANK-1); params.ostride = o.Stride(RANK-1); params.idist = (RANK<=2) ? 1 : (int) i.Stride(RANK-3); params.odist = (RANK<=2) ? 1 : (int) o.Stride(RANK-3); - if constexpr (is_complex_half_v || is_complex_half_v) { + if constexpr (is_complex_half_v || is_half_v) { if ((params.n[0] & (params.n[0] - 1)) != 0 || (params.n[1] & (params.n[1] - 1)) != 0) { MATX_THROW(matxInvalidDim, @@ -367,7 +367,7 @@ template class matxFFTPlan_t { if constexpr (is_complex_half_v) { return CUFFT_C2C; } - else if constexpr (is_half_v) { + else if constexpr (is_half_v) { return CUFFT_R2C; } } @@ -1057,7 +1057,7 @@ __MATX_INLINE__ void ifft2_impl(OutputTensor o, const InputTensor i, } // Get parameters required by these tensors - auto params = detail::matxFFTPlan_t::GetFFTParams(out, in, 2); + auto params = detail::matxFFTPlan_t::GetFFTParams(out, in, 2); params.stream = stream; // Get cache or new FFT plan if it doesn't exist diff --git a/test/00_transform/FFT.cu b/test/00_transform/FFT.cu index 8c2349ff6..3246676b6 100644 --- a/test/00_transform/FFT.cu +++ b/test/00_transform/FFT.cu @@ -640,6 +640,65 @@ TYPED_TEST(FFTTestComplexTypes, FFT2D16C2C) MATX_EXIT_HANDLER(); } +TYPED_TEST(FFTTestComplexTypes, FFT2D16x32C2C) +{ + MATX_ENTER_HANDLER(); + const index_t fft_dim[] = {16, 32}; + this->pb->template InitAndRunTVGenerator( + "00_transforms", "fft_operators", "fft_2d", {fft_dim[0], fft_dim[1]}); + + tensor_t av{{fft_dim[0], fft_dim[1]}}; + tensor_t avo{{fft_dim[0], fft_dim[1]}}; + this->pb->NumpyToTensorView(av, "a_in"); + + (avo = fft2(av)).run(); + cudaStreamSynchronize(0); + + MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh); + MATX_EXIT_HANDLER(); +} + +TYPED_TEST(FFTTestComplexTypes, FFT2D16BatchedC2C) +{ + MATX_ENTER_HANDLER(); + const index_t batch_size = 10; + const index_t fft_dim = 16; + this->pb->template InitAndRunTVGenerator( + "00_transforms", "fft_operators", "fft_2d_batched", + {batch_size, fft_dim, fft_dim}); + + tensor_t av{{batch_size, fft_dim, fft_dim}}; + tensor_t avo{{batch_size, fft_dim, fft_dim}}; + this->pb->NumpyToTensorView(av, "a_in"); + + (avo = fft2(av)).run(); + cudaStreamSynchronize(0); + + MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh); + MATX_EXIT_HANDLER(); +} + +TYPED_TEST(FFTTestComplexTypes, FFT2D16BatchedStridedC2C) +{ + MATX_ENTER_HANDLER(); + const index_t batch_size = 10; + const index_t fft_dim = 16; + this->pb->template InitAndRunTVGenerator( + "00_transforms", "fft_operators", "fft_2d_batched_strided", + {fft_dim, batch_size, fft_dim}); + + tensor_t av{{fft_dim, batch_size, fft_dim}}; + tensor_t avo{{fft_dim, batch_size, fft_dim}}; + this->pb->NumpyToTensorView(av, "a_in"); + + const int32_t axes[] = {0, 2}; + (avo = fft2(av, axes)).run(); + cudaStreamSynchronize(0); + + MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh); + MATX_EXIT_HANDLER(); +} + TYPED_TEST(FFTTestComplexTypes, IFFT2D16C2C) { MATX_ENTER_HANDLER(); @@ -658,6 +717,99 @@ TYPED_TEST(FFTTestComplexTypes, IFFT2D16C2C) MATX_EXIT_HANDLER(); } +TYPED_TEST(FFTTestComplexTypes, IFFT2D16x32C2C) +{ + MATX_ENTER_HANDLER(); + const index_t fft_dim[] = {16, 32}; + this->pb->template InitAndRunTVGenerator( + "00_transforms", "fft_operators", "ifft_2d", {fft_dim[0], fft_dim[1]}); + + tensor_t av{{fft_dim[0], fft_dim[1]}}; + tensor_t avo{{fft_dim[0], fft_dim[1]}}; + this->pb->NumpyToTensorView(av, "a_in"); + + (avo = ifft2(av)).run(); + cudaStreamSynchronize(0); + + MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh); + MATX_EXIT_HANDLER(); +} + +TYPED_TEST(FFTTestComplexNonHalfTypes, FFT2D16R2C) +{ + MATX_ENTER_HANDLER(); + const index_t fft_dim = 16; + using rtype = typename TypeParam::value_type; + this->pb->template InitAndRunTVGenerator( + "00_transforms", "fft_operators", "rfft_2d", {fft_dim, fft_dim}); + + tensor_t av{{fft_dim, fft_dim}}; + tensor_t avo{{fft_dim, fft_dim / 2 + 1}}; + this->pb->NumpyToTensorView(av, "a_in"); + + (avo = fft2(av)).run(); + cudaStreamSynchronize(0); + + MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh); + MATX_EXIT_HANDLER(); +} + +TYPED_TEST(FFTTestComplexNonHalfTypes, FFT2D16x32R2C) +{ + MATX_ENTER_HANDLER(); + const index_t fft_dim[] = {16, 32}; + using rtype = typename TypeParam::value_type; + this->pb->template InitAndRunTVGenerator( + "00_transforms", "fft_operators", "rfft_2d", {fft_dim[0], fft_dim[1]}); + + tensor_t av{{fft_dim[0], fft_dim[1]}}; + tensor_t avo{{fft_dim[0], fft_dim[1] / 2 + 1}}; + this->pb->NumpyToTensorView(av, "a_in"); + + (avo = fft2(av)).run(); + cudaStreamSynchronize(0); + + MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh); + MATX_EXIT_HANDLER(); +} + +TYPED_TEST(FFTTestComplexNonHalfTypes, IFFT2D16C2R) +{ + MATX_ENTER_HANDLER(); + const index_t fft_dim = 16; + using rtype = typename TypeParam::value_type; + this->pb->template InitAndRunTVGenerator( + "00_transforms", "fft_operators", "irfft_2d", {fft_dim, fft_dim}); + + tensor_t av{{fft_dim, fft_dim / 2 + 1}}; + tensor_t avo{{fft_dim, fft_dim}}; + this->pb->NumpyToTensorView(av, "a_in"); + + (avo = ifft2(av)).run(); + cudaStreamSynchronize(0); + + MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh); + MATX_EXIT_HANDLER(); +} + +TYPED_TEST(FFTTestComplexNonHalfTypes, IFFT2D16x32C2R) +{ + MATX_ENTER_HANDLER(); + const index_t fft_dim[] = {16, 32}; + using rtype = typename TypeParam::value_type; + this->pb->template InitAndRunTVGenerator( + "00_transforms", "fft_operators", "irfft_2d", {fft_dim[0], fft_dim[1]}); + + tensor_t av{{fft_dim[0], fft_dim[1] / 2 + 1}}; + tensor_t avo{{fft_dim[0], fft_dim[1]}}; + this->pb->NumpyToTensorView(av, "a_in"); + + (avo = ifft2(av)).run(); + cudaStreamSynchronize(0); + + MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh); + MATX_EXIT_HANDLER(); +} TYPED_TEST(FFTTestComplexNonHalfTypes, FFT1D1024C2CShort) { diff --git a/test/test_vectors/generators/00_transforms.py b/test/test_vectors/generators/00_transforms.py index 69c0cc5ca..6501a7e26 100755 --- a/test/test_vectors/generators/00_transforms.py +++ b/test/test_vectors/generators/00_transforms.py @@ -319,7 +319,23 @@ def fft_2d(self) -> Dict[str, np.ndarray]: (self.size[0], self.size[1]), self.dtype) return { 'a_in': seq, - 'a_out': np.fft.fft2(seq, (self.size[1], self.size[1])) + 'a_out': np.fft.fft2(seq, (self.size[0], self.size[1])) + } + + def fft_2d_batched(self) -> Dict[str, np.ndarray]: + seq = matx_common.randn_ndarray( + (self.size[0], self.size[1], self.size[2]), self.dtype) + return { + 'a_in': seq, + 'a_out': np.fft.fft2(seq, (self.size[1], self.size[2])) + } + + def fft_2d_batched_strided(self) -> Dict[str, np.ndarray]: + seq = matx_common.randn_ndarray( + (self.size[0], self.size[1], self.size[2]), self.dtype) + return { + 'a_in': seq, + 'a_out': np.fft.fft2(seq, (self.size[0], self.size[2]), axes=(0, 2)) } def ifft_2d(self) -> Dict[str, np.ndarray]: @@ -327,7 +343,23 @@ def ifft_2d(self) -> Dict[str, np.ndarray]: (self.size[0], self.size[1]), self.dtype) return { 'a_in': seq, - 'a_out': np.fft.ifft2(seq, (self.size[1], self.size[1])) + 'a_out': np.fft.ifft2(seq, (self.size[0], self.size[1])) + } + + def rfft_2d(self) -> Dict[str, np.ndarray]: + seq = matx_common.randn_ndarray( + (self.size[0], self.size[1]), self.dtype) + return { + 'a_in': seq, + 'a_out': np.fft.rfft2(seq, (self.size[0], self.size[1])) + } + + def irfft_2d(self) -> Dict[str, np.ndarray]: + seq = matx_common.randn_ndarray( + (self.size[0], self.size[1]), self.dtype) + return { + 'a_in': seq, + 'a_out': np.fft.irfft2(seq, (self.size[0], self.size[1])) }