diff --git a/include/matx/transforms/fft.h b/include/matx/transforms/fft.h
index 3bb4af4a5..c4223337b 100644
--- a/include/matx/transforms/fft.h
+++ b/include/matx/transforms/fft.h
@@ -235,8 +235,8 @@ template <typename OutTensorType, typename InTensorType> class matxFFTPlan_t {
     else if (fft_rank == 2) {
       if (params.transform_type == CUFFT_C2R ||
           params.transform_type == CUFFT_Z2D) {
-        params.n[0] = o.Size(RANK-1);
-        params.n[1] = o.Size(RANK-2);
+        params.n[1] = o.Size(RANK-1);
+        params.n[0] = o.Size(RANK-2);
       }
       else {
         params.n[1] = i.Size(RANK-1);
@@ -244,14 +244,14 @@ template <typename OutTensorType, typename InTensorType> class matxFFTPlan_t {
       }
 
       params.batch = (RANK == 2) ? 1 : i.Size(RANK - 3);
-      params.inembed[1] = o.Size(RANK-1);
-      params.onembed[1] = i.Size(RANK-1);
+      params.inembed[1] = i.Size(RANK-1);
+      params.onembed[1] = o.Size(RANK-1);
       params.istride = i.Stride(RANK-1);
       params.ostride = o.Stride(RANK-1);
       params.idist = (RANK<=2) ? 1 : (int) i.Stride(RANK-3);
       params.odist = (RANK<=2) ? 1 : (int) o.Stride(RANK-3);
 
-      if constexpr (is_complex_half_v<T1> || is_complex_half_v<T1>) {
+      if constexpr (is_complex_half_v<T1> || is_half_v<T1>) {
         if ((params.n[0] & (params.n[0] - 1)) != 0 ||
             (params.n[1] & (params.n[1] - 1)) != 0) {
           MATX_THROW(matxInvalidDim,
@@ -367,7 +367,7 @@ template <typename OutTensorType, typename InTensorType> class matxFFTPlan_t {
       if constexpr (is_complex_half_v<T2>) {
         return CUFFT_C2C;
       }
-      else if constexpr (is_half_v<T1>) {
+      else if constexpr (is_half_v<T2>) {
         return CUFFT_R2C;
       }
     }
@@ -1057,7 +1057,7 @@ __MATX_INLINE__ void ifft2_impl(OutputTensor o, const InputTensor i,
   }
 
     // Get parameters required by these tensors
-  auto params = detail::matxFFTPlan_t<decltype(in), decltype(out)>::GetFFTParams(out, in, 2);
+  auto params = detail::matxFFTPlan_t<decltype(out), decltype(in)>::GetFFTParams(out, in, 2);
   params.stream = stream;
   
   // Get cache or new FFT plan if it doesn't exist
diff --git a/test/00_transform/FFT.cu b/test/00_transform/FFT.cu
index 8c2349ff6..3246676b6 100644
--- a/test/00_transform/FFT.cu
+++ b/test/00_transform/FFT.cu
@@ -640,6 +640,65 @@ TYPED_TEST(FFTTestComplexTypes, FFT2D16C2C)
   MATX_EXIT_HANDLER();
 }
 
+TYPED_TEST(FFTTestComplexTypes, FFT2D16x32C2C)
+{
+  // 2D C2C FFT on a non-square 16x32 input, compared with the "fft_2d" vectors.
+  MATX_ENTER_HANDLER();
+  const index_t fft_dim[] = {16, 32};
+  this->pb->template InitAndRunTVGenerator<TypeParam>(
+      "00_transforms", "fft_operators", "fft_2d", {fft_dim[0], fft_dim[1]});
+  tensor_t<TypeParam, 2> av{{fft_dim[0], fft_dim[1]}};
+  tensor_t<TypeParam, 2> avo{{fft_dim[0], fft_dim[1]}};
+  this->pb->NumpyToTensorView(av, "a_in");
+
+  (avo = fft2(av)).run();
+  ASSERT_EQ(cudaStreamSynchronize(0), cudaSuccess);  // do not ignore async errors
+
+  MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh);
+  MATX_EXIT_HANDLER();
+}
+
+TYPED_TEST(FFTTestComplexTypes, FFT2D16BatchedC2C)
+{
+  // fft2 on a 10x16x16 tensor, compared with the "fft_2d_batched" vectors.
+  MATX_ENTER_HANDLER();
+  const index_t batch_size = 10;
+  const index_t fft_dim = 16;
+  this->pb->template InitAndRunTVGenerator<TypeParam>(
+      "00_transforms", "fft_operators", "fft_2d_batched",
+      {batch_size, fft_dim, fft_dim});
+  tensor_t<TypeParam, 3> av{{batch_size, fft_dim, fft_dim}};
+  tensor_t<TypeParam, 3> avo{{batch_size, fft_dim, fft_dim}};
+  this->pb->NumpyToTensorView(av, "a_in");
+
+  (avo = fft2(av)).run();
+  ASSERT_EQ(cudaStreamSynchronize(0), cudaSuccess);  // do not ignore async errors
+
+  MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh);
+  MATX_EXIT_HANDLER();
+}
+
+TYPED_TEST(FFTTestComplexTypes, FFT2D16BatchedStridedC2C)
+{
+  // fft2 over axes {0, 2} of a 16x10x16 tensor; the size-10 dimension is in the middle.
+  MATX_ENTER_HANDLER();
+  const index_t batch_size = 10;
+  const index_t fft_dim = 16;
+  this->pb->template InitAndRunTVGenerator<TypeParam>(
+      "00_transforms", "fft_operators", "fft_2d_batched_strided",
+      {fft_dim, batch_size, fft_dim});
+  tensor_t<TypeParam, 3> av{{fft_dim, batch_size, fft_dim}};
+  tensor_t<TypeParam, 3> avo{{fft_dim, batch_size, fft_dim}};
+  this->pb->NumpyToTensorView(av, "a_in");
+
+  const int32_t axes[] = {0, 2};
+  (avo = fft2(av, axes)).run();
+  ASSERT_EQ(cudaStreamSynchronize(0), cudaSuccess);  // do not ignore async errors
+
+  MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh);
+  MATX_EXIT_HANDLER();
+}
+
 TYPED_TEST(FFTTestComplexTypes, IFFT2D16C2C)
 {
   MATX_ENTER_HANDLER();
@@ -658,6 +717,99 @@ TYPED_TEST(FFTTestComplexTypes, IFFT2D16C2C)
   MATX_EXIT_HANDLER();
 }
 
+TYPED_TEST(FFTTestComplexTypes, IFFT2D16x32C2C)
+{
+  // 2D C2C inverse FFT on a non-square 16x32 input, compared with the "ifft_2d" vectors.
+  MATX_ENTER_HANDLER();
+  const index_t fft_dim[] = {16, 32};
+  this->pb->template InitAndRunTVGenerator<TypeParam>(
+      "00_transforms", "fft_operators", "ifft_2d", {fft_dim[0], fft_dim[1]});
+  tensor_t<TypeParam, 2> av{{fft_dim[0], fft_dim[1]}};
+  tensor_t<TypeParam, 2> avo{{fft_dim[0], fft_dim[1]}};
+  this->pb->NumpyToTensorView(av, "a_in");
+
+  (avo = ifft2(av)).run();
+  ASSERT_EQ(cudaStreamSynchronize(0), cudaSuccess);  // do not ignore async errors
+
+  MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh);
+  MATX_EXIT_HANDLER();
+}
+
+TYPED_TEST(FFTTestComplexNonHalfTypes, FFT2D16R2C)
+{
+  // fft2 from a 16x16 rtype input into a 16x(16/2+1) TypeParam output ("rfft_2d" vectors).
+  MATX_ENTER_HANDLER();
+  const index_t fft_dim = 16;
+  using rtype = typename TypeParam::value_type;
+  this->pb->template InitAndRunTVGenerator<rtype>(
+      "00_transforms", "fft_operators", "rfft_2d", {fft_dim, fft_dim});
+  tensor_t<rtype, 2> av{{fft_dim, fft_dim}};
+  tensor_t<TypeParam, 2> avo{{fft_dim, fft_dim / 2 + 1}};
+  this->pb->NumpyToTensorView(av, "a_in");
+
+  (avo = fft2(av)).run();
+  ASSERT_EQ(cudaStreamSynchronize(0), cudaSuccess);  // do not ignore async errors
+
+  MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh);
+  MATX_EXIT_HANDLER();
+}
+
+TYPED_TEST(FFTTestComplexNonHalfTypes, FFT2D16x32R2C)
+{
+  // fft2 from a 16x32 rtype input into a 16x(32/2+1) TypeParam output ("rfft_2d" vectors).
+  MATX_ENTER_HANDLER();
+  const index_t fft_dim[] = {16, 32};
+  using rtype = typename TypeParam::value_type;
+  this->pb->template InitAndRunTVGenerator<rtype>(
+      "00_transforms", "fft_operators", "rfft_2d", {fft_dim[0], fft_dim[1]});
+  tensor_t<rtype, 2> av{{fft_dim[0], fft_dim[1]}};
+  tensor_t<TypeParam, 2> avo{{fft_dim[0], fft_dim[1] / 2 + 1}};
+  this->pb->NumpyToTensorView(av, "a_in");
+
+  (avo = fft2(av)).run();
+  ASSERT_EQ(cudaStreamSynchronize(0), cudaSuccess);  // do not ignore async errors
+
+  MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh);
+  MATX_EXIT_HANDLER();
+}
+
+TYPED_TEST(FFTTestComplexNonHalfTypes, IFFT2D16C2R)
+{
+  // ifft2 from a 16x(16/2+1) TypeParam input into a 16x16 rtype output ("irfft_2d" vectors).
+  MATX_ENTER_HANDLER();
+  const index_t fft_dim = 16;
+  using rtype = typename TypeParam::value_type;
+  this->pb->template InitAndRunTVGenerator<TypeParam>(
+      "00_transforms", "fft_operators", "irfft_2d", {fft_dim, fft_dim});
+  tensor_t<TypeParam, 2> av{{fft_dim, fft_dim / 2 + 1}};
+  tensor_t<rtype, 2> avo{{fft_dim, fft_dim}};
+  this->pb->NumpyToTensorView(av, "a_in");
+
+  (avo = ifft2(av)).run();
+  ASSERT_EQ(cudaStreamSynchronize(0), cudaSuccess);  // do not ignore async errors
+
+  MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh);
+  MATX_EXIT_HANDLER();
+}
+
+TYPED_TEST(FFTTestComplexNonHalfTypes, IFFT2D16x32C2R)
+{
+  // ifft2 from a 16x(32/2+1) TypeParam input into a 16x32 rtype output ("irfft_2d" vectors).
+  MATX_ENTER_HANDLER();
+  const index_t fft_dim[] = {16, 32};
+  using rtype = typename TypeParam::value_type;
+  this->pb->template InitAndRunTVGenerator<TypeParam>(
+      "00_transforms", "fft_operators", "irfft_2d", {fft_dim[0], fft_dim[1]});
+  tensor_t<TypeParam, 2> av{{fft_dim[0], fft_dim[1] / 2 + 1}};
+  tensor_t<rtype, 2> avo{{fft_dim[0], fft_dim[1]}};
+  this->pb->NumpyToTensorView(av, "a_in");
+
+  (avo = ifft2(av)).run();
+  ASSERT_EQ(cudaStreamSynchronize(0), cudaSuccess);  // do not ignore async errors
+
+  MATX_TEST_ASSERT_COMPARE(this->pb, avo, "a_out", this->thresh);
+  MATX_EXIT_HANDLER();
+}
 
 TYPED_TEST(FFTTestComplexNonHalfTypes, FFT1D1024C2CShort)
 {
diff --git a/test/test_vectors/generators/00_transforms.py b/test/test_vectors/generators/00_transforms.py
index 69c0cc5ca..6501a7e26 100755
--- a/test/test_vectors/generators/00_transforms.py
+++ b/test/test_vectors/generators/00_transforms.py
@@ -319,7 +319,23 @@ def fft_2d(self) -> Dict[str, np.ndarray]:
             (self.size[0], self.size[1]), self.dtype)
         return {
             'a_in': seq,
-            'a_out': np.fft.fft2(seq, (self.size[1], self.size[1]))
+            'a_out': np.fft.fft2(seq, (self.size[0], self.size[1]))
+        }
+
+    def fft_2d_batched(self) -> Dict[str, np.ndarray]:
+        """Batched 2D FFT vectors: np.fft.fft2 over the last two axes of a 3D input."""
+        batch, rows, cols = self.size[0], self.size[1], self.size[2]
+        data = matx_common.randn_ndarray((batch, rows, cols), self.dtype)
+        return {
+            'a_in': data, 'a_out': np.fft.fft2(data, (rows, cols))
+        }
+
+    def fft_2d_batched_strided(self) -> Dict[str, np.ndarray]:
+        """2D FFT vectors over axes 0 and 2 of a 3D input; axis 1 is not transformed."""
+        rows, mid, cols = self.size[0], self.size[1], self.size[2]
+        data = matx_common.randn_ndarray((rows, mid, cols), self.dtype)
+        return {
+            'a_in': data, 'a_out': np.fft.fft2(data, (rows, cols), axes=(0, 2))
         }
 
     def ifft_2d(self) -> Dict[str, np.ndarray]:
@@ -327,7 +343,23 @@ def ifft_2d(self) -> Dict[str, np.ndarray]:
             (self.size[0], self.size[1]), self.dtype)
         return {
             'a_in': seq,
-            'a_out': np.fft.ifft2(seq, (self.size[1], self.size[1]))
+            'a_out': np.fft.ifft2(seq, (self.size[0], self.size[1]))
+        }
+
+    def rfft_2d(self) -> Dict[str, np.ndarray]:
+        """2D vectors: random size[0] x size[1] input and its np.fft.rfft2 output."""
+        rows, cols = self.size[0], self.size[1]
+        data = matx_common.randn_ndarray((rows, cols), self.dtype)
+        return {
+            'a_in': data, 'a_out': np.fft.rfft2(data, (rows, cols))
+        }
+
+    def irfft_2d(self) -> Dict[str, np.ndarray]:
+        """2D C2R vectors: a_in holds only the size[1] // 2 + 1 columns irfft2 consumes."""
+        half_cols = self.size[1] // 2 + 1
+        seq = matx_common.randn_ndarray((self.size[0], half_cols), self.dtype)
+        return {
+            'a_in': seq, 'a_out': np.fft.irfft2(seq, (self.size[0], self.size[1]))
         }