NVIDIA · tbensonatl · Jun 13, 2024 · Jun 13, 2024 · Jun 13, 2024 · Jun 13, 2024
diff --git a/docs_input/api/casting/as_complex_double.rst b/docs_input/api/casting/as_complex_double.rst
@@ -0,0 +1,24 @@
+.. _as_complex_double_func:
+
+as_complex_double
+=================
+
+Cast an operator to cuda::std::complex<double>, or combine separate real and imaginary operators into a cuda::std::complex<double> operator
+
+.. doxygenfunction:: matx::as_complex_double(T t)
+.. doxygenfunction:: matx::as_complex_double(T1 t1, T2 t2)
+
+Examples
+~~~~~~~~
+
+.. literalinclude:: ../../../test/00_operators/OperatorTests.cu
+   :language: cpp
+   :start-after: example-begin as_complex_double-test-1
+   :end-before: example-end as_complex_double-test-1
+   :dedent:
+
+.. literalinclude:: ../../../test/00_operators/OperatorTests.cu
+   :language: cpp
+   :start-after: example-begin as_complex_double-test-2
+   :end-before: example-end as_complex_double-test-2
+   :dedent:
diff --git a/docs_input/api/casting/as_complex_float.rst b/docs_input/api/casting/as_complex_float.rst
@@ -0,0 +1,24 @@
+.. _as_complex_float_func:
+
+as_complex_float
+================
+
+Cast an operator to cuda::std::complex<float>, or combine separate real and imaginary operators into a cuda::std::complex<float> operator
+
+.. doxygenfunction:: matx::as_complex_float(T t)
+.. doxygenfunction:: matx::as_complex_float(T1 t1, T2 t2)
+
+Examples
+~~~~~~~~
+
+.. literalinclude:: ../../../test/00_operators/OperatorTests.cu
+   :language: cpp
+   :start-after: example-begin as_complex_float-test-1
+   :end-before: example-end as_complex_float-test-1
+   :dedent:
+
+.. literalinclude:: ../../../test/00_operators/OperatorTests.cu
+   :language: cpp
+   :start-after: example-begin as_complex_float-test-2
+   :end-before: example-end as_complex_float-test-2
+   :dedent:
diff --git a/include/matx/operators/cast.h b/include/matx/operators/cast.h
@@ -108,8 +108,78 @@ namespace matx
           return op_.Size(dim);
         }
     };
-  }   
 
+    template <typename T1, typename T2, typename NewType>
+      class ComplexCastOp : public BaseOp<ComplexCastOp<T1, T2, NewType>>
+    {
+      private:
+        typename base_type<T1>::type real_op_;
+        typename base_type<T2>::type imag_op_;
+
+      public:
+        using matxop = bool;
+        using scalar_type = NewType;
+        static_assert(!is_complex_v<T1> && !is_complex_half_v<T1>, "T1 input operator cannot be complex");
+        static_assert(!is_complex_v<T2> && !is_complex_half_v<T2>, "T2 input operator cannot be complex");
+        static_assert(is_complex_v<NewType> || is_complex_half_v<NewType>, "ComplexCastOp output type should be complex");
+
+	      __MATX_INLINE__ std::string str() const { return as_type_str<NewType>() + "(" + real_op_.str() + "," + imag_op_.str() + ")"; }
+        __MATX_INLINE__ ComplexCastOp(T1 real_op, T2 imag_op) : real_op_(real_op), imag_op_(imag_op) {
+          static_assert(detail::get_rank<T1>() == detail::get_rank<T2>(), "rank of real and imaginary operators must match");
+          if (real_op_.Shape() != imag_op_.Shape()) {
+            MATX_THROW(matxInvalidSize, "ComplexCastOp: sizes of input operators must match in all dimensions");
+          }
+        };
+
+        template <typename... Is>
+        __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ auto operator()(Is... indices) const
+        {
+          using inner_type = typename inner_op_type_t<NewType>::type;
+          return NewType(static_cast<inner_type>(real_op_(indices...)),static_cast<inner_type>(imag_op_(indices...)));
+        }
+
+        template <typename... Is>
+        __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices)
+        {
+          using inner_type = typename inner_op_type_t<NewType>::type;
+          return NewType(static_cast<inner_type>(real_op_(indices...)),static_cast<inner_type>(imag_op_(indices...)));
+        }
+
+        template <typename ShapeType, typename Executor>
+        __MATX_INLINE__ void PreRun(ShapeType &&shape, Executor &&ex) const noexcept
+        {
+          if constexpr (is_matx_op<T1>()) {
+            real_op_.PreRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));
+          }
+
+          if constexpr (is_matx_op<T2>()) {
+            imag_op_.PreRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));
+          }
+        }
+
+        template <typename ShapeType, typename Executor>
+        __MATX_INLINE__ void PostRun(ShapeType &&shape, Executor &&ex) const noexcept
+        {
+          if constexpr (is_matx_op<T1>()) {
+            real_op_.PostRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));
+          }
+          if constexpr (is_matx_op<T2>()) {
+            imag_op_.PostRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));
+          }
+        }
+
+        static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
+        {
+          // ctor static_assert verifies that detail::get_rank<T1>() == detail::get_rank<T2>()
+          return detail::get_rank<T1>();
+        }
+        constexpr __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ index_t Size(int dim) const
+        {
+          // ctor verifies that the per-dimension sizes of real_op_ and imag_op_ match
+          return real_op_.Size(dim);
+        }
+    };
+  }
 
   /**
    * @brief Helper function to cast an input operator to a different type
@@ -130,6 +200,21 @@ namespace matx
       }
     };   
 
+  /**
+   * @brief Helper function to cast a pair of input operators to a complex type.
+   *
+   * @tparam T1 Input type for the real components of the complex type
+   * @tparam T2 Input type for the imaginary components of the complex type
+   * @tparam NewType Casted type (must be complex)
+   * @param t1 Input operator of type T1
+   * @param t2 Input operator of type T2
+   * @return Operator output casted to NewType (must be complex)
+   */
+  template <typename NewType, typename T1, typename T2>
+    auto __MATX_INLINE__ as_complex_type(T1 t1, T2 t2)
+    {
+      return detail::ComplexCastOp<T1, T2, NewType>(t1, t2);
+    };
 
   /**
    * @brief Helper function to cast an input operator to an int
@@ -170,6 +255,36 @@ namespace matx
       return as_type<cuda::std::complex<float>>(t);
     };
 
+  /**
+   * @brief Helper function to cast a pair of input operators to a cuda::std::complex<float>
+   *
+   * @tparam T1 Input type for real components of the complex output type
+   * @tparam T2 Input type for imaginary components of the complex output type
+   * @param t1 Input operator for real components of the complex output type
+   * @param t2 Input operator for imaginary components of the complex output type
+   * @return Operator output casted to cuda::std::complex<float>
+   */
+  template <typename T1, typename T2>
+    auto __MATX_INLINE__ as_complex_float(T1 t1, T2 t2)
+    {
+      return as_complex_type<cuda::std::complex<float>>(t1, t2);
+    };
+
+  /**
+   * @brief Helper function to cast a pair of input operators to a cuda::std::complex<double>
+   *
+   * @tparam T1 Input type for real components of the complex output type
+   * @tparam T2 Input type for imaginary components of the complex output type
+   * @param t1 Input operator for real components of the complex output type
+   * @param t2 Input operator for imaginary components of the complex output type
+   * @return Operator output casted to cuda::std::complex<double>
+   */
+  template <typename T1, typename T2>
+    auto __MATX_INLINE__ as_complex_double(T1 t1, T2 t2)
+    {
+      return as_complex_type<cuda::std::complex<double>>(t1, t2);
+    };
+
   /**
    * @brief Helper function to cast an input operator to an double
    * 

diff --git a/include/matx/operators/concat.h b/include/matx/operators/concat.h
@@ -98,7 +98,7 @@ namespace matx
             return scalar_type(-9999);
             // returning this to satisfy lvalue requirements
           } else {
-            auto &op = cuda::std::get<I>(ops_);
+            const auto &op = cuda::std::get<I>(ops_);
             auto idx = indices[axis_];
             auto size = op.Size(axis_);
             // If in range of this operator
@@ -114,7 +114,7 @@ namespace matx
         }
 
       template <int I = 0, int N>
-        __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ auto& GetVal(cuda::std::array<index_t,RANK> &indices) {
+        __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) GetVal(cuda::std::array<index_t,RANK> &indices) {
 
           if constexpr ( I == N ) {
             // This should never happen

diff --git a/test/00_operators/OperatorTests.cu b/test/00_operators/OperatorTests.cu
@@ -117,6 +117,9 @@ template <typename TensorType>
 class OperatorTestsBooleanAllExecs : public ::testing::Test {
 };
 
+template <typename TensorType>
+class OperatorTestsCastToFloatAllExecs : public ::testing::Test {
+};
 
 TYPED_TEST_SUITE(OperatorTestsFloatNonHalf,
   MatXFloatNonHalfTypesAllExecs);  
@@ -135,6 +138,7 @@ TYPED_TEST_SUITE(OperatorTestsAllExecs, MatXAllTypesAllExecs);
 TYPED_TEST_SUITE(OperatorTestsFloatAllExecs, MatXTypesFloatAllExecs);
 TYPED_TEST_SUITE(OperatorTestsIntegralAllExecs, MatXTypesIntegralAllExecs);
 TYPED_TEST_SUITE(OperatorTestsBooleanAllExecs, MatXTypesBooleanAllExecs);
+TYPED_TEST_SUITE(OperatorTestsCastToFloatAllExecs, MatXTypesCastToFloatAllExecs);
 
 TYPED_TEST(OperatorTestsAllExecs, BaseOp)
 {
@@ -3679,6 +3683,8 @@ TYPED_TEST(OperatorTestsNumericAllExecs, Downsample)
     (t1 = static_cast<TestType>(1)).run(exec);
     auto ds_op = downsample(t1, 0, n);
 
+    exec.sync();
+
     ASSERT_TRUE(ds_op.Size(0) == t1.Size(0) / n + 1);
     for (index_t i = 0; i < ds_op.Size(0); i++) {
       ASSERT_TRUE(MatXUtils::MatXTypeCompare(ds_op(i), t1(i * n)));
@@ -4162,34 +4168,153 @@ TEST(OperatorTests, Cast)
     ASSERT_EQ(to(i), -4); // -4 from 126 + 126 wrap-around
   }  
 
+  // example-begin as_complex_float-test-1
   auto c32 = make_tensor<cuda::std::complex<float>>({});
-  auto c64 = make_tensor<cuda::std::complex<double>>({});
-  auto s32 = make_tensor<float>({});
   auto s64 = make_tensor<double>({});
-  s32.SetVals({3.0f});
   s64.SetVals({5.0});
+  (c32 = as_complex_float(s64)).run();
+  // c32() will be (5.0f, 0.0f)
+  // example-end as_complex_float-test-1
 
-  (c32 = as_complex_float(s32)).run();
+  // example-begin as_complex_double-test-1
+  auto c64 = make_tensor<cuda::std::complex<double>>({});
+  auto s32 = make_tensor<float>({});
+  s32.SetVals({3.0f});
   (c64 = as_complex_double(s32)).run();
+  // c64() will be (3.0, 0.0)
+  // example-end as_complex_double-test-1
+
   cudaStreamSynchronize(0);
 
-  ASSERT_EQ(c32().real(), 3.0f);
+  ASSERT_EQ(c32().real(), 5.0f);
   ASSERT_EQ(c32().imag(), 0.0f);
   ASSERT_EQ(c64().real(), 3.0);
   ASSERT_EQ(c64().imag(), 0.0);
 
-  (c32 = as_complex_float(s64)).run();
+  (c32 = as_complex_float(s32)).run();
   (c64 = as_complex_double(s64)).run();
   cudaStreamSynchronize(0);
 
-  ASSERT_EQ(c32().real(), 5.0f);
+  ASSERT_EQ(c32().real(), 3.0f);
   ASSERT_EQ(c32().imag(), 0.0f);
   ASSERT_EQ(c64().real(), 5.0);
   ASSERT_EQ(c64().imag(), 0.0);
 
   MATX_EXIT_HANDLER();
 }
 
+TEST(OperatorTests, ComplexCastExceptions)
+{
+  MATX_ENTER_HANDLER();
+  index_t count0 = 4;
+  auto t = make_tensor<int8_t>({count0});
+  auto t2 = make_tensor<int8_t>({count0});
+  auto to = make_tensor<float>({count0});
+
+  cudaExecutor exec{};
+
+  const int N = 3;
+  cuda::std::array<long long, N> real_dims, imag_dims;
+  real_dims.fill(5);
+  imag_dims.fill(5);
+
+  auto out = make_tensor<cuda::std::complex<float>>(real_dims);
+  auto test_code = [&real_dims, &imag_dims]() {
+      auto re = make_tensor<float>(real_dims);
+      auto im = make_tensor<float>(imag_dims);
+      [[maybe_unused]] auto op = as_complex_float(re, im);
+  };
+
+  for (int i = 0; i < N; i++) {
+    real_dims[i] = 6;
+    ASSERT_THROW({ test_code(); }, matx::detail::matxException);
+    real_dims[i] = 5;
+
+    imag_dims[i] = 6;
+    ASSERT_THROW({ test_code(); }, matx::detail::matxException);
+    imag_dims[i] = 5;
+  }
+
+  ASSERT_NO_THROW({ test_code(); });
+
+  MATX_EXIT_HANDLER();
+}
+
+TYPED_TEST(OperatorTestsCastToFloatAllExecs, ComplexCast)
+{
+  MATX_ENTER_HANDLER();
+
+  using TestType = cuda::std::tuple_element_t<0, TypeParam>;
+  using ExecType = cuda::std::tuple_element_t<1, TypeParam>;
+
+  ExecType exec{};
+
+  // 0D tensor tests
+  {
+    // example-begin as_complex_double-test-2
+    auto c64 = make_tensor<cuda::std::complex<double>>({});
+    auto in_real = make_tensor<TestType>({});
+    auto in_imag = make_tensor<TestType>({});
+    in_real.SetVals({3});
+    in_imag.SetVals({5});
+    (c64 = as_complex_double(in_real, in_imag)).run(exec);
+    // c64() will be (3.0, 5.0)
+    // example-end as_complex_double-test-2
+    exec.sync();
+
+    ASSERT_EQ(c64().real(), 3.0);
+    ASSERT_EQ(c64().imag(), 5.0);
+  }
+  {
+    // example-begin as_complex_float-test-2
+    auto c32 = make_tensor<cuda::std::complex<float>>({});
+    auto in_real = make_tensor<TestType>({});
+    auto in_imag = make_tensor<TestType>({});
+    in_real.SetVals({3});
+    in_imag.SetVals({5});
+    (c32 = as_complex_float(in_real, in_imag)).run(exec);
+    // c32() will be (3.0f, 5.0f)
+    // example-end as_complex_float-test-2
+    exec.sync();
+
+    ASSERT_EQ(c32().real(), 3.0f);
+    ASSERT_EQ(c32().imag(), 5.0f);
+  }
+
+  // 2D tensor tests
+  {
+    const int N = 4;
+    auto c32 = make_tensor<cuda::std::complex<float>>({N,N});
+    auto c64 = make_tensor<cuda::std::complex<double>>({N,N});
+    auto in_real = make_tensor<TestType>({N,N});
+    auto in_imag = make_tensor<TestType>({N,N});
+    for (int i = 0; i < N; i++) {
+      for (int j = 0; j < N; j++) {
+        in_real(i,j) = static_cast<TestType>(4);
+        in_imag(i,j) = static_cast<TestType>(6);
+      }
+    }
+
+    exec.sync();
+
+    (c32 = as_complex_float(in_real, in_imag)).run(exec);
+    (c64 = as_complex_double(in_real, in_imag)).run(exec);
+
+    exec.sync();
+
+    for (int i = 0; i < N; i++) {
+      for (int j = 0; j < N; j++) {
+        ASSERT_EQ(c32(i,j).real(), 4.0f);
+        ASSERT_EQ(c32(i,j).imag(), 6.0f);
+        ASSERT_EQ(c64(i,j).real(), 4.0);
+        ASSERT_EQ(c64(i,j).imag(), 6.0);
+      }
+    }
+  }
+
+  MATX_EXIT_HANDLER();
+}
+
 template<class TypeParam>
 TypeParam legendre_check(int n, int m, TypeParam x) {
 	if (m > n ) return 0;