NVIDIA · tylera-nvidia · May 1, 2024 · Apr 25, 2024 · Apr 25, 2024 · Apr 25, 2024
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -19,6 +19,9 @@ set(examples
     black_scholes
     print_styles)
 
+# include_directories(SYSTEM eigen/) # Uncomment_Eigen
+
+
 add_library(example_lib INTERFACE)
 target_include_directories(example_lib SYSTEM INTERFACE ${CUTLASS_INC} ${pybind11_INCLUDE_DIR} ${PYTHON_INCLUDE_DIRS})
 

diff --git a/examples/eigenExample.cu b/examples/eigenExample.cu
@@ -34,11 +34,11 @@
 
 
 // BUILD NOTES: TO build, include the path to the Eigen header files and uncomment all Eigen commands in this file. 
-// #include <Eigen/Dense>
+// To build with Eigen, search for all instances of "Uncomment_Eigen" and uncomment those lines.
+// #include <Eigen/Dense> // Uncomment_Eigen
 
 #include <iostream>
 
-using namespace matx;
 
 
 int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
@@ -47,226 +47,253 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   int dimY = 3;
 
 
-  // Eigen::MatrixXd a(dimX, dimY);
-  // Eigen::MatrixXd b(dimX, dimY);
-  // Eigen::MatrixXd addResult(dimX, dimY);
-  // Eigen::MatrixXd divResult(dimX, dimY);
-  // Eigen::MatrixXd multResult(dimX, dimY);
-  // Eigen::MatrixXd elementWise(dimX, dimY);
+  ///////////////////////////////////////////////////////////////////////////////
+  //////////////               Eigen Test Data Setup               //////////////
+  ///////////////////////////////////////////////////////////////////////////////
 
-  auto aT = make_tensor<double>({dimX,dimY});
-  auto bT = make_tensor<double>({dimX,dimY});
-  auto addResultT = make_tensor<double>({dimX,dimY});
-  auto divResultT = make_tensor<double>({dimX,dimY});
-  auto multResultT = make_tensor<double>({dimX,dimY});
-  auto elementWiseT = make_tensor<double>({dimX,dimY});
+  // Eigen::MatrixXd a(dimX, dimY);            // Uncomment_Eigen
+  // Eigen::MatrixXd b(dimX, dimY);            // Uncomment_Eigen
+  // Eigen::MatrixXf matrix10x10(10, 10);      // Uncomment_Eigen
+  // Eigen::RowVectorXd rowVec(dimX);          // Uncomment_Eigen
+  // Eigen::Matrix<std::complex<double>, 2, 2> complexMatrix;  // Uncomment_Eigen
 
-  // Initialize A and B with random values
-  // a.setRandom();
-  // b.setRandom();
+  ///////////////////////////////////////////////////////////////////////////////
+  //////////////                MatX Test Data Setup               //////////////
+  ///////////////////////////////////////////////////////////////////////////////
+  auto aTensor  = matx::make_tensor<double>({dimX,dimY});
+  auto bTensor  = matx::make_tensor<double>({dimX,dimY});
+  auto tensor1D = matx::make_tensor<double>({dimX});
+  auto matTensor10x10 = matx::make_tensor<float>({10,10});
+  auto complexTensor = matx::make_tensor<cuda::std::complex<double>>({2,2});
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  //////////////                   Initialize Data                 //////////////
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Fill the MatX tensors with random data so the example runs while Eigen is commented out; not needed when Eigen supplies the data (copied in below).
+  (aTensor = matx::random<double>({dimX, dimY}, matx::UNIFORM)).run(); 
+  (bTensor = matx::random<double>({dimX, dimY}, matx::UNIFORM)).run(); 
+  (matTensor10x10 = matx::random<double>({10, 10}, matx::UNIFORM)).run(); 
+  (complexTensor = matx::random<cuda::std::complex<double>>({2, 2}, matx::UNIFORM)).run(); 
+
+  // Initialize with random values
+  // a.setRandom(); // Uncomment_Eigen
+  // b.setRandom(); // Uncomment_Eigen
+  // matrix10x10.setRandom(); // Uncomment_Eigen 
+
+  // rowVec << 1, 2, 3; // Uncomment_Eigen
+
+  // complexMatrix(0, 0) = std::complex<double>(1.0, 2.0); // Uncomment_Eigen
+  // complexMatrix(0, 1) = std::complex<double>(2.0, 3.0); // Uncomment_Eigen
+  // complexMatrix(1, 0) = std::complex<double>(3.0, 4.0); // Uncomment_Eigen
+  // complexMatrix(1, 1) = std::complex<double>(4.0, 5.0); // Uncomment_Eigen
+
+
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  //////////////              Copy Eigen inputs to MatX            //////////////
+  ///////////////////////////////////////////////////////////////////////////////
 
-  // Create a 1x5 matrix
-  // Eigen::RowVectorXd rowVec(dimX);
-  // rowVec << 1, 2, 3;
+
+
+  // cudaMemcpy(aTensor.Data(), a.data(), sizeof(double) * dimX * dimY, cudaMemcpyHostToDevice); // Uncomment_Eigen
+  // cudaMemcpy(bTensor.Data(), b.data(), sizeof(double) * dimX * dimY, cudaMemcpyHostToDevice); // Uncomment_Eigen
+  // cudaMemcpy(matTensor10x10.Data(), matrix10x10.data(), sizeof(float)*10*10, cudaMemcpyHostToDevice);   // Uncomment_Eigen
+  // cudaMemcpy(complexTensor.Data(), complexMatrix.data(), sizeof(std::complex<double>)*2*2, cudaMemcpyHostToDevice);   // Uncomment_Eigen
 
-  auto rowT = make_tensor<double>({dimX});
-  rowT(0) = 1;
-  rowT(1) = 2;
-  rowT(2) = 3;
 
-  // copy A and B to MatXTensor
-  // int dataSize = sizeof(double) * dimX * dimY;
-  // cudaMemcpy(aT.Data(), a.data(), dataSize, cudaMemcpyHostToDevice);
-  // cudaMemcpy(bT.Data(), b.data(), dataSize, cudaMemcpyHostToDevice);
+  // (aTensor = matx::transpose(aTensor)).run();      // Uncomment_Eigen
+  // (bTensor = matx::transpose(bTensor)).run();      // Uncomment_Eigen
+  // (matTensor10x10 = matx::transpose(matTensor10x10)).run();  // Uncomment_Eigen
+  // (complexTensor = matx::transpose(complexTensor)).run();  // Uncomment_Eigen
 
-  //transpose to correct storage order
-  (aT = transpose(aT)).run();
-  (bT = transpose(bT)).run();
+  tensor1D(0) = 1;
+  tensor1D(1) = 2;
+  tensor1D(2) = 3;
+  cudaDeviceSynchronize();  
 
   // slower alternative of copying per-element
   // for(int curX=0; curX<dimX; curX++)
   // {
   //   for(int curY=0; curY<dimY; curY++)
   //   {
-  //     aT(curX,curY) = a(curX,curY); 
-  //     bT(curX,curY) = b(curX,curY); 
+  //     aTensor(curX,curY) = a(curX,curY); 
+  //     bTensor(curX,curY) = b(curX,curY); 
   //   } 
   // }
 
 
+  ///////////////////////////////////////////////////////////////////////////////
+  //////////////                 Operation Examples                //////////////
+  ///////////////////////////////////////////////////////////////////////////////
+
   //
   // Basic Indexing
   //
   std::cout << "=================== Indexing ===================" << std::endl;
-  // std::cout << "eigen a(1,2) = " <<  a(1,2) << std::endl;
-  std::cout << "MatX  a(1,2) = " << aT(1,2) << std::endl;
+  // std::cout << "eigen a(1,2) = " <<  a(1,2) << std::endl;  // Uncomment_Eigen
+
+  std::cout << "MatX  a(1,2) = " << aTensor(1,2) << std::endl;
 
 
   //
   // Add A and B
   //
   std::cout << "=================== Addition ===================" << std::endl;
-  // addResult = a + b;
-  // std::cout << "A + B = \n" << addResult << std::endl;
-  (addResultT = aT + bT).run();
-  matx::print(addResultT);  
+  // Eigen::MatrixXd addResult = a + b; // Uncomment_Eigen
+  // std::cout << "A + B = \n" << addResult << std::endl; // Uncomment_Eigen
+
+  auto addTensor = aTensor + bTensor;
+  matx::print(addTensor);  
 
 
   //
   // Element-Wise Multiply A and B
   // 
   std::cout << "=================== Element-Wise Multiply ===================" << std::endl;
-  // elementWise = a.cwiseProduct(b);
-  // std::cout << "A .* B = \n" << elementWise << std::endl;
+  // Eigen::MatrixXd elementWise = a.cwiseProduct(b); // Uncomment_Eigen
+  // std::cout << "A .* B = \n" << elementWise << std::endl; // Uncomment_Eigen
+
+  auto elementWiseTensor = aTensor*bTensor;
+  matx::print(elementWiseTensor);  
 
-  (elementWiseT=aT*bT).run();
-  matx::print(elementWiseT);  
 
   //
   // Divide A and B 
   //
   std::cout << "=================== Element-Wise Division ===================" << std::endl;
-  // divResult = a.cwiseQuotient(b);
-  // std::cout << "A / B = \n" << divResult << std::endl;
+  // Eigen::MatrixXd divResult = a.cwiseQuotient(b); // Uncomment_Eigen
+  // std::cout << "A / B = \n" << divResult << std::endl; // Uncomment_Eigen
+
+  auto divResultTensor = aTensor / bTensor;
+  matx::print(divResultTensor);
 
-  (divResultT= aT / bT).run();
-  matx::print(divResultT);
 
   //
   // Slice (Continuous)
   //
   std::cout << "=================== Continuous Slice ===================" << std::endl;
-  // Eigen::Matrix2d aSlice = a.block(0, 0, 2, 2);
-  // std::cout << "A Sliced: \n" << aSlice << std::endl;
+  // Eigen::Matrix2d aSlice = a.block(0, 0, 2, 2);       // Uncomment_Eigen 
+  // std::cout << "A Sliced: \n" << aSlice << std::endl; // Uncomment_Eigen 
+
+  auto aSliceTensor = matx::slice<2>(aTensor,{0,0},{2,2});
+  matx::print(aSliceTensor);
 
-  auto aSliceT = matx::slice<2>(aT,{0,0},{2,2});
-  matx::print(aSliceT);
 
   //
   // Slice (Strided)
   //
-  std::cout << "=================== Strided Slice ===================" << std::endl;
-  // Eigen::MatrixXf matrix(10, 10); // Create a 10x10 matrix
-  // matrix.setRandom(); // Fill it with random numbers
-
-  auto matT = make_tensor<float>({10,10});
-  // cudaMemcpy(matT.Data(), matrix.data(), sizeof(float)*10*10, cudaMemcpyHostToDevice);
-  (matT = transpose(matT)).run();  
-  cudaDeviceSynchronize();
-
-  // std::cout << "Original matrix:\n" << matrix << "\n\n";
-
+  std::cout << "=================== Strided Slice ===================" << std::endl;  
+  // std::cout << "Original matrix10x10:\n" << matrix10x10 << "\n\n"; // Uncomment_Eigen
   // Define the starting point, number of elements to select, and strides for both rows and columns
-  // int startRow = 0, startCol = 0; // Starting index for rows and columns
-  // int rowStride = 3, colStride = 2; // Stride along rows and columns
-  // int numRows = 5; // Calculate the number of rows, considering every second element
-  // int numCols = 3; // Grab every third item until the 8th item (0, 3, 6)
-
+  // int startRow = 0, startCol = 0; // Starting index for rows and columns             
+  // int rowStride = 3, colStride = 2; // Stride along rows and columns                 
+  // int numRows = 5; // Calculate the number of rows, considering every second element 
+  // int numCols = 3; // Grab every third item until the 8th item (0, 3, 6)             
+  
   // Create a Map with Stride to access the elements
-      // Eigen::Map<Eigen::MatrixXf, 0, Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>> 
-      // strided(matrix.data() + 0 * matrix.outerStride() + 0, 
-              // 5, 3, 
-              // Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>(rowStride * matrix.outerStride(), colStride));
+  // Eigen::Map<Eigen::MatrixXf, 0, Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>>                        // Uncomment_Eigen   
+  // strided(matrix10x10.data() + 0 * matrix10x10.outerStride() + 0,                                                // Uncomment_Eigen     
+          // 5, 3,                                                                                        // Uncomment_Eigen  
+          // Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>(3 * matrix10x10.outerStride(), 2));                 // Uncomment_Eigen 
 
-  // Print the strided matrix
-  // std::cout << "Strided matrix:\n" << strided << "\n";
+  // Print the strided view of matrix10x10
+  // std::cout << "Strided matrix10x10:\n" << strided << "\n";  // Uncomment_Eigen
 
-  auto slicedMat = matx::slice(matT, {0,0}, {matxEnd,9}, {2,3});
+  auto slicedMat = matx::slice(matTensor10x10, {0,0}, {matx::matxEnd,9}, {2,3});
   matx::print(slicedMat);
 
+
   //
   // Clone
   //
   std::cout << "=================== Clone ===================" << std::endl;
   // Use the replicate function to create a 5x5 matrix by replicating the 1x5 matrix
-  // Eigen::MatrixXd mat = rowVec.replicate(3, 1);
-  // std::cout << "1D Cloned to 2D \n" << mat << std::endl;
+  // Eigen::MatrixXd mat = rowVec.replicate(3, 1);           // Uncomment_Eigen
+  // std::cout << "1D Cloned to 2D \n" << mat << std::endl;  // Uncomment_Eigen
+
+  auto cloned3Tensor = matx::clone<2>(tensor1D, {3, matx::matxKeepDim});
+  matx::print(cloned3Tensor);    
 
-  auto cloned3T = clone<2>(rowT, {3, matxKeepDim});
-  matx::print(cloned3T);    
 
   //
   // Slice Row
   //
   std::cout << "=================== Slice Row ===================" << std::endl;
-  // Eigen::RowVector3d row = a.row(1);
-  // std::cout << "Sliced Row \n" << row << std::endl;
+  // Eigen::RowVector3d row = a.row(1);                // Uncomment_Eigen
+  // std::cout << "Sliced Row \n" << row << std::endl; // Uncomment_Eigen
 
-  auto rowSlice = slice<1>(aT, {1, 0}, {matxDropDim, matxEnd});
+  auto rowSlice = matx::slice<1>(aTensor, {1, 0}, {matx::matxDropDim, matx::matxEnd});
   matx::print(rowSlice);
 
+
   //
   // Permute Rows
   //
   std::cout << "=================== Permute Rows ===================" << std::endl;
-  // std::cout << "Original Matrix:\n" << a << std::endl;
+  // std::cout << "Original Matrix:\n" << a << std::endl;   // Uncomment_Eigen
   // Define a permutation a
-  // Eigen::PermutationMatrix<3> perm;
-  // perm.indices() << 2, 1, 0;  // This permutation swaps the first and third rows
-  // Apply the permutation to the rows
-  // Eigen::Matrix3d permutedMatrix = perm * a;
-  // std::cout << "Permuted Matrix (Rows):\n" << permutedMatrix << std::endl;
+  // Eigen::PermutationMatrix<3> perm;     // Uncomment_Eigen
+  // perm.indices() << 2, 1, 0;  // This permutation swaps the first and third rows // Uncomment_Eigen
+  // Apply the permutation to the rows  
+  // Eigen::Matrix3d permutedMatrix = perm * a; // Uncomment_Eigen
+  // std::cout << "Permuted Matrix (Rows):\n" << permutedMatrix << std::endl; // Uncomment_Eigen
 
   // Define a permutation a
-  auto permVec = make_tensor<int>({dimX});
+  auto permVec = matx::make_tensor<int>({dimX});
   permVec(0) = 2;
   permVec(1) = 1;
   permVec(2) = 0;
-  auto permMat = make_tensor<double>({dimX,dimY});
   // Apply the permutation to the rows    
-  (permMat = remap<0>(aT, permVec)).run();
-  matx::print(permMat);
+  auto permTensor = matx::remap<0>(aTensor, permVec);
+  matx::print(permTensor);
 
 
   //
   // Permutation Dimensions
   //
   std::cout << "=================== Permute Dimension ===================" << std::endl;
   // Unsupported by eigen
-  auto permA = permute(aT, {1,0});
+  auto permA = permute(aTensor, {1,0});
   matx::print(permA);
 
   //
   // Get Real Value
   //
   std::cout << "=================== Get Real Values ===================" << std::endl;
-  // Define a 2x2 matrix of complex numbers
-  // Eigen::Matrix<std::complex<double>, 2, 2> complexMatrix;
-  // complexMatrix(0, 0) = std::complex<double>(1.0, 2.0);
-  // complexMatrix(0, 1) = std::complex<double>(2.0, 3.0);
-  // complexMatrix(1, 0) = std::complex<double>(3.0, 4.0);
-  // complexMatrix(1, 1) = std::complex<double>(4.0, 5.0);
-
-  // Output the original complex matrix
-  // std::cout << "Original Complex Matrix:\n" << complexMatrix << std::endl;
+  // std::cout << "Original Complex Matrix:\n" << complexMatrix << std::endl; // Uncomment_Eigen 
 
   // Extract and output the real part of the complex matrix
-  // Eigen::Matrix<double, 2, 2> realMatrix = complexMatrix.real();
-  // std::cout << "Real Part of Matrix:\n" << realMatrix << std::endl;
+  // Eigen::Matrix<double, 2, 2> realMatrix = complexMatrix.real();    // Uncomment_Eigen
+  // std::cout << "Real Part of Matrix:\n" << realMatrix << std::endl; // Uncomment_Eigen
 
-  ///\todo TYLER_TODO setup code to have same matrix for MatX
+  auto realTensor = matx::real(complexTensor);
+  matx::print(realTensor);
+
 
   //
   // Multiply A and B 
   //
   std::cout << "=================== Matrix Multiply ===================" << std::endl;
-  // multResult = a * b;
-  // std::cout << "A * B = \n" << multResult << std::endl;
+  // Eigen::MatrixXd multResult = a * b;                    // Uncomment_Eigen
+  // std::cout << "A * B = \n" << multResult << std::endl;  // Uncomment_Eigen
 
-  (multResultT=matmul(aT,bT)).run();
-  matx::print(multResultT);
+  auto multResultTensor=matmul(aTensor,bTensor);
+  matx::print(multResultTensor);
 
 
   //
   // inverse Matrix
   //
   std::cout << "=================== Invert Matrix ===================" << std::endl;
-  // Eigen::MatrixXd inverseMatrix(dimX, dimY);
-  // inverseMatrix = a.inverse();
-  // std::cout << "Inverse of the Real Part:\n" << inverseMatrix << std::endl;
+  // Eigen::MatrixXd inverseMatrix(dimX, dimY);                                // current bug where .run() in inverse is ambiguous, so cannot be used with MatX 
+  // inverseMatrix = a.inverse();                                              // current bug where .run() in inverse is ambiguous, so cannot be used with MatX
+  // std::cout << "Inverse of the Real Part:\n" << inverseMatrix << std::endl; // current bug where .run() in inverse is ambiguous, so cannot be used with MatX
 
-  auto invMat = matx::inv(aT);
-  matx::print(invMat);
+  auto invTensor = matx::inv(aTensor);
+  matx::print(invTensor);
 
   //
   // 1D FFT