
Commit 60550be

introduce a versatile sparse tensor type to MatX (experimental)
This PR introduces a single, versatile sparse tensor type that uses a tensor format DSL (Domain Specific Language) to describe a vast space of storage formats. The DSL easily expresses the common storage formats (such as Dense, COO, CSR, CSC, and BSR) as well as many less common ones, and it can be extended with additional formats in the future. This first PR only introduces the storage details of the sparse tensor type, together with factory methods for constructing COO, CSR, and CSC sparse matrices from MatX buffers. Later PRs will add more general ways of constructing sparse tensors (e.g. from file) and actual operations such as SpMV and SpMM using cuSPARSE.
1 parent d71f0dd commit 60550be

File tree

7 files changed: +821 −5 lines changed

examples/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@ set(examples
     mvdr_beamformer
     pwelch
     resample_poly_bench
+    sparse_tensor
     spectrogram
     spectrogram_graph
     spherical_harmonics

examples/sparse_tensor.cu

Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
////////////////////////////////////////////////////////////////////////////////
// BSD 3-Clause License
//
// Copyright (c) 2025, NVIDIA Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
//    contributors may be used to endorse or promote products derived from
//    this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/////////////////////////////////////////////////////////////////////////////////

#include "matx.h"

using namespace matx;

int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
{
  MATX_ENTER_HANDLER();

  cudaStream_t stream = 0;
  cudaExecutor exec{stream};

  //
  // Creates a COO matrix for the following 4x8 dense matrix with 5 nonzero
  // elements, using the factory method that uses MatX tensors for the 1-dim
  // buffers. The sparse matrix resides in the same memory space as its buffer
  // constituents.
  //
  //   | 1, 2, 0, 0, 0, 0, 0, 0 |
  //   | 0, 0, 0, 0, 0, 0, 0, 0 |
  //   | 0, 0, 0, 0, 0, 0, 0, 0 |
  //   | 0, 0, 3, 4, 0, 5, 0, 0 |
  //

  constexpr index_t m = 4;
  constexpr index_t n = 8;
  constexpr index_t nse = 5;

  tensor_t<float, 1> values{{nse}};
  tensor_t<int, 1> row_idx{{nse}};
  tensor_t<int, 1> col_idx{{nse}};

  values.SetVals({ 1, 2, 3, 4, 5 });
  row_idx.SetVals({ 0, 0, 3, 3, 3 });
  col_idx.SetVals({ 0, 1, 2, 3, 5 });

  // Note that sparse tensor support in MatX is still experimental.
  auto Acoo = experimental::make_tensor_coo(values, row_idx, col_idx, {m, n});

  //
  // This shows:
  //
  // tensor_impl_2_f32: Tensor{float} Rank: 2, Sizes:[4, 8], Levels:[4, 8]
  // nse    = 5
  // format = ( d0, d1 ) -> ( d0 : compressed(non-unique), d1 : singleton )
  // crd[0] = ( 0 0 3 3 3 )
  // crd[1] = ( 0 1 2 3 5 )
  // values = ( 1.0000e+00 2.0000e+00 3.0000e+00 4.0000e+00 5.0000e+00 )
  // space  = CUDA managed memory
  //
  print(Acoo);

  // TODO: operations on Acoo

  MATX_EXIT_HANDLER();
}
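
The example above only constructs a COO matrix. For comparison, here is a hedged sketch of how the same 4x8 matrix could be handed to the make_tensor_csr factory introduced later in this commit; the buffer contents below are worked out by hand for illustration and are not part of the commit itself.

  // Sketch only: CSR uses a row-positions vector of length m + 1 and a
  // column-coordinates vector of length nse, with entries sorted by row,
  // then column.
  tensor_t<float, 1> csr_values{{nse}};
  tensor_t<int, 1> csr_row_ptr{{m + 1}};
  tensor_t<int, 1> csr_col_idx{{nse}};

  csr_values.SetVals({ 1, 2, 3, 4, 5 });
  csr_row_ptr.SetVals({ 0, 2, 2, 2, 5 });   // rows 1 and 2 are empty
  csr_col_idx.SetVals({ 0, 1, 2, 3, 5 });

  // Assumes the factory signature shown in make_sparse_tensor.h below.
  auto Acsr = experimental::make_tensor_csr(csr_values, csr_row_ptr,
                                            csr_col_idx, {m, n});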

include/matx.h

Lines changed: 2 additions & 0 deletions
@@ -45,6 +45,8 @@
 #include "matx/core/print.h"
 #include "matx/core/pybind.h"
 #include "matx/core/tensor.h"
+#include "matx/core/sparse_tensor.h" // sparse support is experimental
+#include "matx/core/make_sparse_tensor.h"
 #include "matx/core/tie.h"
 #include "matx/core/utils.h"
 #include "matx/core/viz.h"

include/matx/core/make_sparse_tensor.h

Lines changed: 104 additions & 0 deletions

@@ -0,0 +1,104 @@
////////////////////////////////////////////////////////////////////////////////
// BSD 3-Clause License
//
// Copyright (c) 2025, NVIDIA Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
//    contributors may be used to endorse or promote products derived from
//    this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/////////////////////////////////////////////////////////////////////////////////

#pragma once

#include "matx/core/sparse_tensor.h"

namespace matx {
namespace experimental {

//
// MatX uses a single versatile sparse tensor type that uses a tensor format
// DSL (Domain Specific Language) to describe a vast space of storage formats.
// This file provides a number of convenience factory methods that construct
// sparse tensors in well-known storage formats, like COO, CSR, and CSC,
// directly from the constituent buffers. More factory methods can easily be
// added as the need arises.
//

// Constructs a sparse matrix in COO format directly from the values and
// the two coordinates vectors. The entries should be sorted by row, then
// column. Duplicate entries should not occur. Explicit zeros may be stored.
template <typename ValTensor, typename CrdTensor>
auto make_tensor_coo(ValTensor &val, CrdTensor &row, CrdTensor &col,
                     const index_t (&shape)[2], bool owning = false) {
  static_assert(ValTensor::Rank() == 1 && CrdTensor::Rank() == 1);
  using VAL = typename ValTensor::value_type;
  using CRD = typename CrdTensor::value_type;
  using POS = int; // no positions, although some forms use [0,nse]
  raw_pointer_buffer<POS, matx_allocator<POS>> emptyp{nullptr, 0, owning};
  basic_storage<decltype(emptyp)> ep{std::move(emptyp)};
  return sparse_tensor_t<VAL, CRD, POS, COO>(
      shape, val.GetStorage(), {row.GetStorage(), col.GetStorage()}, {ep, ep});
}

// Constructs a sparse matrix in CSR format directly from the values, the row
// positions, and column coordinates vectors. The entries should be sorted by
// row, then column. Explicit zeros may be stored.
template <typename ValTensor, typename PosTensor, typename CrdTensor>
auto make_tensor_csr(ValTensor &val, PosTensor &rowp, CrdTensor &col,
                     const index_t (&shape)[2], bool owning = false) {
  static_assert(ValTensor::Rank() == 1 && CrdTensor::Rank() == 1 &&
                PosTensor::Rank() == 1);
  using VAL = typename ValTensor::value_type;
  using CRD = typename CrdTensor::value_type;
  using POS = typename PosTensor::value_type;
  raw_pointer_buffer<CRD, matx_allocator<CRD>> emptyc{nullptr, 0, owning};
  basic_storage<decltype(emptyc)> ec{std::move(emptyc)};
  raw_pointer_buffer<POS, matx_allocator<POS>> emptyp{nullptr, 0, owning};
  basic_storage<decltype(emptyp)> ep{std::move(emptyp)};
  return sparse_tensor_t<VAL, CRD, POS, CSR>(
      shape, val.GetStorage(), {ec, col.GetStorage()}, {ep, rowp.GetStorage()});
}

// Constructs a sparse matrix in CSC format directly from the values,
// the row coordinates, and column position vectors. The entries should
// be sorted by column, then row. Explicit zeros may be stored.
template <typename ValTensor, typename PosTensor, typename CrdTensor>
auto make_tensor_csc(ValTensor &val, CrdTensor &row, PosTensor &colp,
                     const index_t (&shape)[2], bool owning = false) {
  static_assert(ValTensor::Rank() == 1 && CrdTensor::Rank() == 1 &&
                PosTensor::Rank() == 1);
  using VAL = typename ValTensor::value_type;
  using CRD = typename CrdTensor::value_type;
  using POS = typename PosTensor::value_type;
  raw_pointer_buffer<CRD, matx_allocator<CRD>> emptyc{nullptr, 0, owning};
  basic_storage<decltype(emptyc)> ec{std::move(emptyc)};
  raw_pointer_buffer<POS, matx_allocator<POS>> emptyp{nullptr, 0, owning};
  basic_storage<decltype(emptyp)> ep{std::move(emptyp)};
  return sparse_tensor_t<VAL, CRD, POS, CSC>(
      shape, val.GetStorage(), {ec, row.GetStorage()}, {ep, colp.GetStorage()});
}

} // namespace experimental
} // namespace matx
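
As a usage note for the factories above, a hedged sketch of a CSC construction for the same 4x8 matrix used in the example: the column-positions vector has length n + 1 = 9, and entries are sorted by column, then row (which for this particular matrix coincides with the row-major order). Only the make_tensor_csc signature comes from this commit; the buffer contents are worked out by hand for illustration.

  tensor_t<float, 1> csc_values{{5}};   // nse = 5
  tensor_t<int, 1> csc_row_idx{{5}};
  tensor_t<int, 1> csc_col_ptr{{9}};    // n + 1 = 9

  csc_values.SetVals({ 1, 2, 3, 4, 5 });
  csc_row_idx.SetVals({ 0, 0, 3, 3, 3 });
  csc_col_ptr.SetVals({ 0, 1, 2, 3, 4, 4, 5, 5, 5 });   // columns 4, 6, 7 are empty

  auto Acsc = experimental::make_tensor_csc(csc_values, csc_row_idx,
                                            csc_col_ptr, {4, 8});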

include/matx/core/print.h

Lines changed: 59 additions & 5 deletions
@@ -136,11 +136,17 @@ namespace matx {
 
   template <typename Op>
   void PrintShapeImpl(const Op& op, FILE *fp) {
-    if (is_tensor_view_v<Op>) {
-      fprintf(fp, "%s: ",op.str().c_str());
+    if constexpr (is_tensor_view_v<Op>) {
+      fprintf(fp, "%s: ", op.str().c_str());
     }
 
-    std::string type = (is_tensor_view_v<Op>) ? "Tensor" : "Operator";
+    std::string type;
+    if constexpr (is_sparse_tensor_v<Op>)
+      type = "SparseTensor";
+    else if constexpr (is_tensor_view_v<Op>)
+      type = "Tensor";
+    else
+      type = "Operator";
     fprintf(fp, "%s{%s} Rank: %d, Sizes:[", type.c_str(), detail::GetTensorTypeString<typename Op::value_type>().c_str(), op.Rank());
     for (index_t dimIdx = 0; dimIdx < op.Rank(); dimIdx++)
     {
@@ -149,7 +155,25 @@
       fprintf(fp, ", ");
     }
 
-    if constexpr (is_tensor_view_v<Op>)
+    if constexpr (is_sparse_tensor_v<Op>)
+    {
+      // A sparse tensor has no strides, so show the level sizes instead.
+      // These are obtained by translating dims to levels using the format.
+      index_t dims[Op::Format::DIM];
+      index_t lvls[Op::Format::LVL];
+      for (int dimIdx = 0; dimIdx < Op::Format::DIM; dimIdx++) {
+        dims[dimIdx] = op.Size(dimIdx);
+      }
+      Op::Format::dim2lvl(dims, lvls, /*asSize=*/true);
+      fprintf(fp, "], Levels:[");
+      for (int lvlIdx = 0; lvlIdx < Op::Format::LVL; lvlIdx++) {
+        fprintf(fp, "%" MATX_INDEX_T_FMT, lvls[lvlIdx]);
+        if (lvlIdx < (Op::Format::LVL - 1)) {
+          fprintf(fp, ", ");
+        }
+      }
+    }
+    else if constexpr (is_tensor_view_v<Op>)
     {
       fprintf(fp, "], Strides:[");
       if constexpr (Op::Rank() > 0)
@@ -543,7 +567,37 @@
 
 #ifdef __CUDACC__
     cudaDeviceSynchronize();
-    if constexpr (is_tensor_view_v<Op>) {
+    if constexpr (is_sparse_tensor_v<Op>) {
+      using Format = typename Op::Format;
+      index_t nse = op.Nse();
+      fprintf(fp, "nse = %" MATX_INDEX_T_FMT "\n", nse);
+      fprintf(fp, "format = ");
+      Format::print();
+      for (int lvlIdx = 0; lvlIdx < Format::LVL; lvlIdx++) {
+        if (op.POSData(lvlIdx)) {
+          const index_t pend = op.posSize(lvlIdx);
+          fprintf(fp, "pos[%d] = (", lvlIdx);
+          for (index_t i = 0; i < pend; i++) {
+            PrintVal(fp, op.POSData(lvlIdx)[i]);
+          }
+          fprintf(fp, ")\n");
+        }
+        if (op.CRDData(lvlIdx)) {
+          const index_t cend = op.crdSize(lvlIdx);
+          fprintf(fp, "crd[%d] = (", lvlIdx);
+          for (index_t i = 0; i < cend; i++) {
+            PrintVal(fp, op.CRDData(lvlIdx)[i]);
+          }
+          fprintf(fp, ")\n");
+        }
+      }
+      fprintf(fp, "values = (");
+      for (index_t i = 0; i < nse; i++) {
+        PrintVal(fp, op.Data()[i]);
+      }
+      fprintf(fp, ")\nspace = %s\n", SpaceString(GetPointerKind(op.Data())).c_str());
+    }
+    else if constexpr (is_tensor_view_v<Op>) {
       // If the user is printing a tensor with a const pointer underlying the data, we need to do the lookup
       // as if it's not const. This is because the ownership decision is done at runtime instead of compile-time,
       // so even though the lookup will never be done, the compilation path happens.
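
For context on the Levels line added to PrintShapeImpl above: in the COO example earlier in this commit, the output shows Sizes:[4, 8] and Levels:[4, 8], i.e. the two dimensions map one-to-one onto two levels. A hedged sketch of the same dims-to-levels query done standalone, mirroring the loop in the diff (assumes a sparse tensor named A, such as the Acoo built in the example):

  // Sketch only: query the level sizes of a sparse tensor "A" the same way
  // the new printing code does.
  using Format = decltype(A)::Format;
  index_t dims[Format::DIM];
  index_t lvls[Format::LVL];
  for (int d = 0; d < Format::DIM; d++) {
    dims[d] = A.Size(d);               // 4 and 8 for the COO example
  }
  Format::dim2lvl(dims, lvls, /*asSize=*/true);
  // For the COO example, lvls now holds { 4, 8 }, matching Levels:[4, 8].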
