
Commit 60550be

introduce a versatile sparse tensor type to MatX (experimental)
This PR introduces a single, versatile sparse tensor type that uses a tensor format DSL (Domain Specific Language) to describe a vast space of storage formats. The DSL easily expresses the common storage formats (such as Dense, COO, CSR, CSC, and BSR) as well as many less common ones, and it can be extended with additional formats in the future. This first PR only introduces the storage details of the sparse tensor type, together with factory methods for constructing COO, CSR, and CSC sparse matrices from MatX buffers. Later PRs will add more general ways of constructing sparse tensors (e.g. from file) and actual operations such as SpMV and SpMM using cuSPARSE.
1 parent d71f0dd commit 60550be

File tree

7 files changed: +821 −5 lines changed

examples/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@ set(examples
     mvdr_beamformer
     pwelch
     resample_poly_bench
+    sparse_tensor
     spectrogram
     spectrogram_graph
     spherical_harmonics

examples/sparse_tensor.cu

Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
////////////////////////////////////////////////////////////////////////////////
// BSD 3-Clause License
//
// Copyright (c) 2025, NVIDIA Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
//    contributors may be used to endorse or promote products derived from
//    this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/////////////////////////////////////////////////////////////////////////////////

#include "matx.h"

using namespace matx;

int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
{
  MATX_ENTER_HANDLER();

  cudaStream_t stream = 0;
  cudaExecutor exec{stream};

  //
  // Creates a COO matrix for the following 4x8 dense matrix with 5 nonzero
  // elements, using the factory method that uses MatX tensors for the 1-dim
  // buffers. The sparse matrix resides in the same memory space as its buffer
  // constituents.
  //
  //   | 1, 2, 0, 0, 0, 0, 0, 0 |
  //   | 0, 0, 0, 0, 0, 0, 0, 0 |
  //   | 0, 0, 0, 0, 0, 0, 0, 0 |
  //   | 0, 0, 3, 4, 0, 5, 0, 0 |
  //

  constexpr index_t m = 4;
  constexpr index_t n = 8;
  constexpr index_t nse = 5;

  tensor_t<float, 1> values{{nse}};
  tensor_t<int, 1> row_idx{{nse}};
  tensor_t<int, 1> col_idx{{nse}};

  values.SetVals({ 1, 2, 3, 4, 5 });
  row_idx.SetVals({ 0, 0, 3, 3, 3 });
  col_idx.SetVals({ 0, 1, 2, 3, 5 });

  // Note that sparse tensor support in MatX is still experimental.
  auto Acoo = experimental::make_tensor_coo(values, row_idx, col_idx, {m, n});

  //
  // This shows:
  //
  // tensor_impl_2_f32: Tensor{float} Rank: 2, Sizes:[4, 8], Levels:[4, 8]
  // nse    = 5
  // format = ( d0, d1 ) -> ( d0 : compressed(non-unique), d1 : singleton )
  // crd[0] = ( 0 0 3 3 3 )
  // crd[1] = ( 0 1 2 3 5 )
  // values = ( 1.0000e+00 2.0000e+00 3.0000e+00 4.0000e+00 5.0000e+00 )
  // space  = CUDA managed memory
  //
  print(Acoo);

  // TODO: operations on Acoo

  MATX_EXIT_HANDLER();
}
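
The example above only constructs a COO matrix. For comparison, here is a hedged sketch of how the same 4x8 matrix could be handed to the make_tensor_csr factory introduced later in this commit; the buffer contents below are worked out by hand for illustration and are not part of the commit itself.

  // Sketch only: CSR uses a row-positions vector of length m + 1 and a
  // column-coordinates vector of length nse, with entries sorted by row,
  // then column.
  tensor_t<float, 1> csr_values{{nse}};
  tensor_t<int, 1> csr_row_ptr{{m + 1}};
  tensor_t<int, 1> csr_col_idx{{nse}};

  csr_values.SetVals({ 1, 2, 3, 4, 5 });
  csr_row_ptr.SetVals({ 0, 2, 2, 2, 5 });   // rows 1 and 2 are empty
  csr_col_idx.SetVals({ 0, 1, 2, 3, 5 });

  // Assumes the factory signature shown in make_sparse_tensor.h below.
  auto Acsr = experimental::make_tensor_csr(csr_values, csr_row_ptr,
                                            csr_col_idx, {m, n});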

include/matx.h

Lines changed: 2 additions & 0 deletions
@@ -45,6 +45,8 @@
 #include "matx/core/print.h"
 #include "matx/core/pybind.h"
 #include "matx/core/tensor.h"
+#include "matx/core/sparse_tensor.h" // sparse support is experimental
+#include "matx/core/make_sparse_tensor.h"
 #include "matx/core/tie.h"
 #include "matx/core/utils.h"
 #include "matx/core/viz.h"

include/matx/core/make_sparse_tensor.h

Lines changed: 104 additions & 0 deletions

@@ -0,0 +1,104 @@
////////////////////////////////////////////////////////////////////////////////
// BSD 3-Clause License
//
// Copyright (c) 2025, NVIDIA Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
//    contributors may be used to endorse or promote products derived from
//    this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/////////////////////////////////////////////////////////////////////////////////

#pragma once

#include "matx/core/sparse_tensor.h"

namespace matx {
namespace experimental {

//
// MatX uses a single versatile sparse tensor type that uses a tensor format
// DSL (Domain Specific Language) to describe a vast space of storage formats.
// This file provides a number of convenience factory methods that construct
// sparse tensors in well-known storage formats, like COO, CSR, and CSC,
// directly from the constituent buffers. More factory methods can easily be
// added as the need arises.
//

// Constructs a sparse matrix in COO format directly from the values and
// the two coordinates vectors. The entries should be sorted by row, then
// column. Duplicate entries should not occur. Explicit zeros may be stored.
template <typename ValTensor, typename CrdTensor>
auto make_tensor_coo(ValTensor &val, CrdTensor &row, CrdTensor &col,
                     const index_t (&shape)[2], bool owning = false) {
  static_assert(ValTensor::Rank() == 1 && CrdTensor::Rank() == 1);
  using VAL = typename ValTensor::value_type;
  using CRD = typename CrdTensor::value_type;
  using POS = int; // no positions, although some forms use [0,nse]
  raw_pointer_buffer<POS, matx_allocator<POS>> emptyp{nullptr, 0, owning};
  basic_storage<decltype(emptyp)> ep{std::move(emptyp)};
  return sparse_tensor_t<VAL, CRD, POS, COO>(
      shape, val.GetStorage(), {row.GetStorage(), col.GetStorage()}, {ep, ep});
}

// Constructs a sparse matrix in CSR format directly from the values, the row
// positions, and column coordinates vectors. The entries should be sorted by
// row, then column. Explicit zeros may be stored.
template <typename ValTensor, typename PosTensor, typename CrdTensor>
auto make_tensor_csr(ValTensor &val, PosTensor &rowp, CrdTensor &col,
                     const index_t (&shape)[2], bool owning = false) {
  static_assert(ValTensor::Rank() == 1 && CrdTensor::Rank() == 1 &&
                PosTensor::Rank() == 1);
  using VAL = typename ValTensor::value_type;
  using CRD = typename CrdTensor::value_type;
  using POS = typename PosTensor::value_type;
  raw_pointer_buffer<CRD, matx_allocator<CRD>> emptyc{nullptr, 0, owning};
  basic_storage<decltype(emptyc)> ec{std::move(emptyc)};
  raw_pointer_buffer<POS, matx_allocator<POS>> emptyp{nullptr, 0, owning};
  basic_storage<decltype(emptyp)> ep{std::move(emptyp)};
  return sparse_tensor_t<VAL, CRD, POS, CSR>(
      shape, val.GetStorage(), {ec, col.GetStorage()}, {ep, rowp.GetStorage()});
}

// Constructs a sparse matrix in CSC format directly from the values,
// the row coordinates, and column position vectors. The entries should
// be sorted by column, then row. Explicit zeros may be stored.
template <typename ValTensor, typename PosTensor, typename CrdTensor>
auto make_tensor_csc(ValTensor &val, CrdTensor &row, PosTensor &colp,
                     const index_t (&shape)[2], bool owning = false) {
  static_assert(ValTensor::Rank() == 1 && CrdTensor::Rank() == 1 &&
                PosTensor::Rank() == 1);
  using VAL = typename ValTensor::value_type;
  using CRD = typename CrdTensor::value_type;
  using POS = typename PosTensor::value_type;
  raw_pointer_buffer<CRD, matx_allocator<CRD>> emptyc{nullptr, 0, owning};
  basic_storage<decltype(emptyc)> ec{std::move(emptyc)};
  raw_pointer_buffer<POS, matx_allocator<POS>> emptyp{nullptr, 0, owning};
  basic_storage<decltype(emptyp)> ep{std::move(emptyp)};
  return sparse_tensor_t<VAL, CRD, POS, CSC>(
      shape, val.GetStorage(), {ec, row.GetStorage()}, {ep, colp.GetStorage()});
}

} // namespace experimental
} // namespace matx
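
As a usage note for the factories above, a hedged sketch of a CSC construction for the same 4x8 matrix used in the example: the column-positions vector has length n + 1 = 9, and entries are sorted by column, then row (which for this particular matrix coincides with the row-major order). Only the make_tensor_csc signature comes from this commit; the buffer contents are worked out by hand for illustration.

  tensor_t<float, 1> csc_values{{5}};   // nse = 5
  tensor_t<int, 1> csc_row_idx{{5}};
  tensor_t<int, 1> csc_col_ptr{{9}};    // n + 1 = 9

  csc_values.SetVals({ 1, 2, 3, 4, 5 });
  csc_row_idx.SetVals({ 0, 0, 3, 3, 3 });
  csc_col_ptr.SetVals({ 0, 1, 2, 3, 4, 4, 5, 5, 5 });   // columns 4, 6, 7 are empty

  auto Acsc = experimental::make_tensor_csc(csc_values, csc_row_idx,
                                            csc_col_ptr, {4, 8});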

include/matx/core/print.h

Lines changed: 59 additions & 5 deletions
@@ -136,11 +136,17 @@ namespace matx {
 
   template <typename Op>
   void PrintShapeImpl(const Op& op, FILE *fp) {
-    if (is_tensor_view_v<Op>) {
-      fprintf(fp, "%s: ",op.str().c_str());
+    if constexpr (is_tensor_view_v<Op>) {
+      fprintf(fp, "%s: ", op.str().c_str());
     }
 
-    std::string type = (is_tensor_view_v<Op>) ? "Tensor" : "Operator";
+    std::string type;
+    if constexpr (is_sparse_tensor_v<Op>)
+      type = "SparseTensor";
+    else if constexpr (is_tensor_view_v<Op>)
+      type = "Tensor";
+    else
+      type = "Operator";
     fprintf(fp, "%s{%s} Rank: %d, Sizes:[", type.c_str(), detail::GetTensorTypeString<typename Op::value_type>().c_str(), op.Rank());
     for (index_t dimIdx = 0; dimIdx < op.Rank(); dimIdx++)
     {
@@ -149,7 +155,25 @@
       fprintf(fp, ", ");
     }
 
-    if constexpr (is_tensor_view_v<Op>)
+    if constexpr (is_sparse_tensor_v<Op>)
+    {
+      // A sparse tensor has no strides, so show the level sizes instead.
+      // These are obtained by translating dims to levels using the format.
+      index_t dims[Op::Format::DIM];
+      index_t lvls[Op::Format::LVL];
+      for (int dimIdx = 0; dimIdx < Op::Format::DIM; dimIdx++) {
+        dims[dimIdx] = op.Size(dimIdx);
+      }
+      Op::Format::dim2lvl(dims, lvls, /*asSize=*/true);
+      fprintf(fp, "], Levels:[");
+      for (int lvlIdx = 0; lvlIdx < Op::Format::LVL; lvlIdx++) {
+        fprintf(fp, "%" MATX_INDEX_T_FMT, lvls[lvlIdx]);
+        if (lvlIdx < (Op::Format::LVL - 1)) {
+          fprintf(fp, ", ");
+        }
+      }
+    }
+    else if constexpr (is_tensor_view_v<Op>)
     {
       fprintf(fp, "], Strides:[");
       if constexpr (Op::Rank() > 0)
@@ -543,7 +567,37 @@
 
 #ifdef __CUDACC__
     cudaDeviceSynchronize();
-    if constexpr (is_tensor_view_v<Op>) {
+    if constexpr (is_sparse_tensor_v<Op>) {
+      using Format = typename Op::Format;
+      index_t nse = op.Nse();
+      fprintf(fp, "nse = %" MATX_INDEX_T_FMT "\n", nse);
+      fprintf(fp, "format = ");
+      Format::print();
+      for (int lvlIdx = 0; lvlIdx < Format::LVL; lvlIdx++) {
+        if (op.POSData(lvlIdx)) {
+          const index_t pend = op.posSize(lvlIdx);
+          fprintf(fp, "pos[%d] = (", lvlIdx);
+          for (index_t i = 0; i < pend; i++) {
+            PrintVal(fp, op.POSData(lvlIdx)[i]);
+          }
+          fprintf(fp, ")\n");
+        }
+        if (op.CRDData(lvlIdx)) {
+          const index_t cend = op.crdSize(lvlIdx);
+          fprintf(fp, "crd[%d] = (", lvlIdx);
+          for (index_t i = 0; i < cend; i++) {
+            PrintVal(fp, op.CRDData(lvlIdx)[i]);
+          }
+          fprintf(fp, ")\n");
+        }
+      }
+      fprintf(fp, "values = (");
+      for (index_t i = 0; i < nse; i++) {
+        PrintVal(fp, op.Data()[i]);
+      }
+      fprintf(fp, ")\nspace = %s\n", SpaceString(GetPointerKind(op.Data())).c_str());
+    }
+    else if constexpr (is_tensor_view_v<Op>) {
       // If the user is printing a tensor with a const pointer underlying the data, we need to do the lookup
       // as if it's not const. This is because the ownership decision is done at runtime instead of compile-time,
       // so even though the lookup will never be done, the compilation path happens.
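
For context on the Levels line added to PrintShapeImpl above: in the COO example earlier in this commit, the output shows Sizes:[4, 8] and Levels:[4, 8], i.e. the two dimensions map one-to-one onto two levels. A hedged sketch of the same dims-to-levels query done standalone, mirroring the loop in the diff (assumes a sparse tensor named A, such as the Acoo built in the example):

  // Sketch only: query the level sizes of a sparse tensor "A" the same way
  // the new printing code does.
  using Format = decltype(A)::Format;
  index_t dims[Format::DIM];
  index_t lvls[Format::LVL];
  for (int d = 0; d < Format::DIM; d++) {
    dims[d] = A.Size(d);               // 4 and 8 for the COO example
  }
  Format::dim2lvl(dims, lvls, /*asSize=*/true);
  // For the COO example, lvls now holds { 4, 8 }, matching Levels:[4, 8].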
