
Commit 68a8c12

Implement the () operator on sparse tensors (#837)
Note that this is a fully functional "locator" for the () operator that works for *all* versatile sparse tensors. Currently, the operator is defined at the sparse_tensor level, but it should be moved into tensor_impl after this. Also, clients should always be aware that the () operator for compressed levels is *not* random-access, but involves a search to determine whether the element is stored.
1 parent aada972 commit 68a8c12
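For context, a minimal usage sketch of the new getter (hedged: `Acoo` is assumed to be the COO matrix built in the bundled examples/sparse_tensor.cu, and which elements are stored depends on that data):

```cpp
// Reading elements through the new () operator. Each access through a
// compressed level is a search, not O(1) indexing, so keep this out of
// hot loops.
float stored  = Acoo(0, 0); // value at (0,0), if that element is stored
float implied = Acoo(1, 7); // implicit zero, if (1,7) is not stored
```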

File tree: 4 files changed, +110 -3 lines

examples/sparse_tensor.cu (18 additions, 0 deletions)

```diff
@@ -97,6 +97,24 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   //
   print(Acoo);
 
+  //
+  // A very naive way to convert the sparse matrix back to a dense
+  // matrix. Note that one should **never** use the ()-operator in
+  // performance critical code, since sparse data structures do
+  // not provide O(1) random access to their elements (compressed
+  // levels will use some form of search to determine if an element
+  // is present). Instead, conversions (and other operations) should
+  // use sparse operations that are tailored for the sparse data
+  // structure (such as scanning by row for CSR).
+  //
+  tensor_t<float, 2> Dense{{m, n}};
+  for (index_t i = 0; i < m; i++) {
+    for (index_t j = 0; j < n; j++) {
+      Dense(i, j) = Acoo(i, j);
+    }
+  }
+  print(Dense);
+
   // TODO: operations on Acoo
 
   MATX_EXIT_HANDLER();
```
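The "scanning by row for CSR" alternative the comment alludes to could look roughly like this (a sketch over plain CSR arrays, not the MatX API): one pass over the stored elements, with no per-element search.

```cpp
#include <vector>

// Scatter a CSR matrix into a zero-initialized dense buffer. Each stored
// element is written exactly once, so the cost is O(nse) writes rather
// than one search per dense (i, j) pair.
void csr_to_dense(int m, int n,
                  const std::vector<int> &row_ptr,   // size m + 1
                  const std::vector<int> &col_idx,   // size nse
                  const std::vector<float> &values,  // size nse
                  std::vector<float> &dense) {       // size m * n, zeroed
  for (int i = 0; i < m; i++) {
    for (int p = row_ptr[i]; p < row_ptr[i + 1]; p++) {
      dense[i * n + col_idx[p]] = values[p];
    }
  }
}
```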

include/matx/core/make_sparse_tensor.h (14 additions, 2 deletions)

```diff
@@ -55,11 +55,23 @@ auto make_tensor_coo(ValTensor &val, CrdTensor &row, CrdTensor &col,
   static_assert(ValTensor::Rank() == 1 && CrdTensor::Rank() == 1);
   using VAL = typename ValTensor::value_type;
   using CRD = typename CrdTensor::value_type;
-  using POS = int; // no positions, although some forms use [0,nse]
+  using POS = index_t;
+  // Note that the COO API typically does not involve positions.
+  // However, under the formal DSL specifications, the top level
+  // compression should set up pos[0] = {0, nse}. This is done
+  // here, using the same memory space as the other data.
+  POS *ptr;
+  matxMemorySpace_t space = GetPointerKind(val.GetStorage().data());
+  matxAlloc((void **)&ptr, 2 * sizeof(POS), space, 0);
+  ptr[0] = 0;
+  ptr[1] = val.Size(0);
+  raw_pointer_buffer<POS, matx_allocator<POS>> topp{ptr, 2 * sizeof(POS),
+                                                    owning};
+  basic_storage<decltype(topp)> tp{std::move(topp)};
   raw_pointer_buffer<POS, matx_allocator<POS>> emptyp{nullptr, 0, owning};
   basic_storage<decltype(emptyp)> ep{std::move(emptyp)};
   return sparse_tensor_t<VAL, CRD, POS, COO>(
-      shape, val.GetStorage(), {row.GetStorage(), col.GetStorage()}, {ep, ep});
+      shape, val.GetStorage(), {row.GetStorage(), col.GetStorage()}, {tp, ep});
 }
 
 // Constructs a sparse matrix in CSR format directly from the values, the row
```
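To make the pos[0] = {0, nse} convention concrete, here is a hypothetical 4x4 COO matrix with three stored elements (illustrative data, not from the commit):

```cpp
// Stored elements: (0,1)=1.0, (2,0)=2.0, (3,3)=3.0, so nse = 3.
float   val[3] = {1.0f, 2.0f, 3.0f};
index_t row[3] = {0, 2, 3};
index_t col[3] = {1, 0, 3};
// The bookkeeping this commit adds for the top compressed level:
index_t pos0[2] = {0, 3}; // {0, nse}: the level-0 scan covers entries [0, 3)
```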

include/matx/core/sparse_tensor.h (76 additions, 0 deletions)

```diff
@@ -109,6 +109,82 @@ class sparse_tensor_t
   index_t crdSize(int l) const { return coordinates_[l].size() / sizeof(CRD); }
   index_t posSize(int l) const { return positions_[l].size() / sizeof(POS); }
 
+  // Locates position of an element at given indices, or returns -1 when not
+  // found.
+  template <int L = 0>
+  __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ index_t
+  GetPos(index_t *lvlsz, index_t *lvl, index_t pos) const {
+    if constexpr (L < LVL) {
+      using ftype = std::tuple_element_t<L, typename TF::LVLSPECS>;
+      if constexpr (ftype::lvltype == LvlType::Dense) {
+        // Dense level: pos * size + i.
+        // TODO: see below, use a constexpr GetLvlSize(L) instead?
+        const index_t dpos = pos * lvlsz[L] + lvl[L];
+        if constexpr (L + 1 < LVL) {
+          return GetPos<L + 1>(lvlsz, lvl, dpos);
+        } else {
+          return dpos;
+        }
+      } else if constexpr (ftype::lvltype == LvlType::Singleton) {
+        // Singleton level: pos if crd[pos] == i and next levels match.
+        if (this->CRDData(L)[pos] == lvl[L]) {
+          if constexpr (L + 1 < LVL) {
+            return GetPos<L + 1>(lvlsz, lvl, pos);
+          } else {
+            return pos;
+          }
+        }
+      } else if constexpr (ftype::lvltype == LvlType::Compressed ||
+                           ftype::lvltype == LvlType::CompressedNonUnique) {
+        // Compressed level: scan for match on i and test next levels.
+        const CRD *c = this->CRDData(L);
+        const POS *p = this->POSData(L);
+        for (index_t pp = p[pos], hi = p[pos + 1]; pp < hi; pp++) {
+          if (c[pp] == lvl[L]) {
+            if constexpr (L + 1 < LVL) {
+              const index_t cpos = GetPos<L + 1>(lvlsz, lvl, pp);
+              if constexpr (ftype::lvltype == LvlType::Compressed) {
+                return cpos; // always end scan (unique)
+              } else if (cpos != -1) {
+                return cpos; // only end scan on success (non-unique)
+              }
+            } else {
+              return pp;
+            }
+          }
+        }
+      }
+    }
+    return -1; // not found
+  }
+
+  // Element getter (viz. "lhs = Acoo(0,0);"). Note that due to the compact
+  // nature of sparse data structures, these storage formats do not provide
+  // cheap random access to their elements. Instead, the implementation will
+  // search for a stored element at the given position (which involves a scan
+  // at each compressed level). The implicit value zero is returned when the
+  // element cannot be found. So, although functional for testing, clients
+  // should avoid using getters inside performance critical regions, since
+  // the implementation is far worse than O(1).
+  template <typename... Is>
+  __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ VAL
+  operator()(Is... indices) const noexcept {
+    static_assert(
+        sizeof...(Is) == DIM,
+        "Number of indices of operator() must match rank of sparse tensor");
+    cuda::std::array<index_t, DIM> dim{indices...};
+    cuda::std::array<index_t, LVL> lvl;
+    cuda::std::array<index_t, LVL> lvlsz;
+    TF::dim2lvl(dim.data(), lvl.data(), /*asSize=*/false);
+    // TODO: only compute once and provide a constexpr LvlSize(l) instead?
+    TF::dim2lvl(this->Shape().data(), lvlsz.data(), /*asSize=*/true);
+    const index_t pos = GetPos(lvlsz.data(), lvl.data(), 0);
+    if (pos != -1) {
+      return this->Data()[pos];
+    }
+    return static_cast<VAL>(0); // implicit zero
+  }
+
 private:
   // Primary storage of sparse tensor (explicitly stored element values).
   StorageV values_;
```
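Using the hypothetical COO data from the make_tensor_coo section above, a lookup such as Acoo(2, 0) would resolve as follows (a sketch; it assumes COO maps to a compressed non-unique level over rows followed by a singleton level over columns, the usual DSL encoding):

```cpp
// GetPos<0>: compressed level over rows. Scan pp in
// [pos0[0], pos0[1]) = [0, 3) for row coordinate 2:
//   row = {0, 2, 3}  ->  match at pp = 1
// GetPos<1>: singleton level over columns. Check col[1] == 0:
//   col = {1, 0, 3}  ->  match, so GetPos returns 1
// operator() then returns val[1] == 2.0f.
//
// Acoo(1, 1) finds no row coordinate 1 during the level-0 scan,
// GetPos returns -1, and operator() returns the implicit zero.
```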

include/matx/core/tensor_impl.h (2 additions, 1 deletion)

```diff
@@ -637,7 +637,8 @@ MATX_IGNORE_WARNING_POP_GCC
    * @return
    *    A shape of the data with the appropriate dimensions set
    */
-  __MATX_INLINE__ auto Shape() const noexcept { return this->desc_.Shape(); }
+  __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__
+  auto Shape() const noexcept { return this->desc_.Shape(); }
 
   /**
    * Get the strides the tensor from the underlying data
```
