Skip to content

Commit 8b75eae

Browse files
authored
Merge branch 'main' into cccl_std_conversion
2 parents 67e5d54 + 75b032d commit 8b75eae

File tree

6 files changed

+79
-59
lines changed

6 files changed

+79
-59
lines changed

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ option(MATX_EN_CUTENSOR OFF)
2323
option(MATX_EN_FILEIO OFF)
2424
option(MATX_EN_NVPL OFF, "Enable NVIDIA Performance Libraries for optimized ARM CPU support")
2525
option(MATX_DISABLE_CUB_CACHE "Disable caching for CUB allocations" ON)
26+
# Opt-in switch for code coverage instrumentation; defaults to OFF.
# Argument order follows option(<variable> "<help_text>" [value]) so the
# description (rather than "OFF") is stored as the cache help text.
option(MATX_EN_COVERAGE "Enable code coverage reporting" OFF)
2627

2728
set(MATX_EN_PYBIND11 OFF CACHE BOOL "Enable pybind11 support")
2829

@@ -181,6 +182,11 @@ if (MATX_DISABLE_CUB_CACHE)
181182
target_compile_definitions(matx INTERFACE MATX_DISABLE_CUB_CACHE=1)
182183
endif()
183184

185+
# When coverage is requested, attach GCC-style coverage flags to the matx
# target. They are declared INTERFACE, i.e. as usage requirements of matx:
# compile-time instrumentation flags plus the gcov/--coverage link flags.
if (MATX_EN_COVERAGE)
186+
target_compile_options(matx INTERFACE -fprofile-arcs -ftest-coverage)
187+
target_link_options(matx INTERFACE -lgcov --coverage)
188+
endif()
189+
184190
# Get the tensor libraries if we need them
185191
if (MATX_EN_CUTENSOR)
186192
set(CUTENSORNET_VERSION 24.03.0.4)

bench/00_transform/svd_power.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ void svdbpi_batch(nvbench::state &state,
102102

103103
MATX_NVTX_START_RANGE( "Exec", matx_nvxtLogLevels::MATX_NVTX_LOG_ALL, 1 )
104104
state.exec(
105-
[&U, &S, &VT, &A, &iterations, &r](nvbench::launch &launch) {
105+
[&U, &S, &VT, &A, &iterations](nvbench::launch &launch) {
106106
(mtie(U, S, VT) = svdbpi(A, iterations)).run(cudaExecutor{launch.get_stream()}); });
107107
MATX_NVTX_END_RANGE( 1 )
108108
}

include/matx/core/stacktrace.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,23 +47,25 @@
4747
#include <iostream>
4848
#include <sstream>
4949
#include <string>
50+
#include <array>
5051

5152
namespace matx {
5253
namespace detail{
5354

55+
// Frame limit for stack traces; printStackTrace sizes its address buffer as
// MAX_FRAMES + 1 entries.
static constexpr int MAX_FRAMES = 63;
56+
5457
/** Print a demangled stack backtrace of the caller function to the std::ostream eout (std::cerr by default). */
55-
static inline void printStackTrace(std::ostream &eout = std::cerr,
56-
unsigned int max_frames = 63)
58+
static inline void printStackTrace(std::ostream &eout = std::cerr)
5759
{
5860
#ifdef _WIN32
5961
// TODO add code for windows stack trace
6062
#else
6163
std::stringstream out;
6264
// storage array for stack trace address data
63-
void *addrlist[max_frames + 1];
65+
std::array<void *, MAX_FRAMES + 1> addrlist;
6466
// retrieve current stack addresses
6567
int addrlen =
66-
backtrace(addrlist, static_cast<int>(sizeof(addrlist) / sizeof(void *)));
68+
backtrace(reinterpret_cast<void **>(&addrlist), static_cast<int>(addrlist.size()));
6769

6870
if (addrlen == 0) {
6971
out << " <empty, possibly corrupt>\n";
@@ -72,7 +74,7 @@ static inline void printStackTrace(std::ostream &eout = std::cerr,
7274

7375
// resolve addresses into strings containing "filename(function+address)",
7476
// this array must be free()-ed
75-
char **symbollist = backtrace_symbols(addrlist, addrlen);
77+
char **symbollist = backtrace_symbols(reinterpret_cast<void *const *>(&addrlist), addrlen);
7678
// allocate string which will be filled with the demangled function name
7779
size_t funcnamesize = 256;
7880
char *funcname = (char *)malloc(funcnamesize);

include/matx/core/tensor.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1466,7 +1466,7 @@ class tensor_t : public detail::tensor_impl_t<T,RANK,Desc> {
14661466
int d = 0;
14671467
bool def_stride = (strides[0] == -1);
14681468

1469-
int end_count = 0;
1469+
[[maybe_unused]] int end_count = 0;
14701470
for (int i = 0; i < RANK; i++) {
14711471
if (ends[i] == matxDropDim) {
14721472
end_count++;

include/matx/core/tensor_utils.h

Lines changed: 15 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -186,13 +186,6 @@ namespace detail {
186186
else {
187187
return cuda::std::invoke(std::forward<Func>(f), cuda::std::get<S>(std::forward<Tuple>(tuple))...);
188188
}
189-
190-
if constexpr (!(is_std_tuple<remove_cvref_t<Tuple>>::value || is_std_array<remove_cvref_t<Tuple>>::value)) {
191-
return cuda::std::invoke(std::forward<Func>(f), cuda::std::get<S>(std::forward<Tuple>(tuple))...);
192-
}
193-
else {
194-
return cuda::std::invoke(std::forward<Func>(f), cuda::std::get<S>(std::forward<Tuple>(tuple))...);
195-
}
196189
}
197190

198191
template <class Func, class Tuple>
@@ -208,17 +201,6 @@ namespace detail {
208201
std::forward<Func>(f), std::forward<Tuple>(t),
209202
std::make_index_sequence<cuda::std::tuple_size_v<remove_cvref_t<Tuple>>>{});
210203
}
211-
212-
if constexpr (!(is_std_tuple<remove_cvref_t<Tuple>>::value || is_std_array<remove_cvref_t<Tuple>>::value)) {
213-
return apply_impl(
214-
std::forward<Func>(f), std::forward<Tuple>(t),
215-
std::make_index_sequence<cuda::std::tuple_size_v<remove_cvref_t<Tuple>>>{});
216-
}
217-
else {
218-
return apply_impl(
219-
std::forward<Func>(f), std::forward<Tuple>(t),
220-
std::make_index_sequence<cuda::std::tuple_size_v<remove_cvref_t<Tuple>>>{});
221-
}
222204
}
223205

224206
template <class Func, class Tuple>
@@ -234,17 +216,6 @@ namespace detail {
234216
std::forward<Func>(f), std::forward<Tuple>(t),
235217
make_index_sequence_rev<cuda::std::tuple_size_v<remove_cvref_t<Tuple>>>{});
236218
}
237-
238-
if constexpr (!(is_std_tuple<remove_cvref_t<Tuple>>::value || is_std_array<remove_cvref_t<Tuple>>::value)) {
239-
return apply_impl(
240-
std::forward<Func>(f), std::forward<Tuple>(t),
241-
make_index_sequence_rev<cuda::std::tuple_size_v<remove_cvref_t<Tuple>>>{});
242-
}
243-
else {
244-
return apply_impl(
245-
std::forward<Func>(f), std::forward<Tuple>(t),
246-
make_index_sequence_rev<cuda::std::tuple_size_v<remove_cvref_t<Tuple>>>{});
247-
}
248219
}
249220

250221
template <typename T0, typename T1, typename... Tn>
@@ -332,20 +303,6 @@ namespace detail {
332303
return i(args...);
333304
}, sliced_tup);
334305
}
335-
336-
if constexpr (!(T::Rank() == int(sizeof...(Is)) || T::Rank() == matxNoRank)) {
337-
// Construct an integer sequence of the length of the tuple, but only using the last indices
338-
using seq = offset_sequence_t<sizeof...(Is) - T::Rank(), std::make_index_sequence<T::Rank()>>;
339-
auto tup = cuda::std::make_tuple(indices...);
340-
auto sliced_tup = select_tuple(std::forward<decltype(tup)>(tup), seq{});
341-
return cuda::std::apply([&](auto... args) {
342-
return i(args...);
343-
}, sliced_tup);
344-
}
345-
else
346-
{
347-
return i(indices...);
348-
}
349306
}
350307

351308

@@ -360,15 +317,6 @@ namespace detail {
360317
{
361318
return i;
362319
}
363-
364-
if constexpr (!is_matx_op<T>())
365-
{
366-
return i;
367-
}
368-
else
369-
{
370-
return get_matx_value(i, indices...);
371-
}
372320
}
373321

374322
template <typename T> __MATX_INLINE__ std::string to_short_str() {
@@ -1184,6 +1132,21 @@ void print(const Op &op, [[maybe_unused]] Args... dims) {
11841132
cuda::std::apply([&](auto &&...args) { fprint(stdout, op, args...); }, tp);
11851133
}
11861134

1135+
/**
1136+
* @brief Print all values of a rank-0 operator to stdout
1137+
*
1138+
* This overload of `print()` is enabled only when `Op::Rank() == 0`. It takes
* no dimension arguments and forwards directly to `fprint(stdout, op)`.
1139+
*
1140+
* @tparam Op Operator input type; `Op::Rank()` must be usable in a constant
*            expression and equal 0 for this overload to participate
1141+
* @param op Operator input to print
1142+
*/
1143+
template <typename Op,
1144+
std::enable_if_t<(Op::Rank() == 0), bool> = true>
1145+
void print(const Op &op)
1146+
{
1147+
fprint(stdout, op);
1148+
}
1149+
11871150
#endif // not DOXYGEN_ONLY
11881151

11891152
template <typename Op>

test/00_io/PrintTests.cu

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,21 @@ TEST_F(PrintTest, DefaultTest4)
187187
MATX_EXIT_HANDLER();
188188
}
189189

190+
// Rank-0 print check in the default format. Asserts that the current print
// format is MATX_PRINT_FORMAT_DEFAULT, slices A1 with matx::slice<0> using
// matxDropDim, and hands the slice plus the expected text (a "Rank: 0" header
// line followed by one complex value line) to print_checker.
// NOTE(review): A1 and print_checker are defined outside this view; presumably
// a fixture tensor and an output-comparison helper - confirm.
TEST_F(PrintTest, DefaultTest5)
191+
{
192+
MATX_ENTER_HANDLER();
193+
auto pft = get_print_format_type();
194+
ASSERT_EQ(MATX_PRINT_FORMAT_DEFAULT, pft);
195+
196+
auto testSlice = matx::slice<0>(A1, {0}, {matx::matxDropDim});
197+
198+
print_checker(testSlice,
199+
"Tensor{complex<double>} Rank: 0, Sizes:[], Strides:[]\n"
200+
"-9.2466e-01+9.9114e-01j \n");
201+
202+
MATX_EXIT_HANDLER();
203+
}
204+
190205
TEST_F(PrintTest, MlabTest1)
191206
{
192207
MATX_ENTER_HANDLER();
@@ -281,6 +296,22 @@ TEST_F(PrintTest, MlabTest4)
281296
MATX_EXIT_HANDLER();
282297
}
283298

299+
// Rank-0 print check with the MLAB format selected. Sets the print format to
// MATX_PRINT_FORMAT_MLAB and asserts that get_print_format_type() reports it,
// then slices A1 with matx::slice<0> using matxDropDim and hands the slice
// plus the expected text (a "Rank: 0" header line followed by one complex
// value line) to print_checker.
// NOTE(review): A1 and print_checker are defined outside this view; presumably
// a fixture tensor and an output-comparison helper - confirm.
TEST_F(PrintTest, MlabTest5)
300+
{
301+
MATX_ENTER_HANDLER();
302+
set_print_format_type(MATX_PRINT_FORMAT_MLAB);
303+
auto pft = get_print_format_type();
304+
ASSERT_EQ(MATX_PRINT_FORMAT_MLAB, pft);
305+
306+
auto testSlice = matx::slice<0>(A1, {0}, {matx::matxDropDim});
307+
308+
print_checker(testSlice,
309+
"Tensor{complex<double>} Rank: 0, Sizes:[], Strides:[]\n"
310+
"-9.2466e-01+9.9114e-01j \n");
311+
312+
MATX_EXIT_HANDLER();
313+
}
314+
284315
TEST_F(PrintTest, PythonTest1)
285316
{
286317
MATX_ENTER_HANDLER();
@@ -370,3 +401,21 @@ TEST_F(PrintTest, PythonTest4)
370401

371402
MATX_EXIT_HANDLER();
372403
}
404+
405+
// Rank-0 print check with the Python format selected. Sets the print format
// to MATX_PRINT_FORMAT_PYTHON and asserts that get_print_format_type() reports
// it, then slices A1 with matx::slice<0> using matxDropDim and hands the slice
// plus the expected text (a "Rank: 0" header line followed by one complex
// value line) to print_checker.
// NOTE(review): A1 and print_checker are defined outside this view; presumably
// a fixture tensor and an output-comparison helper - confirm.
TEST_F(PrintTest, PythonTest5)
406+
{
407+
MATX_ENTER_HANDLER();
408+
set_print_format_type(MATX_PRINT_FORMAT_PYTHON);
409+
auto pft = get_print_format_type();
410+
ASSERT_EQ(MATX_PRINT_FORMAT_PYTHON, pft);
411+
412+
auto testSlice = matx::slice<0>(A1, {0}, {matx::matxDropDim});
413+
414+
print_checker(testSlice,
415+
"Tensor{complex<double>} Rank: 0, Sizes:[], Strides:[]\n"
416+
"-9.2466e-01+9.9114e-01j \n");
417+
418+
MATX_EXIT_HANDLER();
419+
}
420+
421+

0 commit comments

Comments
 (0)