Skip to content

Commit ef96ce1

Browse files
committed
Added isclose and allclose functions
1 parent 82edd4f commit ef96ce1

File tree

8 files changed

+302
-0
lines changed

8 files changed

+302
-0
lines changed
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
.. _isclose_func:
2+
3+
isclose
4+
=======
5+
6+
Determine the closeness of values across two operators using absolute and relative tolerances. The output
7+
from isclose is an ``int`` value since it's commonly used for reductions and ``bool`` reductions using
8+
atomics are not available in hardware.
9+
10+
11+
.. doxygenfunction:: isclose
12+
13+
Examples
14+
~~~~~~~~
15+
16+
.. literalinclude:: ../../../../test/00_operators/OperatorTests.cu
17+
:language: cpp
18+
:start-after: example-begin isclose-test-1
19+
:end-before: example-end isclose-test-1
20+
:dedent:
21+
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
.. _allclose_func:
2+
3+
allclose
4+
========
5+
6+
Reduce the closeness of two operators to a single scalar (0D) output. The output
7+
from allclose is an ``int`` value since boolean reductions are not available in hardware
8+
9+
10+
.. doxygenfunction:: allclose
11+
12+
Examples
13+
~~~~~~~~
14+
15+
.. literalinclude:: ../../../../test/00_operators/OperatorTests.cu
16+
:language: cpp
17+
:start-after: example-begin allclose-test-1
18+
:end-before: example-end allclose-test-1
19+
:dedent:
20+

examples/spectrogram.cu

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
138138
printf("Not outputting plot since visualizations disabled\n");
139139
#endif
140140
}
141+
141142
}
142143

143144
cudaEventRecord(stop, stream);
@@ -150,6 +151,7 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
150151
cudaEventDestroy(start);
151152
cudaEventDestroy(stop);
152153
cudaStreamDestroy(stream);
154+
153155
CUDA_CHECK_LAST_ERROR();
154156
MATX_EXIT_HANDLER();
155157
}

include/matx/operators/isclose.h

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
////////////////////////////////////////////////////////////////////////////////
2+
// BSD 3-Clause License
3+
//
4+
// Copyright (c) 2021, NVIDIA Corporation
5+
// All rights reserved.
6+
//
7+
// Redistribution and use in source and binary forms, with or without
8+
// modification, are permitted provided that the following conditions are met:
9+
//
10+
// 1. Redistributions of source code must retain the above copyright notice, this
11+
// list of conditions and the following disclaimer.
12+
//
13+
// 2. Redistributions in binary form must reproduce the above copyright notice,
14+
// this list of conditions and the following disclaimer in the documentation
15+
// and/or other materials provided with the distribution.
16+
//
17+
// 3. Neither the name of the copyright holder nor the names of its
18+
// contributors may be used to endorse or promote products derived from
19+
// this software without specific prior written permission.
20+
//
21+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24+
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25+
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26+
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27+
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28+
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29+
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31+
/////////////////////////////////////////////////////////////////////////////////
32+
33+
#pragma once
34+
35+
36+
#include "matx/core/type_utils.h"
37+
#include "matx/operators/scalar_ops.h"
38+
#include "matx/operators/base_operator.h"
39+
40+
namespace matx
41+
{
42+
43+
namespace detail {
44+
template <typename Op1, typename Op2>
45+
class IsCloseOp : public BaseOp<IsCloseOp<Op1, Op2>>
46+
{
47+
public:
48+
using matxop = bool;
49+
using scalar_type = typename remove_cvref_t<Op2>::scalar_type;
50+
using inner_type = typename inner_op_type_t<scalar_type>::type;
51+
52+
__MATX_INLINE__ std::string str() const { return "isclose()"; }
53+
54+
__MATX_INLINE__ IsCloseOp(Op1 op1, Op2 op2, double rtol, double atol) :
55+
op1_(op1), op2_(op2), rtol_(static_cast<inner_type>(rtol)), atol_(static_cast<inner_type>(atol))
56+
{
57+
static_assert(op1.Rank() == op2.Rank(), "Operator ranks must match in isclose()");
58+
for (int32_t i = 0; i < op2.Rank(); i++) {
59+
MATX_ASSERT_STR(op1.Size(i) == op2.Size(i), matxInvalidDim,
60+
"Size of each dimension must match in isclose()");
61+
}
62+
}
63+
64+
template <typename... Is>
65+
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ int operator()([[maybe_unused]] Is... indices) const
66+
{
67+
68+
return static_cast<int>(detail::_internal_abs(op1_(indices...) - op2_(indices...)) <=
69+
static_cast<inner_type>(atol_) + static_cast<inner_type>(rtol_) * detail::_internal_abs(op2_(indices...)));
70+
}
71+
72+
static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
73+
{
74+
return remove_cvref_t<Op1>::Rank();
75+
}
76+
77+
constexpr __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ index_t Size(int dim) const
78+
{
79+
return op1_.Size(dim);
80+
}
81+
82+
private:
83+
Op1 op1_;
84+
Op2 op2_;
85+
inner_type rtol_;
86+
inner_type atol_;
87+
88+
};
89+
}
90+
91+
/**
92+
* @brief Returns an integer tensor where an element is 1 if:
93+
* abs(op1 - op2) <= atol + rtol * abs(op2)
94+
*
95+
* or 0 otherwise
96+
*
97+
* @tparam Op1 First operator type
98+
* @tparam Op2 Second operator type
99+
* @param op1 First operator
100+
* @param op2 Second operator
101+
* @param rtol Relative tolerance
102+
* @param atol Absolute tolerance
103+
* @return IsClose operator
104+
*/
105+
template <typename Op1, typename Op2>
106+
__MATX_INLINE__ auto isclose(Op1 op1, Op2 op2, double rtol = 1e-5, double atol = 1e-8) {
107+
return detail::IsCloseOp<Op1, Op2>(op1, op2, rtol, atol);
108+
}
109+
} // end namespace matx

include/matx/operators/operators.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include "matx/operators/ifelse.h"
5252
#include "matx/operators/index.h"
5353
#include "matx/operators/interleaved.h"
54+
#include "matx/operators/isclose.h"
5455
#include "matx/operators/kronecker.h"
5556
#include "matx/operators/legendre.h"
5657
#include "matx/operators/permute.h"

include/matx/transforms/reduce.h

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ __MATX_DEVICE__ __MATX_INLINE__ void atomicAll(int *addr, int val)
385385
}
386386
};
387387

388+
388389
__MATX_DEVICE__ __MATX_INLINE__ void atomicAll(unsigned int *addr, unsigned int val)
389390
{
390391
unsigned int assumed;
@@ -2837,6 +2838,76 @@ void __MATX_INLINE__ all(OutType dest, const InType &in, const int (&dims)[D], E
28372838
#endif
28382839
}
28392840

2841+
/**
2842+
* Find if all values are != 0
2843+
*
2844+
* Returns a boolean value indicating whether all values in the set of inputs
2845+
* are non-zero. The same aggregation rules apply for input vs output tensor
2846+
* size and what type of reduction is done.
2847+
*
2848+
* @tparam OutType
2849+
* Output data type
2850+
* @tparam InType
2851+
* Input data type
2852+
*
2853+
* @param dest
2854+
* Destination view of reduction
2855+
* @param in
2856+
* Input data to reduce
2857+
* @param exec
2858+
* CUDA executor or stream ID
2859+
*/
2860+
template <typename OutType, typename InType1, typename InType2>
2861+
void __MATX_INLINE__ allclose(OutType dest, const InType1 &in1, const InType2 &in2, double rtol, double atol, cudaExecutor exec = 0)
2862+
{
2863+
#ifdef __CUDACC__
2864+
MATX_NVTX_START("allclose(" + get_type_str(in) + ")", matx::MATX_NVTX_LOG_API)
2865+
static_assert(OutType::Rank() == 0, "allclose output must be rank 0");
2866+
2867+
cudaStream_t stream = exec.getStream();
2868+
reduce(dest, isclose(in1, in2, rtol, atol), detail::reduceOpAll<int>(), stream, true);
2869+
#endif
2870+
}
2871+
2872+
/**
2873+
* Find if all values are != 0
2874+
*
2875+
* Returns a boolean value indicating whether all values in the set of inputs
2876+
* are non-zero. The same aggregation rules apply for input vs output tensor
2877+
* size and what type of reduction is done.
2878+
*
2879+
* @tparam OutType
2880+
* Output data type
2881+
* @tparam InType
2882+
* Input data type
2883+
*
2884+
* @param dest
2885+
* Destination view of reduction
2886+
* @param in
2887+
* Input data to reduce
2888+
* @param exec
2889+
* Single threaded host executor
2890+
*/
2891+
template <typename OutType, typename InType1, typename InType2>
2892+
void __MATX_INLINE__ allclose(OutType dest, const InType1 &in1, const InType2 &in2, double rtol, double atol, [[maybe_unused]] SingleThreadHostExecutor exec)
2893+
{
2894+
MATX_NVTX_START("allclose(" + get_type_str(in) + ")", matx::MATX_NVTX_LOG_API)
2895+
static_assert(OutType::Rank() == 0, "allclose output must be rank 0");
2896+
2897+
auto isc = isclose(in1, in2, rtol, atol);
2898+
2899+
auto ft = [&](auto &&lin, auto &&lout, [[maybe_unused]] auto &&lbegin, [[maybe_unused]] auto &&lend) {
2900+
*lout = std::all_of(lin, lin + TotalSize(in1), [](int vin) {
2901+
return vin != 0;
2902+
});
2903+
};
2904+
2905+
2906+
ReduceInput(ft, dest, isc);
2907+
}
2908+
2909+
2910+
28402911
/**
28412912
* Compute a variance reduction
28422913
*

test/00_operators/OperatorTests.cu

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,54 @@ TYPED_TEST(OperatorTestsAllExecs, ReshapeOp)
287287
MATX_EXIT_HANDLER();
288288
}
289289

290+
TYPED_TEST(OperatorTestsFloatAllExecs, IsClose)
291+
{
292+
MATX_ENTER_HANDLER();
293+
using TestType = std::tuple_element_t<0, TypeParam>;
294+
using ExecType = std::tuple_element_t<1, TypeParam>;
295+
296+
ExecType exec{};
297+
298+
// example-begin isclose-test-1
299+
auto A = make_tensor<TestType>({5, 5, 5});
300+
auto B = make_tensor<TestType>({5, 5, 5});
301+
auto C = make_tensor<int>({5, 5, 5});
302+
303+
(A = ones<TestType>(A.Shape())).run();
304+
(B = ones<TestType>(B.Shape())).run();
305+
(C = isclose(A, B)).run();
306+
// example-end isclose-test-1
307+
cudaStreamSynchronize(0);
308+
309+
for(int i=0; i < A.Size(0); i++) {
310+
for(int j=0; j < A.Size(1); j++) {
311+
for(int k=0; k < A.Size(2); k++) {
312+
ASSERT_EQ(C(i,j,k), 1);
313+
}
314+
}
315+
}
316+
317+
B(1,1,1) = 2;
318+
(C = isclose(A, B)).run();
319+
cudaStreamSynchronize(0);
320+
321+
for(int i=0; i < A.Size(0); i++) {
322+
for(int j=0; j < A.Size(1); j++) {
323+
for(int k=0; k < A.Size(2); k++) {
324+
if (i == 1 && j == 1 && k == 1) {
325+
ASSERT_EQ(C(i,j,k), 0);
326+
}
327+
else {
328+
ASSERT_EQ(C(i,j,k), 1);
329+
}
330+
}
331+
}
332+
}
333+
334+
MATX_EXIT_HANDLER();
335+
}
336+
337+
290338
TYPED_TEST(OperatorTestsFloatNonComplexAllExecs, FMod)
291339
{
292340
MATX_ENTER_HANDLER();

test/00_operators/ReductionTests.cu

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,36 @@ TYPED_TEST(ReductionTestsNumericNonComplexAllExecs, Any)
627627
MATX_EXIT_HANDLER();
628628
}
629629

630+
TYPED_TEST(ReductionTestsFloatNonComplexNonHalfAllExecs, AllClose)
631+
{
632+
MATX_ENTER_HANDLER();
633+
using TestType = std::tuple_element_t<0, TypeParam>;
634+
using ExecType = std::tuple_element_t<1, TypeParam>;
635+
636+
ExecType exec{};
637+
638+
// example-begin allclose-test-1
639+
auto A = make_tensor<TestType>({5, 5, 5});
640+
auto B = make_tensor<TestType>({5, 5, 5});
641+
auto C = make_tensor<int>();
642+
643+
(A = ones<TestType>(A.Shape())).run();
644+
(B = ones<TestType>(B.Shape())).run();
645+
allclose(C, A, B, 1e-5, 1e-8, exec);
646+
// example-end allclose-test-1
647+
cudaStreamSynchronize(0);
648+
649+
ASSERT_EQ(C(), 1);
650+
651+
B(1,1,1) = 2;
652+
allclose(C, A, B, 1e-5, 1e-8, exec);
653+
cudaStreamSynchronize(0);
654+
655+
ASSERT_EQ(C(), 0);
656+
657+
MATX_EXIT_HANDLER();
658+
}
659+
630660
TYPED_TEST(ReductionTestsNumericNonComplexAllExecs, All)
631661
{
632662
MATX_ENTER_HANDLER();

0 commit comments

Comments
 (0)