
Commit 1bfe456

Misc Cleanup (#814)
* Cleanup #define's in filter.cuh
* Cleanup #define's in other files
* Fix dereferencing type-punned pointer bug in Release mode
* Fix Werror=uninitialized compile error when MATX_EN_OPENBLAS=ON in Release mode
* Fix uninitialized variable bug in svd plan
* Update PrintTests for default tensor name
1 parent b1a02f1 commit 1bfe456


51 files changed (+650 −631 lines)
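The "dereferencing type-punned pointer" item in the message above refers to a class of bug that typically only surfaces in Release mode, where strict-aliasing optimizations are enabled. The snippet below is a generic illustration of the bug and its usual memcpy fix, not the actual MatX code:

#include <cstdint>
#include <cstring>

// Reading a float's bits through a cast pointer violates strict aliasing;
// GCC warns "dereferencing type-punned pointer will break strict-aliasing
// rules" and may miscompile it at -O2 (typical Release flags).
uint32_t bits_bad(float f) {
  return *reinterpret_cast<uint32_t*>(&f);  // undefined behavior
}

// The usual fix: copy the bytes instead of punning the pointer. A memcpy
// of this size compiles down to a single register move.
uint32_t bits_ok(float f) {
  uint32_t u;
  std::memcpy(&u, &f, sizeof(u));
  return u;
}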

CMakeLists.txt

Lines changed: 5 additions & 5 deletions
@@ -185,10 +185,10 @@ if (MATX_NVTX_FLAGS)
     target_compile_definitions(matx INTERFACE MATX_NVTX_FLAGS)
 endif()
 if (MATX_BUILD_32_BIT)
-    set(INT_TYPE "lp64")
-    target_compile_definitions(matx INTERFACE INDEX_32_BIT)
+    set(MATX_NVPL_INT_TYPE "lp64")
+    target_compile_definitions(matx INTERFACE MATX_INDEX_32_BIT)
 else()
-    set(INT_TYPE "ilp64")
+    set(MATX_NVPL_INT_TYPE "ilp64")
 endif()

 # Host support
@@ -211,13 +211,13 @@ if (MATX_EN_NVPL OR MATX_EN_X86_FFTW OR MATX_EN_BLIS OR MATX_EN_OPENBLAS)
 endif()

 if (MATX_EN_NVPL)
-    message(STATUS "Enabling NVPL library support for ARM CPUs with ${INT_TYPE} interface")
+    message(STATUS "Enabling NVPL library support for ARM CPUs with ${MATX_NVPL_INT_TYPE} interface")
     find_package(nvpl REQUIRED COMPONENTS fft blas lapack HINTS ${blas_DIR})
     if (NOT MATX_BUILD_32_BIT)
         target_compile_definitions(matx INTERFACE NVPL_ILP64)
     endif()
     target_compile_definitions(matx INTERFACE NVPL_LAPACK_COMPLEX_CUSTOM)
-    target_link_libraries(matx INTERFACE nvpl::fftw nvpl::blas_${INT_TYPE}_omp nvpl::lapack_${INT_TYPE}_omp)
+    target_link_libraries(matx INTERFACE nvpl::fftw nvpl::blas_${MATX_NVPL_INT_TYPE}_omp nvpl::lapack_${MATX_NVPL_INT_TYPE}_omp)
     target_compile_definitions(matx INTERFACE MATX_EN_NVPL)
 else()
     # FFTW
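The CMake change is a namespacing cleanup: the cache variable INT_TYPE and the compile definition INDEX_32_BIT are generic enough to collide with identically named symbols in projects that consume MatX, so both gain a MATX_ prefix. As a rough illustration (not the actual MatX source), a definition like MATX_INDEX_32_BIT typically gates the library's index width like this:

#include <cstdint>

// Illustrative sketch only; the real MatX typedef may differ in detail.
#ifdef MATX_INDEX_32_BIT
using index_t = int32_t;   // 32-bit indexing, paired with the "lp64" BLAS/LAPACK interface
#else
using index_t = int64_t;   // 64-bit indexing, paired with the "ilp64" interface
#endif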

examples/black_scholes.cu

Lines changed: 14 additions & 14 deletions
@@ -46,8 +46,8 @@ using namespace matx;
  * instructions. While caching helps, this can have a slight performance impact when compared to native CUDA
  * kernels. To work around this problem, complex expressions can be placed in a custom operator by adding some
  * boilerplate code around the original expression. This custom operator can then be used either alone or inside
- * other arithmetic expressions, and only a single load is issues for each tensor.
- *
+ * other arithmetic expressions, and only a single load is issues for each tensor.
+ *
  * This example uses the Black-Scholes equtation to demonstrate the two ways to implement the equation in MatX, and
  * shows the performance difference.
  */
@@ -76,7 +76,7 @@ public:
     auto d2 = d1 - VsqrtT;
     auto cdf_d1 = normcdf(d1);
     auto cdf_d2 = normcdf(d2);
-    auto expRT = exp(-1 * r * T);
+    auto expRT = exp(-1 * r * T);

     out_(idx) = S * cdf_d1 - K * expRT * cdf_d2;
   }
@@ -87,20 +87,20 @@ public:

 /* Arithmetic expression */
 template<typename T1>
-void compute_black_scholes_matx(tensor_t<T1,1>& K,
-                                tensor_t<T1,1>& S,
-                                tensor_t<T1,1>& V,
-                                tensor_t<T1,1>& r,
-                                tensor_t<T1,1>& T,
-                                tensor_t<T1,1>& output,
+void compute_black_scholes_matx(tensor_t<T1,1>& K,
+                                tensor_t<T1,1>& S,
+                                tensor_t<T1,1>& V,
+                                tensor_t<T1,1>& r,
+                                tensor_t<T1,1>& T,
+                                tensor_t<T1,1>& output,
                                 cudaExecutor& exec)
 {
   auto VsqrtT = V * sqrt(T);
   auto d1 = (log(S / K) + (r + 0.5 * V * V) * T) / VsqrtT ;
   auto d2 = d1 - VsqrtT;
   auto cdf_d1 = normcdf(d1);
   auto cdf_d2 = normcdf(d2);
-  auto expRT = exp(-1 * r * T);
+  auto expRT = exp(-1 * r * T);

   (output = S * cdf_d1 - K * expRT * cdf_d2).run(exec);
 }
@@ -120,13 +120,13 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   tensor_t<dtype, 1> V_tensor{{input_size}};
   tensor_t<dtype, 1> r_tensor{{input_size}};
   tensor_t<dtype, 1> T_tensor{{input_size}};
-  tensor_t<dtype, 1> output_tensor{{input_size}};
+  tensor_t<dtype, 1> output_tensor{{input_size}};

   cudaStream_t stream;
   cudaStreamCreate(&stream);
   cudaExecutor exec{stream};

-  compute_black_scholes_matx(K_tensor, S_tensor, V_tensor, r_tensor, T_tensor, output_tensor, exec);
+  compute_black_scholes_matx(K_tensor, S_tensor, V_tensor, r_tensor, T_tensor, output_tensor, exec);

   cudaEvent_t start, stop;
   cudaEventCreate(&start);
@@ -154,11 +154,11 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   cudaEventElapsedTime(&time_ms, start, stop);

   printf("Time with custom operator = %.2fms per iteration\n",
-         time_ms / num_iterations);
+         time_ms / num_iterations);

   cudaEventDestroy(start);
   cudaEventDestroy(stop);
   cudaStreamDestroy(stream);
-  CUDA_CHECK_LAST_ERROR();
+  MATX_CUDA_CHECK_LAST_ERROR();
   MATX_EXIT_HANDLER();
 }
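Most of the black_scholes.cu pairs above differ only in whitespace; the substantive change is the rename to MATX_CUDA_CHECK_LAST_ERROR. The file's header comment describes wrapping a complex expression in a custom operator so that each tensor is loaded only once. A minimal sketch of that pattern follows; ScaleOp and its body are illustrative placeholders, not the file's actual BlackScholes boilerplate:

// Sketch of the custom-operator pattern the header comment describes.
template <typename Out, typename In>
class ScaleOp : public matx::BaseOp<ScaleOp<Out, In>> {
public:
  ScaleOp(Out out, In in) : out_(out), in_(in) {}

  // Called once per element, so every input tensor is read exactly once:
  // the "single load" benefit the comment refers to.
  __device__ inline void operator()(matx::index_t idx) {
    out_(idx) = in_(idx) * 2;
  }

  __host__ __device__ inline matx::index_t Size(int dim) const {
    return out_.Size(dim);
  }

private:
  Out out_;
  In in_;
};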

examples/cgsolve.cu

Lines changed: 1 addition & 1 deletion
@@ -86,6 +86,6 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   // example-end sync-test-1
   printf ("max l2 norm: %f\n", (float)sqrt(maxn()));

-  CUDA_CHECK_LAST_ERROR();
+  MATX_CUDA_CHECK_LAST_ERROR();
   MATX_EXIT_HANDLER();
 }
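The CUDA_CHECK_LAST_ERROR to MATX_CUDA_CHECK_LAST_ERROR rename repeated throughout these examples is the same MATX_ prefixing applied to the CMake symbols. For readers unfamiliar with the idiom, a macro of this kind conventionally checks cudaGetLastError and aborts on failure. The sketch below shows that conventional shape, not MatX's exact definition, which may report errors differently (for example by throwing):

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Conventional check-last-error macro (illustrative).
#define CHECK_LAST_ERROR()                                       \
  do {                                                           \
    cudaError_t err_ = cudaGetLastError();                       \
    if (err_ != cudaSuccess) {                                   \
      fprintf(stderr, "CUDA error %s at %s:%d\n",                \
              cudaGetErrorString(err_), __FILE__, __LINE__);     \
      exit(EXIT_FAILURE);                                        \
    }                                                            \
  } while (0)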

examples/channelize_poly_bench.cu

Lines changed: 2 additions & 2 deletions
@@ -101,7 +101,7 @@ void ChannelizePolyBench(matx::index_t channel_start, matx::index_t channel_stop
     }
     cudaEventRecord(stop, stream);
     exec.sync();
-    CUDA_CHECK_LAST_ERROR();
+    MATX_CUDA_CHECK_LAST_ERROR();
     cudaEventElapsedTime(&elapsed_ms, start, stop);

     const double avg_elapsed_us = (static_cast<double>(elapsed_ms)/NUM_ITERATIONS)*1.0e3;
@@ -112,7 +112,7 @@ void ChannelizePolyBench(matx::index_t channel_start, matx::index_t channel_stop
     printf("\n");
   }

-  CUDA_CHECK_LAST_ERROR();
+  MATX_CUDA_CHECK_LAST_ERROR();

   cudaEventDestroy(start);
   cudaEventDestroy(stop);
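Both hunks sit inside the standard CUDA event-timing loop this benchmark is built around. Pulled out of context, the pattern looks like the generic sketch below; the benchmark itself synchronizes via exec.sync() rather than cudaEventSynchronize:

cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);

cudaEventRecord(start, stream);      // enqueue start marker on the stream
// ... enqueue the NUM_ITERATIONS of work being timed on `stream` ...
cudaEventRecord(stop, stream);       // enqueue stop marker
cudaEventSynchronize(stop);          // wait until `stop` has been reached

float elapsed_ms = 0.0f;
cudaEventElapsedTime(&elapsed_ms, start, stop);  // milliseconds between markers
// (elapsed_ms / NUM_ITERATIONS) * 1.0e3 then yields microseconds per
// iteration, matching the avg_elapsed_us line in the first hunk.

cudaEventDestroy(start);
cudaEventDestroy(stop);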

examples/conv2d.cu

Lines changed: 8 additions & 8 deletions
@@ -39,23 +39,23 @@ using namespace matx;
 int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
 {
   MATX_ENTER_HANDLER();
-
+
   index_t iN = 4;
   index_t iM = 6;
-
+
   index_t fN = 4;
   index_t fM = 2;
-
+
   auto in = make_tensor<int>({iN,iM});
   auto filter = make_tensor<int>({fN,fM});
-
+
   in.SetVals({ {1,2,3,4,5,6},
                {5,4,3,2,1,0},
                {3,4,5,6,7,8},
                {1,2,3,4,5,6},
             });

-  filter.SetVals({ {1,2},
+  filter.SetVals({ {1,2},
                    {3,4},
                    {5,6},
                    {7,8}});
@@ -73,9 +73,9 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   index_t oM = iM - fM + 1;
   auto mode = MATX_C_MODE_VALID;
 #endif
-
+
   auto out = make_tensor<int>({oN,oM});
-
+
   (out = conv2d(in, filter, mode)).run();

   printf("in:\n");
@@ -86,6 +86,6 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   print(out);


-  CUDA_CHECK_LAST_ERROR();
+  MATX_CUDA_CHECK_LAST_ERROR();
   MATX_EXIT_HANDLER();
 }
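For the sizes this example sets up (iN = 4, iM = 6, fN = 4, fM = 2), the MATX_C_MODE_VALID branch shown in the second hunk produces a 1 x 5 output: oN = iN - fN + 1 = 4 - 4 + 1 = 1, and oM = iM - fM + 1 = 6 - 2 + 1 = 5.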

examples/convolution.cu

Lines changed: 2 additions & 2 deletions
@@ -93,7 +93,7 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   for (uint32_t i = 0; i < iterations; i++) {
     (outView = conv1d(inView, filterView, matxConvCorrMode_t::MATX_C_MODE_FULL)).run(exec);
   }
-
+

   cudaEventRecord(stop, stream);
   exec.sync();
@@ -149,6 +149,6 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)

   matxPrintMemoryStatistics();

-  CUDA_CHECK_LAST_ERROR();
+  MATX_CUDA_CHECK_LAST_ERROR();
   MATX_EXIT_HANDLER();
 }
