Skip to content

Commit e71d3ac

Browse files
committed
Use distinct kernels in the many-kernels launch test
1 parent 7080e42 commit e71d3ac

File tree

1 file changed

+13 / -23 lines changed

1 file changed

+13 / -23 lines changed

tests/many_kernels_launch/main.cpp

Lines changed: 13 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@ int main() {
3232
libreCuDeviceGetName(name_buffer, 256, device);
3333
std::cout << "Device Name: " + std::string(name_buffer) << std::endl;
3434

35-
LibreCUmodule module{};
3635

3736
// read cubin file
3837
uint8_t *image;
@@ -46,39 +45,28 @@ int main() {
4645
std::memcpy(image, bytes.data(), bytes.size());
4746
n_bytes = bytes.size();
4847
}
49-
CUDA_CHECK(libreCuModuleLoadData(&module, image, n_bytes));
5048

51-
// read functions
52-
uint32_t num_funcs{};
53-
CUDA_CHECK(libreCuModuleGetFunctionCount(&num_funcs, module));
54-
std::cout << "Num functions: " << num_funcs << std::endl;
55-
56-
auto *functions = new LibreCUFunction[num_funcs];
57-
CUDA_CHECK(libreCuModuleEnumerateFunctions(functions, num_funcs, module));
58-
59-
for (size_t i = 0; i < num_funcs; i++) {
60-
LibreCUFunction func = functions[i];
61-
const char *func_name{};
62-
CUDA_CHECK(libreCuFuncGetName(&func_name, func));
63-
std::cout << " function \"" << func_name << "\"" << std::endl;
49+
size_t num_kernels = 1025;
50+
LibreCUmodule modules[num_kernels];
51+
for (int i = 0; i < num_kernels; i++) {
52+
CUDA_CHECK(libreCuModuleLoadData(modules + i, image, n_bytes));
6453
}
6554

66-
delete[] functions;
67-
68-
// find function
69-
LibreCUFunction func{};
70-
CUDA_CHECK(libreCuModuleGetFunction(&func, module, "emtpy_kernel"));
55+
// find functions
56+
LibreCUFunction funcs[num_kernels];
57+
for (int i = 0; i < num_kernels; i++) {
58+
CUDA_CHECK(libreCuModuleGetFunction(funcs + i, modules[i], "emtpy_kernel"));
59+
}
7160

7261
// create stream
7362
LibreCUstream stream{};
7463
CUDA_CHECK(libreCuStreamCreate(&stream, 0));
7564

7665
void *params[] = {};
77-
size_t num_kernels = 1025;
7866

7967
auto start = std::chrono::high_resolution_clock::now();
8068
for (int i = 0; i < num_kernels; ++i) {
81-
CUDA_CHECK(libreCuLaunchKernel(func,
69+
CUDA_CHECK(libreCuLaunchKernel(funcs[i],
8270
1, 1, 1,
8371
1, 1, 1,
8472
0,
@@ -105,7 +93,9 @@ int main() {
10593
CUDA_CHECK(libreCuStreamDestroy(stream));
10694

10795
// unload module
108-
CUDA_CHECK(libreCuModuleUnload(module));
96+
for (int i = 0; i < num_kernels; ++i) {
97+
CUDA_CHECK(libreCuModuleUnload(modules[i]));
98+
}
10999

110100
// destroy ctx
111101
CUDA_CHECK(libreCuCtxDestroy(ctx));

0 commit comments

Comments
 (0)