@@ -32,7 +32,6 @@ int main() {
32
32
libreCuDeviceGetName (name_buffer, 256 , device);
33
33
std::cout << " Device Name: " + std::string (name_buffer) << std::endl;
34
34
35
- LibreCUmodule module {};
36
35
37
36
// read cubin file
38
37
uint8_t *image;
@@ -46,39 +45,28 @@ int main() {
46
45
std::memcpy (image, bytes.data (), bytes.size ());
47
46
n_bytes = bytes.size ();
48
47
}
49
- CUDA_CHECK (libreCuModuleLoadData (&module , image, n_bytes));
50
48
51
- // read functions
52
- uint32_t num_funcs{};
53
- CUDA_CHECK (libreCuModuleGetFunctionCount (&num_funcs, module ));
54
- std::cout << " Num functions: " << num_funcs << std::endl;
55
-
56
- auto *functions = new LibreCUFunction[num_funcs];
57
- CUDA_CHECK (libreCuModuleEnumerateFunctions (functions, num_funcs, module ));
58
-
59
- for (size_t i = 0 ; i < num_funcs; i++) {
60
- LibreCUFunction func = functions[i];
61
- const char *func_name{};
62
- CUDA_CHECK (libreCuFuncGetName (&func_name, func));
63
- std::cout << " function \" " << func_name << " \" " << std::endl;
49
+ size_t num_kernels = 1025 ;
50
+ LibreCUmodule modules[num_kernels];
51
+ for (int i = 0 ; i < num_kernels; i++) {
52
+ CUDA_CHECK (libreCuModuleLoadData (modules + i, image, n_bytes));
64
53
}
65
54
66
- delete[] functions;
67
-
68
- // find function
69
- LibreCUFunction func{} ;
70
- CUDA_CHECK ( libreCuModuleGetFunction (&func, module , " emtpy_kernel " ));
55
+ // find functions
56
+ LibreCUFunction funcs[num_kernels];
57
+ for ( int i = 0 ; i < num_kernels; i++) {
58
+ CUDA_CHECK ( libreCuModuleGetFunction (funcs + i, modules[i], " emtpy_kernel " )) ;
59
+ }
71
60
72
61
// create stream
73
62
LibreCUstream stream{};
74
63
CUDA_CHECK (libreCuStreamCreate (&stream, 0 ));
75
64
76
65
void *params[] = {};
77
- size_t num_kernels = 1025 ;
78
66
79
67
auto start = std::chrono::high_resolution_clock::now ();
80
68
for (int i = 0 ; i < num_kernels; ++i) {
81
- CUDA_CHECK (libreCuLaunchKernel (func ,
69
+ CUDA_CHECK (libreCuLaunchKernel (funcs[i] ,
82
70
1 , 1 , 1 ,
83
71
1 , 1 , 1 ,
84
72
0 ,
@@ -105,7 +93,9 @@ int main() {
105
93
CUDA_CHECK (libreCuStreamDestroy (stream));
106
94
107
95
// unload modules
108
- CUDA_CHECK (libreCuModuleUnload (module ));
96
+ for (int i = 0 ; i < num_kernels; ++i) {
97
+ CUDA_CHECK (libreCuModuleUnload (modules[i]));
98
+ }
109
99
110
100
// destroy ctx
111
101
CUDA_CHECK (libreCuCtxDestroy (ctx));
0 commit comments