We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent fdd7d53, commit 5011090 (Copy full SHA for 5011090)
driverapi/src/cmdqueue.cpp
@@ -568,7 +568,7 @@ NvCommandQueue::launchFunction(LibreCUFunction function,
568
// check launch dimensions
569
NvU32 max_threads = ((65536 / roundUp(maxOf(1u, function->num_registers) * 32, 256u)) / 4) * 4 * 32;
570
571
- uint32_t shmem_usage = function->shared_mem;
+ uint32_t shmem_usage = maxOf(function->shared_mem, sharedMemBytes);
572
573
NvU32 blockProd = blockDimX * blockDimY * blockDimZ;
574
if ((shmem_usage > sharedMemBytes) && (blockProd > 1024 || max_threads < blockProd)) {
0 commit comments