Skip to content

Commit 0667e03

Browse files
committed
Fix a bug that prevented smooth_quant_plugin from loading
1 parent 242703d commit 0667e03

File tree

1 file changed

+24
-8
lines changed

1 file changed

+24
-8
lines changed

tensorrt_llm_july-release-v1/cpp/tensorrt_llm/kernels/cutlass_kernels/int8_gemm/int8_gemm_template.h

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -372,8 +372,11 @@ template <typename T>
372372
float CutlassInt8GemmRunner<T>::profileConfig(const tkc::CutlassGemmConfig& config, tk::QuantOption quantOption, int m,
373373
int n, int k, int8_t* A, int8_t* B, void* C, float* alphaCol, float* alphaRow, char* workspace)
374374
{
375-
constexpr int warmup = 5;
376-
constexpr int runs = 15;
375+
// use fewer profiling iterations to reduce shared memory usage
376+
// try to make warmup:run = 1:3
377+
// timings differ from GPU to GPU
378+
constexpr int warmup = 3;
379+
constexpr int runs = 10;
377380

378381
const auto workspaceBytes = getWorkspaceSize(m, n, k);
379382

@@ -423,19 +426,32 @@ tkc::CutlassGemmConfig CutlassInt8GemmRunner<T>::profileGemm(tk::QuantOption qua
423426

424427
float bestTime = std::numeric_limits<float>::max();
425428
tkc::CutlassGemmConfig bestConfig;
426-
429+
float time = bestTime;
430+
bool is_ok = false;
427431
for (int ii = 0; ii < candidateConfigs.size(); ++ii)
428432
{
429433
tkc::CutlassGemmConfig candidateConfig = candidateConfigs[ii];
430-
const float time = profileConfig(candidateConfig, quantOption, m, n, k, A, B, C, alphaCol, alphaRow, workspace);
431-
if (time < bestTime)
432-
{
434+
435+
try {
436+
time = profileConfig(candidateConfig, quantOption, m, n, k, A, B, C, alphaCol, alphaRow, workspace);
437+
is_ok = true;
438+
} catch (...) {
439+
std::ostringstream msg;
440+
msg << "it seem init failed, because has no enough shared memory.";
441+
TLLM_LOG_DEBUG(msg.str());
442+
}
443+
if (time < bestTime) {
433444
bestConfig = candidateConfig;
434445
bestTime = time;
435446
}
436447
}
437-
438-
return bestConfig;
448+
if (is_ok) {
449+
return bestConfig;
450+
} else {
451+
std::ostringstream msg;
452+
msg << "it seem can't found any good config.";
453+
TLLM_LOG_ERROR(msg.str());
454+
}
439455
}
440456

441457
template <typename T>

0 commit comments

Comments
 (0)