From 674273041bc38853099c9f30d209f6c385b26859 Mon Sep 17 00:00:00 2001
From: Pengbo Wang <221450789+pengbowang-nv@users.noreply.github.com>
Date: Wed, 13 Aug 2025 14:47:32 +0000
Subject: [PATCH] set zero for kvcache after warmup to avoid nan

Signed-off-by: Pengbo Wang <221450789+pengbowang-nv@users.noreply.github.com>
---
 tensorrt_llm/_torch/pyexecutor/model_engine.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorrt_llm/_torch/pyexecutor/model_engine.py b/tensorrt_llm/_torch/pyexecutor/model_engine.py
index 2d00cee05f0..6aea018bded 100644
--- a/tensorrt_llm/_torch/pyexecutor/model_engine.py
+++ b/tensorrt_llm/_torch/pyexecutor/model_engine.py
@@ -648,6 +648,14 @@ def release_batch(result: ScheduledRequests | None):
             return
 
         with contextlib.ExitStack() as stack:
+
+            def clean_up_kv_cache():
+                # Warmup may leave NaNs in the KV cache; zero each layer buffer.
+                for layer_idx in kv_cache_manager.layer_offsets:
+                    kv_cache_manager.get_buffers(layer_idx).zero_()
+
+            stack.callback(clean_up_kv_cache)
+
             if self._torch_compile_enabled:
 
                 def disable_optimization(backend: Backend):