We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a00ca11, commit 6742730. Copy full SHA for 6742730
tensorrt_llm/_torch/pyexecutor/model_engine.py
@@ -648,6 +648,14 @@ def release_batch(result: ScheduledRequests | None):
648
return
649
650
with contextlib.ExitStack() as stack:
651
+
652
+ def clean_up_kv_cache():
653
+ # Zero the KV cache; NaNs may be introduced during warmup
654
+ for layer_idx in kv_cache_manager.layer_offsets.keys():
655
+ kv_cache_manager.get_buffers(layer_idx).zero_()
656
657
+ stack.callback(clean_up_kv_cache)
658
659
if self._torch_compile_enabled:
660
661
def disable_optimization(backend: Backend):
0 commit comments