Skip to content

Commit 569356c

Browse files
committed
Further tweaks for KV info.
Signed-off-by: Frank Di Natale <[email protected]>
1 parent eb9b9c4 commit 569356c

File tree

1 file changed

+13
-12
lines changed

1 file changed

+13
-12
lines changed

tensorrt_llm/bench/dataclasses/reporting.py

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -273,6 +273,19 @@ def get_statistics_dict(self) -> Dict[str, Any]:
273 273
},
274 274
}
275 275

276+
# Retrieve KV cache information.
277+
kv_cache_config = self.kwargs.get("kv_cache_config", KvCacheConfig())
278+
if isinstance(kv_cache_config, KvCacheConfig):
279+
kv_cache_dtype = kv_cache_config.dtype
280+
kv_cache_mem_percent = kv_cache_config.free_gpu_memory_fraction
281+
elif isinstance(kv_cache_config, dict):
282+
kv_cache_dtype = kv_cache_config.get("dtype", "auto")
283+
kv_cache_mem_percent = kv_cache_config.get(
284+
"free_gpu_memory_fraction")
285+
else:
286+
raise ValueError(
287+
f"Invalid kv_cache_config type: {type(kv_cache_config)}.")
288+
276 289
# Engine/Backend details
277 290
if self.rt_cfg.backend not in ('pytorch', '_autodeploy'):
278 291
config_path = self.rt_cfg.engine_dir / "config.json"
@@ -302,18 +315,6 @@ def get_statistics_dict(self) -> Dict[str, Any]:
302 315
model = self.rt_cfg.model_path or self.rt_cfg.model
303 316
model_config = ModelConfig.from_pretrained(model,
304 317
trust_remote_code=True)
305-
kv_cache_config = self.kwargs.get("kv_cache_config",
306-
KvCacheConfig())
307-
if isinstance(kv_cache_config, KvCacheConfig):
308-
kv_cache_dtype = kv_cache_config.dtype
309-
kv_cache_mem_percent = kv_cache_config.free_gpu_memory_fraction
310-
elif isinstance(kv_cache_config, dict):
311-
kv_cache_dtype = kv_cache_config.get("dtype", "auto")
312-
kv_cache_mem_percent = kv_cache_config.get(
313-
"free_gpu_memory_fraction")
314-
else:
315-
raise ValueError(
316-
f"Invalid kv_cache_config type: {type(kv_cache_config)}.")
317 318

318 319
validate_and_set_kv_cache_quant(model_config, kv_cache_dtype)
319 320

0 commit comments

Comments
 (0)