@@ -273,6 +273,19 @@ def get_statistics_dict(self) -> Dict[str, Any]:
273273 },
274274 }
275275
276+ # Retrieve KV cache information.
277+ kv_cache_config = self .kwargs .get ("kv_cache_config" , KvCacheConfig ())
278+ if isinstance (kv_cache_config , KvCacheConfig ):
279+ kv_cache_dtype = kv_cache_config .dtype
280+ kv_cache_mem_percent = kv_cache_config .free_gpu_memory_fraction
281+ elif isinstance (kv_cache_config , dict ):
282+ kv_cache_dtype = kv_cache_config .get ("dtype" , "auto" )
283+ kv_cache_mem_percent = kv_cache_config .get (
284+ "free_gpu_memory_fraction" )
285+ else :
286+ raise ValueError (
287+ f"Invalid kv_cache_config type: { type (kv_cache_config )} ." )
288+
276289 # Engine/Backend details
277290 if self .rt_cfg .backend not in ('pytorch' , '_autodeploy' ):
278291 config_path = self .rt_cfg .engine_dir / "config.json"
@@ -302,18 +315,6 @@ def get_statistics_dict(self) -> Dict[str, Any]:
302315 model = self .rt_cfg .model_path or self .rt_cfg .model
303316 model_config = ModelConfig .from_pretrained (model ,
304317 trust_remote_code = True )
305- kv_cache_config = self .kwargs .get ("kv_cache_config" ,
306- KvCacheConfig ())
307- if isinstance (kv_cache_config , KvCacheConfig ):
308- kv_cache_dtype = kv_cache_config .dtype
309- kv_cache_mem_percent = kv_cache_config .free_gpu_memory_fraction
310- elif isinstance (kv_cache_config , dict ):
311- kv_cache_dtype = kv_cache_config .get ("dtype" , "auto" )
312- kv_cache_mem_percent = kv_cache_config .get (
313- "free_gpu_memory_fraction" )
314- else :
315- raise ValueError (
316- f"Invalid kv_cache_config type: { type (kv_cache_config )} ." )
317318
318319 validate_and_set_kv_cache_quant (model_config , kv_cache_dtype )
319320
0 commit comments