                                  KvCacheConfig, MoeConfig, MTPDecodingConfig,
                                  NGramDecodingConfig, SamplingParams,
                                  TorchCompileConfig)
+from defs.common import generate_dummy_loras
+from tensorrt_llm.lora_manager import LoraConfig
 from tensorrt_llm.quantization import QuantAlgo
 
 from ..conftest import (llm_models_root, parametrize_with_ids, skip_no_hopper,
@@ -590,6 +592,37 @@ def test_auto_dtype_chunked_prefill(self):
             task = GSM8K(self.MODEL_NAME)
             task.evaluate(llm)
 
+    # This is a smoke test to make sure LoRA works.
+    def test_lora(self):
+        model_path = f"{llm_models_root()}/gemma/gemma-3-1b-it/"
+        lora_rank = 32
+        num_loras = 1
+        print(f"Generating {num_loras} dummy LoRAs with rank {lora_rank}...")
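+        # Note: zero_weights=True should make the adapters numerical no-ops,
+        # so the GSM8K score is expected to match the base model and any
+        # regression points at the LoRA plumbing itself.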
+        lora_output_dirs = generate_dummy_loras(
+            hf_model_dir=model_path,
+            lora_output_dir="/tmp/lora_output",
+            num_loras=num_loras,
+            lora_rank=lora_rank,
+            # MLP modules ("gate_proj", "down_proj", "up_proj") are left out.
+            target_modules=["q_proj", "k_proj", "v_proj"],
+            zero_weights=True,
+        )
+        print("lora_output_dirs:", lora_output_dirs)
+        lora_config = LoraConfig(
+            lora_dir=lora_output_dirs,
+            lora_ckpt_source="hf",
+            max_lora_rank=lora_rank,
+            # MLP modules ("mlp_h_to_4h", "mlp_4h_to_h", "mlp_gate") are left out.
+            lora_target_modules=["attn_q", "attn_k", "attn_v"],
+            max_loras=num_loras,
+            max_cpu_loras=num_loras,
+        )
+        # Disable KV cache reuse as a workaround (WAR) for gaps in kernel
+        # support for Gemma3's non-inclusive sliding window size.
+        kv_cache_config = KvCacheConfig(
+            enable_block_reuse=False,
+            enable_partial_reuse=False,
+        )
+        with LLM(model_path,
+                 lora_config=lora_config,
+                 enable_lora=True,
+                 kv_cache_config=kv_cache_config) as llm:
+            task = GSM8K(self.MODEL_NAME)
+            task.evaluate(llm)
 
 class TestMixtral8x7B(LlmapiAccuracyTestHarness):
     MODEL_NAME = "mistralai/Mixtral-8x7B-v0.1"
595 | 628 | MODEL_NAME = "mistralai/Mixtral-8x7B-v0.1"
|
|
0 commit comments