Commit 8db3299

test: Add time logging for lora tests

Signed-off-by: Balaram Buddharaju <[email protected]>

Parent: ad662dd

4 files changed: 54 additions, 2 deletions

tests/integration/defs/common.py

Lines changed: 21 additions & 0 deletions
@@ -16,6 +16,7 @@
 import os
 import platform
 import re
+import time
 from difflib import SequenceMatcher
 from pathlib import Path

@@ -771,16 +772,23 @@ def test_multi_lora_support(
         zero_lora_weights=True,
         use_code_prompts=False,
 ):
+    start_time = time.time()
     print("Creating dummy LoRAs...")
+    lora_start = time.time()
     lora_paths = generate_dummy_loras(
         hf_model_dir=hf_model_dir,
         lora_output_dir=llm_venv.get_working_directory(),
         num_loras=num_loras,
         lora_rank=lora_rank,
         target_modules=target_hf_modules,
         zero_weights=zero_lora_weights)
+    lora_end = time.time()
+    print(
+        f"Creating dummy LoRAs completed in {(lora_end - lora_start):.2f} seconds."
+    )

     print("Build engines...")
+    build_start = time.time()
     build_cmd = [
         "trtllm-build",
         f"--checkpoint_dir={tllm_ckpt_dir}",
@@ -801,6 +809,9 @@ def test_multi_lora_support(
         "--max_beam_width=1",
     ]
     check_call(" ".join(build_cmd), shell=True, env=llm_venv._new_env)
+    build_end = time.time()
+    print(
+        f"Build engines completed in {(build_end - build_start):.2f} seconds.")

     if use_code_prompts:
         input_prompts = [
@@ -822,6 +833,7 @@ def test_multi_lora_support(
     ]

     print("Run inference with C++ runtime with pybind...")
+    inference_start = time.time()
     run_script = f"{example_root}/../../../run.py" if "core" in example_root else f"{example_root}/../run.py"
     run_cmd = [
         run_script,
@@ -842,6 +854,15 @@ def test_multi_lora_support(
         "--max_output_len=30",
     ]
     venv_check_call(llm_venv, run_cmd)
+    inference_end = time.time()
+    print(
+        f"Inference completed in {(inference_end - inference_start):.2f} seconds."
+    )
+
+    total_time = time.time() - start_time
+    print(
+        f"Total test_multi_lora_support execution time: {total_time:.2f} seconds"
+    )


 def get_dummy_spec_decoding_heads(hf_model_dir,
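Every phase above follows the same start/end/print bookkeeping. As a design note, a small context manager could express that pattern once; the sketch below is hypothetical (the timed helper is not part of this commit) and assumes only the Python standard library:

    import time
    from contextlib import contextmanager

    @contextmanager
    def timed(label):
        # Mirrors the commit's log format: "<label> completed in X.XX seconds."
        start = time.time()
        try:
            yield
        finally:
            print(f"{label} completed in {(time.time() - start):.2f} seconds.")

With such a helper, each phase collapses to a single with block, e.g. `with timed("Build engines"): check_call(...)`, and the overall total can still come from one outer timed block wrapping the whole test body.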

tests/integration/defs/examples/test_gemma.py

Lines changed: 16 additions & 0 deletions
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import time
 from pathlib import Path

 import pytest
@@ -429,7 +430,9 @@ def test_hf_gemma_fp8_base_bf16_multi_lora(gemma_model_root,
                                           batch_size=8):
     "Run Gemma models with multiple dummy LoRAs."

+    start_time = time.time()
     print("Convert checkpoint by modelopt...")
+    convert_start = time.time()
     kv_cache_dtype = 'fp8' if qformat == 'fp8' else 'int8'
     convert_cmd = [
         f"{gemma_example_root}/../../../quantization/quantize.py",
@@ -441,7 +444,13 @@ def test_hf_gemma_fp8_base_bf16_multi_lora(gemma_model_root,
         f"--output_dir={cmodel_dir}",
     ]
     venv_check_call(llm_venv, convert_cmd)
+    convert_end = time.time()
+    print(
+        f"Convert checkpoint completed in {(convert_end - convert_start):.2f} seconds."
+    )

+    test_multi_lora_start = time.time()
+    print("Calling test_multi_lora_support...")
     test_multi_lora_support(
         hf_model_dir=gemma_model_root,
         tllm_ckpt_dir=cmodel_dir,
@@ -454,3 +463,10 @@ def test_hf_gemma_fp8_base_bf16_multi_lora(gemma_model_root,
         target_trtllm_modules=["attn_q", "attn_k", "attn_v"],
         zero_lora_weights=True,
     )
+    test_multi_lora_end = time.time()
+    print(
+        f"test_multi_lora_support completed in {(test_multi_lora_end - test_multi_lora_start):.2f} seconds"
+    )
+
+    total_time = time.time() - start_time
+    print(f"Total function execution time: {total_time:.2f} seconds")

tests/integration/defs/examples/test_granite.py

Lines changed: 17 additions & 0 deletions
@@ -14,6 +14,7 @@
 # limitations under the License.

 import os
+import time

 import pytest
 from defs.common import (convert_weights, test_multi_lora_support,
@@ -96,7 +97,9 @@ def test_granite_bf16_lora(llama_example_root,
     "Run Granite 3.0 models with multiple dummy LoRAs."

     # TODO: Enable fp8 quantization when ModelOpt changes for Granite are available.
+    start_time = time.time()
     print("Converting checkpoint...")
+    convert_start = time.time()
     model_name = os.path.basename(llm_granite_model_root)
     dtype = 'bfloat16'

@@ -108,6 +111,11 @@ def test_granite_bf16_lora(llama_example_root,
         model_path=llm_granite_model_root,
         data_type=dtype,
     )
+    convert_end = time.time()
+    print(
+        f"Convert checkpoint completed in {(convert_end - convert_start):.2f} seconds."
+    )
+
     target_hf_modules = [
         "q_proj",
         "k_proj",
@@ -122,6 +130,8 @@ def test_granite_bf16_lora(llama_example_root,
         target_hf_modules += ["moe_h_to_4h", "moe_4h_to_h", "moe_gate"]
         target_trtllm_modules += ["moe_h_to_4h", "moe_4h_to_h", "moe_gate"]

+    print("Calling test_multi_lora_support...")
+    test_multi_lora_start = time.time()
     test_multi_lora_support(
         hf_model_dir=llm_granite_model_root,
         tllm_ckpt_dir=ckpt_dir,
@@ -134,3 +144,10 @@ def test_granite_bf16_lora(llama_example_root,
         target_trtllm_modules=target_trtllm_modules,
         zero_lora_weights=True,
     )
+    test_multi_lora_end = time.time()
+    print(
+        f"test_multi_lora_support completed in {(test_multi_lora_end - test_multi_lora_start):.2f} seconds"
+    )
+
+    total_time = time.time() - start_time
+    print(f"Total function execution time: {total_time:.2f} seconds")

tests/integration/test_lists/waives.txt

Lines changed: 0 additions & 2 deletions
@@ -393,9 +393,7 @@ examples/test_gpt.py::test_starcoder_fp8_quantization_2gpu[starcoderplus] SKIP (
 examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float16-bs:8-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5360086)
 examples/test_multimodal.py::test_llm_multimodal_general[llava-1.5-7b-hf-pp:1-tp:1-float16-bs:8-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5360086)
 test_e2e.py::test_trtllm_bench_llmapi_launch[trt_backend-llama-v3-llama3-8b] SKIP (https://nvbugs/5320234)
-examples/test_granite.py::test_granite_bf16_lora[granite-3.0-1b-a400m-instruct] SKIP (https://nvbugs/5374145)
 stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-GUARANTEED_NO_EVICT-pytorch-stress-test] SKIP (https://nvbugs/5375646)
-examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-2-9b-it] SKIP (https://nvbugs/5376087)
 full:GH200/disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_attention_dp_one[DeepSeek-V3-Lite-fp8] SKIP (https://nvbugs/5375966)
 accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype SKIP (https://nvbugs/5375620)
 test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1] SKIP (https://nvbugs/5377465)
