Skip to content

Commit bebf52f

Browse files
raayandhar authored and dominicshanshan committed
[None][feat] adding support for disaggregated multi-instance tests (NVIDIA#6674)
Signed-off-by: raayandhar <[email protected]>
Signed-off-by: Wangshanshan <[email protected]>
1 parent 557ef3a commit bebf52f

File tree

3 files changed

+41
-1
lines changed

3 files changed

+41
-1
lines changed

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 37 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -847,6 +847,43 @@ def test_auto_dtype(self, overlap_scheduler):
847847
task = MMLU(self.MODEL_NAME)
848848
task.evaluate(llm)
849849

850+
def test_chunked_prefill(self):
    """Evaluate GSM8K and MMLU with chunked prefill on the context server.

    A single context server (localhost:8001) and a single generation
    server (localhost:8002) are placed behind a disaggregated server on
    localhost:8000 using the "pytorch" backend. Only the context server
    sets ``enable_chunked_prefill`` (with ``max_num_tokens`` of 256) and
    ``disable_overlap_scheduler``; both workers set ``cuda_graph_config``
    to ``None`` and use the "DEFAULT" cache transceiver backend.
    """
    context_url = "localhost:8001"
    generation_url = "localhost:8002"

    # Each worker gets its own cache-transceiver dict; nothing is shared
    # between the two configs.
    gen_server_config = dict(
        cuda_graph_config=None,
        cache_transceiver_config=dict(backend="DEFAULT"),
    )
    ctx_server_config = dict(
        disable_overlap_scheduler=True,
        cuda_graph_config=None,
        cache_transceiver_config=dict(backend="DEFAULT"),
        enable_chunked_prefill=True,
        max_num_tokens=256,
    )

    disaggregated_server_config = {
        "hostname": "localhost",
        "port": 8000,
        "backend": "pytorch",
        "context_servers": {
            "num_instances": 1,
            "urls": [context_url],
        },
        "generation_servers": {
            "num_instances": 1,
            "urls": [generation_url],
        },
    }

    with launch_disaggregated_llm(
            disaggregated_server_config,
            ctx_server_config,
            gen_server_config,
            self.MODEL_PATH,
    ) as llm:
        # Same order as before: GSM8K first, then MMLU.
        for benchmark in (GSM8K, MMLU):
            benchmark(self.MODEL_NAME).evaluate(llm)
886+
850887

851888
@skip_pre_blackwell
852889
@pytest.mark.timeout(3600)
@@ -868,4 +905,3 @@ def test_mixed_ctx_gen_model(self, ctx_pp, gen_tp):
868905
test_sets=[GSM8K, MMLU],
869906
ctx_model=ctx_model,
870907
gen_model=gen_model)
871-

tests/integration/test_lists/qa/llm_function_full.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -572,6 +572,8 @@ accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen
572572
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=1-ctx_pp=4]
573573
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=2-ctx_pp=2]
574574
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=2-ctx_pp=4]
575+
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_multi_instance[GSM8K]
576+
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_multi_instance[MMLU]
575577
accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[False]
576578
accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[True]
577579
accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_auto_dtype[False]

tests/integration/test_lists/test-db/l0_dgx_h100.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,8 @@ l0_dgx_h100:
5454
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[GSM8K-gen_tp=2-ctx_pp=2]
5555
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=1-ctx_pp=2]
5656
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=2-ctx_pp=2]
57+
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_multi_instance[GSM8K]
58+
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_multi_instance[MMLU]
5759
- accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend
5860
- accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend
5961
- test_e2e.py::test_ptp_quickstart_advanced_bs1

0 commit comments

Comments
 (0)