Skip to content

Commit 9b8ba74

Browse files
committed
adjust test configs
Signed-off-by: Ivy Zhang <[email protected]>
1 parent 93a0fd0 commit 9b8ba74

File tree

4 files changed: +17 −10 lines changed

tests/integration/defs/accuracy/references/gsm8k.yaml

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 meta-llama/Llama-3.1-8B-Instruct:
   - accuracy: 74.20
-  - spec_dec_algo: NGRAM
+  - spec_dec_algo: NGram
     accuracy: 74.20
   - quant_algo: FP8
     accuracy: 74.30
```

tests/integration/defs/accuracy/references/mmlu.yaml

Lines changed: 2 additions & 2 deletions
```diff
@@ -20,9 +20,9 @@ meta-llama/Llama-3.1-8B:
     accuracy: 64.99
 meta-llama/Llama-3.1-8B-Instruct:
   - accuracy: 68.17
-  - spec_dec_algo: EAGLE3
+  - spec_dec_algo: Eagle
     accuracy: 68.20
-  - spec_dec_algo: NGRAM
+  - spec_dec_algo: NGram
     accuracy: 68.17
   - quant_algo: FP8
     accuracy: 67.93
```

tests/integration/defs/accuracy/test_llm_api.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -137,7 +137,8 @@ def test_fp8_pp2(self):
         with LLM(self.MODEL_PATH,
                  pipeline_parallel_size=2,
                  quant_config=quant_config,
-                 kv_cache_config=kv_cache_config) as llm:
+                 kv_cache_config=kv_cache_config,
+                 max_batch_size=64) as llm:
             task = CnnDailymail(self.MODEL_NAME)
             task.evaluate(llm)
```

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 12 additions & 6 deletions
```diff
@@ -204,6 +204,7 @@ def test_fp8_llm_sampler(self):
             sampling_params=sampling_params,
             extra_acc_spec="temperature=0.8,top_p=0.95")

+    @skip_pre_hopper
     def test_fp8_beam_search(self):
         model_path = f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct-FP8"
         pytorch_config = dict(disable_overlap_scheduler=True)
@@ -228,6 +229,7 @@ def test_fp8_beam_search(self):
             sampling_params=sampling_params,
             extra_acc_spec="beam_width=4")

+    @skip_pre_hopper
     def test_eagle3(self):
         pytorch_config = dict(
             disable_overlap_scheduler=True,
@@ -250,15 +252,18 @@ def test_eagle3(self):
         task = MMLU(self.MODEL_NAME)
         task.evaluate(llm)

+    @skip_pre_hopper
     def test_ngram(self):
-        pytorch_config = dict(disable_overlap_scheduler=True)
+        pytorch_config = dict(
+            disable_overlap_scheduler=True,
+            cuda_graph_config=CudaGraphConfig(batch_sizes=[1]),
+        )

         kv_cache_config = KvCacheConfig(enable_block_reuse=False)

-        draft_len = 4
         spec_config = NGramDecodingConfig(
-            max_draft_len=draft_len,
-            max_matching_ngram_size=draft_len,
+            max_draft_len=4,
+            max_matching_ngram_size=2,
             is_keep_all=True,
             is_use_oldest=True,
             is_public_pool=True,
@@ -267,7 +272,8 @@ def test_ngram(self):
         with LLM(model=self.MODEL_PATH,
                  **pytorch_config,
                  kv_cache_config=kv_cache_config,
-                 speculative_config=spec_config) as llm:
+                 speculative_config=spec_config,
+                 max_batch_size=16) as llm:
             task = MMLU(self.MODEL_NAME)
             task.evaluate(llm)
             task = GSM8K(self.MODEL_NAME)
@@ -307,7 +313,7 @@ def test_auto_dtype(self):
         task = CnnDailymail(self.MODEL_NAME)
         task.evaluate(llm)

-    @skip_pre_hopper
+    @skip_pre_ada
     def test_fp8_prequantized(self):
         model_path = f"{llm_models_root()}/llama-3.2-models/Llama-3.2-1B-FP8"
         with LLM(model_path) as llm:
```

0 commit comments

Comments
 (0)