Commit d328f78

[CI] Enable all hf transformers baselines in test_hybrid (vllm-project#23936)

Signed-off-by: Thomas Parnell <[email protected]>

1 parent: 98aee61

File tree: 2 files changed, +31 -56 lines changed

tests/models/language/generation/test_hybrid.py (22 additions, 52 deletions)
@@ -34,17 +34,6 @@
     "LiquidAI/LFM2-1.2B",
 ]
 
-HF_UNSUPPORTED_MODELS = [
-    # The HF transformers implementation of
-    # Mamba2 is buggy for Codestral as it doesn't handle n_groups, so the test
-    # doesn't compare vLLM output with HF output.
-    # See https://github.com/huggingface/transformers/pull/35943
-    "yujiepan/mamba2-codestral-v0.1-tiny-random",
-    # transformers 4.55 is still producing garbage for this model
-    # TODO(tdoublep): follow-up on transformers side
-    "ibm-granite/granite-4.0-tiny-preview"
-]
-
 V1_SUPPORTED_MODELS = [
     "state-spaces/mamba-130m-hf",
     "ai21labs/Jamba-tiny-dev",
@@ -90,20 +79,13 @@ def test_models(
     try:
         model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
         model_info.check_available_online(on_fail="skip")
-        hf_version_check = model_info.check_transformers_version(
-            on_fail="return")
+        model_info.check_transformers_version(on_fail="skip")
     except ValueError:
-        hf_version_check = None
-
-    if hf_version_check is not None:
-        print(f"Skipping transformers comparison because: {hf_version_check}")
+        pass
 
     with hf_runner(model) as hf_model:
-        if model not in HF_UNSUPPORTED_MODELS and hf_version_check is None:
-            hf_outputs = hf_model.generate_greedy_logprobs_limit(
-                example_prompts, max_tokens, num_logprobs)
-        else:
-            hf_outputs = None
+        hf_outputs = hf_model.generate_greedy_logprobs_limit(
+            example_prompts, max_tokens, num_logprobs)
 
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_V1", "0")
@@ -121,7 +103,7 @@ def test_models(
     else:
         vllm_v1_outputs = None
 
-    if hf_outputs is not None and vllm_v0_outputs is not None:
+    if vllm_v0_outputs is not None:
         check_logprobs_close(
             outputs_0_lst=hf_outputs,
             outputs_1_lst=vllm_v0_outputs,
@@ -130,12 +112,10 @@
         )
 
     if model in V1_SUPPORTED_MODELS:
-        ref_outputs = hf_outputs if hf_outputs is not None else vllm_v0_outputs
-        assert ref_outputs is not None
         check_logprobs_close(
-            outputs_0_lst=ref_outputs,
+            outputs_0_lst=hf_outputs,
             outputs_1_lst=vllm_v1_outputs,
-            name_0="hf" if hf_outputs is not None else "vllm-v0",
+            name_0="hf",
             name_1="vllm-v1",
         )
 
@@ -402,11 +382,8 @@ def test_full_cuda_graph(
         pass
 
     with hf_runner(model) as hf_model:
-        if model not in HF_UNSUPPORTED_MODELS:
-            hf_outputs = hf_model.generate_greedy_logprobs_limit(
-                example_prompts, max_tokens, num_logprobs)
-        else:
-            hf_outputs = None
+        hf_outputs = hf_model.generate_greedy_logprobs_limit(
+            example_prompts, max_tokens, num_logprobs)
 
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_V1", "0")
@@ -421,20 +398,18 @@ def test_full_cuda_graph(
         vllm_v1_outputs = vllm_model.generate_greedy_logprobs(
            example_prompts, max_tokens, num_logprobs)
 
-    if hf_outputs is not None and vllm_v0_outputs is not None:
+    if vllm_v0_outputs is not None:
         check_logprobs_close(
             outputs_0_lst=hf_outputs,
             outputs_1_lst=vllm_v0_outputs,
             name_0="hf",
             name_1="vllm-v0",
         )
 
-    ref_outputs = hf_outputs if hf_outputs is not None else vllm_v0_outputs
-    assert ref_outputs is not None
     check_logprobs_close(
-        outputs_0_lst=ref_outputs,
+        outputs_0_lst=hf_outputs,
         outputs_1_lst=vllm_v1_outputs,
-        name_0="hf" if hf_outputs is not None else "vllm-v0",
+        name_0="hf",
         name_1="vllm-v1",
     )
 
@@ -460,11 +435,8 @@ def test_fp32_state(
         pass
 
     with hf_runner(model) as hf_model:
-        if model not in HF_UNSUPPORTED_MODELS:
-            hf_outputs = hf_model.generate_greedy_logprobs_limit(
-                example_prompts, max_tokens, num_logprobs)
-        else:
-            hf_outputs = None
+        hf_outputs = hf_model.generate_greedy_logprobs_limit(
+            example_prompts, max_tokens, num_logprobs)
 
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_V1", "0")
@@ -480,18 +452,16 @@ def test_fp32_state(
         vllm_v1_outputs = vllm_model.generate_greedy_logprobs(
             example_prompts, max_tokens, num_logprobs)
 
-    if hf_outputs is not None:
-        check_logprobs_close(
-            outputs_0_lst=hf_outputs,
-            outputs_1_lst=vllm_v0_outputs,
-            name_0="hf",
-            name_1="vllm-v0",
-        )
+    check_logprobs_close(
+        outputs_0_lst=hf_outputs,
+        outputs_1_lst=vllm_v0_outputs,
+        name_0="hf",
+        name_1="vllm-v0",
+    )
 
-    ref_outputs = hf_outputs if hf_outputs is not None else vllm_v0_outputs
     check_logprobs_close(
-        outputs_0_lst=ref_outputs,
+        outputs_0_lst=hf_outputs,
        outputs_1_lst=vllm_v1_outputs,
-        name_0="hf" if hf_outputs is not None else "vllm-v0",
+        name_0="hf",
         name_1="vllm-v1",
     )
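
Note: the removed HF_UNSUPPORTED_MODELS list and the hf_version_check plumbing are both replaced by a single call to model_info.check_transformers_version(on_fail="skip"), so the HF baseline is always generated when the test runs at all. As a minimal sketch only (this is not the actual _HfExamplesInfo implementation, just an assumed illustration of what such a gate does), a min_transformers_version check that skips a test could look like this:

```python
# Illustrative sketch of a transformers version gate; the real check lives in
# _HfExamplesInfo.check_transformers_version in tests/models/registry.py.
import pytest
import transformers
from packaging.version import Version


def skip_if_transformers_too_old(min_version: str | None) -> None:
    """Skip the calling test when transformers is older than min_version."""
    if min_version is None:
        return  # no version requirement recorded for this model
    if Version(transformers.__version__) < Version(min_version):
        pytest.skip(f"transformers>={min_version} required, "
                    f"found {transformers.__version__}")


# Usage inside a test body, mirroring on_fail="skip" in the diff above:
# skip_if_transformers_too_old("4.55.3")
```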

tests/models/registry.py (9 additions, 4 deletions)
@@ -154,7 +154,7 @@ def check_available_online(
     "BailingMoeForCausalLM": _HfExamplesInfo("inclusionAI/Ling-lite-1.5",
                                              trust_remote_code=True),
     "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B-v1",
-                                        min_transformers_version="4.56.0",
+                                        min_transformers_version="4.55.3",
                                         extras={"tiny": "hmellor/tiny-random-BambaForCausalLM"}),  # noqa: E501
     "BloomForCausalLM": _HfExamplesInfo("bigscience/bloom-560m",
                                         {"1b": "bigscience/bloomz-1b1"}),
@@ -208,7 +208,8 @@ def check_available_online(
     "GptOssForCausalLM": _HfExamplesInfo("lmsys/gpt-oss-20b-bf16"),
     "GraniteForCausalLM": _HfExamplesInfo("ibm/PowerLM-3b"),
     "GraniteMoeForCausalLM": _HfExamplesInfo("ibm/PowerMoE-3b"),
-    "GraniteMoeHybridForCausalLM": _HfExamplesInfo("ibm-granite/granite-4.0-tiny-preview"),  # noqa: E501
+    "GraniteMoeHybridForCausalLM": _HfExamplesInfo("ibm-granite/granite-4.0-tiny-preview",  # noqa: E501
+                                                   min_transformers_version="4.55.3"),
     "GraniteMoeSharedForCausalLM": _HfExamplesInfo("ibm-research/moe-7b-1b-active-shared-experts"),  # noqa: E501
     "Grok1ModelForCausalLM": _HfExamplesInfo("hpcai-tech/grok-1",
                                              trust_remote_code=True),
@@ -228,7 +229,7 @@ def check_available_online(
                                           trust_remote_code=True),
     "JAISLMHeadModel": _HfExamplesInfo("inceptionai/jais-13b-chat"),
     "JambaForCausalLM": _HfExamplesInfo("ai21labs/AI21-Jamba-1.5-Mini",
-                                        min_transformers_version="4.56.0",
+                                        min_transformers_version="4.55.3",
                                         extras={
                                             "tiny": "ai21labs/Jamba-tiny-dev",
                                             "random": "ai21labs/Jamba-tiny-random",  # noqa: E501
@@ -244,7 +245,11 @@ def check_available_online(
     "Llama4ForCausalLM": _HfExamplesInfo("meta-llama/Llama-4-Scout-17B-16E-Instruct",  # noqa: E501
                                          is_available_online=False),
     "MambaForCausalLM": _HfExamplesInfo("state-spaces/mamba-130m-hf"),
-    "Mamba2ForCausalLM": _HfExamplesInfo("mistralai/Mamba-Codestral-7B-v0.1"),
+    "Mamba2ForCausalLM": _HfExamplesInfo("mistralai/Mamba-Codestral-7B-v0.1",
+                                         min_transformers_version="4.55.3",
+                                         extras={
+                                             "random": "yujiepan/mamba2-codestral-v0.1-tiny-random",  # noqa: E501
+                                         }),
     "FalconMambaForCausalLM": _HfExamplesInfo("tiiuae/falcon-mamba-7b-instruct"),  # noqa: E501
     "MiniCPMForCausalLM": _HfExamplesInfo("openbmb/MiniCPM-2B-sft-bf16",
                                           trust_remote_code=True),
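
Note: these registry entries now record min_transformers_version="4.55.3" (and, for Mamba2, the small "random" checkpoint as an extra), which is what lets test_hybrid rely on check_transformers_version(on_fail="skip") instead of a per-test exclusion list. A hedged sketch of how such an entry might be declared and queried follows; the dataclass below is a simplified stand-in for _HfExamplesInfo, whose full definition is not shown in this diff:

```python
# Simplified stand-in for _HfExamplesInfo; the real class in
# tests/models/registry.py has more fields and helper methods.
from dataclasses import dataclass, field


@dataclass
class ExampleModelInfo:
    default: str
    min_transformers_version: str | None = None
    extras: dict[str, str] = field(default_factory=dict)


# Mirrors the Mamba2ForCausalLM entry added in the diff above.
MAMBA2 = ExampleModelInfo(
    default="mistralai/Mamba-Codestral-7B-v0.1",
    min_transformers_version="4.55.3",
    extras={"random": "yujiepan/mamba2-codestral-v0.1-tiny-random"},
)

# Tests can pick the tiny "random" variant when it exists and otherwise
# fall back to the default checkpoint.
model_id = MAMBA2.extras.get("random", MAMBA2.default)
```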
