mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 09:25:44 +08:00
[CI] [Hybrid] Speed up hybrid models test by removing large models (#22563)
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
This commit is contained in:
parent
a6022e6fbc
commit
1bf5e1f25b
@ -20,7 +20,7 @@ pytestmark = pytest.mark.hybrid_model
|
|||||||
SSM_MODELS = [
|
SSM_MODELS = [
|
||||||
"state-spaces/mamba-130m-hf",
|
"state-spaces/mamba-130m-hf",
|
||||||
"tiiuae/falcon-mamba-tiny-dev",
|
"tiiuae/falcon-mamba-tiny-dev",
|
||||||
"mistralai/Mamba-Codestral-7B-v0.1",
|
"yujiepan/mamba2-codestral-v0.1-tiny-random",
|
||||||
]
|
]
|
||||||
|
|
||||||
HYBRID_MODELS = [
|
HYBRID_MODELS = [
|
||||||
@ -29,8 +29,6 @@ HYBRID_MODELS = [
|
|||||||
# "pfnet/plamo-2-1b",
|
# "pfnet/plamo-2-1b",
|
||||||
"Zyphra/Zamba2-1.2B-instruct",
|
"Zyphra/Zamba2-1.2B-instruct",
|
||||||
"hmellor/tiny-random-BambaForCausalLM",
|
"hmellor/tiny-random-BambaForCausalLM",
|
||||||
"ibm-ai-platform/Bamba-9B-v1",
|
|
||||||
"nvidia/Nemotron-H-8B-Base-8K",
|
|
||||||
"ibm-granite/granite-4.0-tiny-preview",
|
"ibm-granite/granite-4.0-tiny-preview",
|
||||||
"tiiuae/Falcon-H1-0.5B-Base",
|
"tiiuae/Falcon-H1-0.5B-Base",
|
||||||
]
|
]
|
||||||
@ -40,23 +38,18 @@ HF_UNSUPPORTED_MODELS = [
|
|||||||
# Mamba2 is buggy for Codestral as it doesn't handle n_groups, so the test
|
# Mamba2 is buggy for Codestral as it doesn't handle n_groups, so the test
|
||||||
# doesn't compare vLLM output with HF output.
|
# doesn't compare vLLM output with HF output.
|
||||||
# See https://github.com/huggingface/transformers/pull/35943
|
# See https://github.com/huggingface/transformers/pull/35943
|
||||||
"mistralai/Mamba-Codestral-7B-v0.1",
|
"yujiepan/mamba2-codestral-v0.1-tiny-random",
|
||||||
# Note: I'm not seeing the same output from vLLM V0 vs. HF transformers
|
# transformers 4.55 is still producing garbage for this model
|
||||||
# for Nemotron-H-8B; currently only compare vLLM V0 vs. vLLM V1
|
# TODO(tdoublep): follow-up on transformers side
|
||||||
"nvidia/Nemotron-H-8B-Base-8K",
|
"ibm-granite/granite-4.0-tiny-preview"
|
||||||
# NOTE: Currently the test fails due to HF transformers issue fixed in:
|
|
||||||
# https://github.com/huggingface/transformers/pull/39033
|
|
||||||
# We will enable vLLM test for Granite after next HF transformers release.
|
|
||||||
"ibm-granite/granite-4.0-tiny-preview",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
V1_SUPPORTED_MODELS = [
|
V1_SUPPORTED_MODELS = [
|
||||||
"state-spaces/mamba-130m-hf",
|
"state-spaces/mamba-130m-hf",
|
||||||
"ai21labs/Jamba-tiny-dev",
|
"ai21labs/Jamba-tiny-dev",
|
||||||
"mistralai/Mamba-Codestral-7B-v0.1",
|
"yujiepan/mamba2-codestral-v0.1-tiny-random",
|
||||||
"ibm-ai-platform/Bamba-9B-v1",
|
|
||||||
"Zyphra/Zamba2-1.2B-instruct",
|
"Zyphra/Zamba2-1.2B-instruct",
|
||||||
"nvidia/Nemotron-H-8B-Base-8K",
|
"hmellor/tiny-random-BambaForCausalLM",
|
||||||
"ibm-granite/granite-4.0-tiny-preview",
|
"ibm-granite/granite-4.0-tiny-preview",
|
||||||
"tiiuae/Falcon-H1-0.5B-Base",
|
"tiiuae/Falcon-H1-0.5B-Base",
|
||||||
]
|
]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user