refactor tests

Signed-off-by: Xingyu Liu <charlotteliu12x@gmail.com>
This commit is contained in:
Xingyu Liu 2025-12-09 23:25:02 -08:00
parent 0cd72dc438
commit 5401f6529d

View File

@@ -1,163 +1,144 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Tests for ModelArchitectureConfig and its integration with ModelConfig."""
import json import json
from pathlib import Path from pathlib import Path
import pytest
from vllm.config import ModelConfig, ParallelConfig, SpeculativeConfig from vllm.config import ModelConfig, ParallelConfig, SpeculativeConfig
BASE_TRUST_REMOTE_CODE_MODELS = {
"nvidia/Llama-3_3-Nemotron-Super-49B-v1",
"XiaomiMiMo/MiMo-7B-RL",
# Excluded: Not available online right now
# "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1",
"meituan-longcat/LongCat-Flash-Chat",
}
def test_basic(): BASE_MODELS_TO_TEST = [
trust_remote_code_models = [ "state-spaces/mamba-130m-hf",
"nvidia/Llama-3_3-Nemotron-Super-49B-v1", "mistralai/Mamba-Codestral-7B-v0.1",
"XiaomiMiMo/MiMo-7B-RL", # Excluded: terratorch/torchgeo version mismatch in CPU CI environment
# Excluded: Not available online right now # (NonGeoDataset import error). Tested in model initialization tests.
# "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1", # "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
"meituan-longcat/LongCat-Flash-Chat", "Zyphra/Zamba2-7B-instruct",
] "mosaicml/mpt-7b",
models_to_test = [ "databricks/dbrx-instruct",
"state-spaces/mamba-130m-hf", "tiiuae/falcon-7b",
"mistralai/Mamba-Codestral-7B-v0.1", "tiiuae/falcon-40b",
# Excluded: terratorch/torchgeo version mismatch in "luccafong/deepseek_mtp_main_random",
# Async Engine, Inputs, Utils, Worker, Config Test (CPU) CI test environment "Qwen/Qwen3-Next-80B-A3B-Instruct",
# (NonGeoDataset import error). "tiny-random/qwen3-next-moe",
# "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11", "zai-org/GLM-4.5",
"Zyphra/Zamba2-7B-instruct", "baidu/ERNIE-4.5-21B-A3B-PT",
"mosaicml/mpt-7b", # Models using base convertor
"databricks/dbrx-instruct", "lmsys/gpt-oss-20b-bf16",
"tiiuae/falcon-7b", "deepseek-ai/DeepSeek-V3.2-Exp",
"tiiuae/falcon-40b", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
"luccafong/deepseek_mtp_main_random", ] + list(BASE_TRUST_REMOTE_CODE_MODELS)
"luccafong/deepseek_mtp_draft_random",
"Qwen/Qwen3-Next-80B-A3B-Instruct",
"tiny-random/qwen3-next-moe",
"zai-org/GLM-4.5",
"baidu/ERNIE-4.5-21B-A3B-PT",
# Models using base convertor
"lmsys/gpt-oss-20b-bf16",
"deepseek-ai/DeepSeek-V3.2-Exp",
"meta-llama/Llama-4-Scout-17B-16E-Instruct",
] + trust_remote_code_models
groundtruth_path = Path(__file__).parent / "base_model_arch_groundtruth.json" # (target_model, draft_model, trust_remote_code)
SPECULATIVE_MODELS = [
("JackFram/llama-68m", "abhigoyal/vllm-medusa-llama-68m-random", False),
("luccafong/deepseek_mtp_main_random", "luccafong/deepseek_mtp_draft_random", True),
("eagle618/deepseek-v3-random", "eagle618/eagle-deepseek-v3-random", True),
("meta-llama/Meta-Llama-3-8B-Instruct", "yuhuili/EAGLE-LLaMA3-Instruct-8B", True),
("meta-llama/Llama-3.1-8B-Instruct", "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", True),
]
def _load_groundtruth(filename: str) -> dict:
"""Load groundtruth JSON from the test directory."""
groundtruth_path = Path(__file__).parent / filename
with open(groundtruth_path) as f: with open(groundtruth_path) as f:
model_arch_groundtruth = json.load(f) return json.load(f)
for model in models_to_test:
print(f"testing {model=}")
model_config = ModelConfig(
model, trust_remote_code=model in trust_remote_code_models
)
model_arch_config = model_config.model_arch_config def _assert_model_arch_config(
expected = model_arch_groundtruth[model] model_arch_config, expected: dict, check_head_size: bool = True
assert model_arch_config.architectures == expected["architectures"] ):
assert model_arch_config.model_type == expected["model_type"] """Assert model_arch_config matches expected values."""
assert model_arch_config.text_model_type == expected["text_model_type"] assert model_arch_config.architectures == expected["architectures"]
assert model_arch_config.hidden_size == expected["hidden_size"] assert model_arch_config.model_type == expected["model_type"]
assert ( assert model_arch_config.text_model_type == expected["text_model_type"]
model_arch_config.total_num_hidden_layers assert model_arch_config.hidden_size == expected["hidden_size"]
== expected["total_num_hidden_layers"] assert (
) model_arch_config.total_num_hidden_layers == expected["total_num_hidden_layers"]
assert ( )
model_arch_config.total_num_attention_heads assert (
== expected["total_num_attention_heads"] model_arch_config.total_num_attention_heads
) == expected["total_num_attention_heads"]
)
assert model_arch_config.vocab_size == expected["vocab_size"]
assert model_arch_config.total_num_kv_heads == expected["total_num_kv_heads"]
assert model_arch_config.num_experts == expected["num_experts"]
assert model_arch_config.is_deepseek_mla == expected["is_deepseek_mla"]
assert str(model_arch_config.torch_dtype) == expected["dtype"]
if check_head_size:
assert model_arch_config.head_size == expected["head_size"] assert model_arch_config.head_size == expected["head_size"]
assert model_arch_config.vocab_size == expected["vocab_size"]
assert model_arch_config.total_num_kv_heads == expected["total_num_kv_heads"]
assert model_arch_config.num_experts == expected["num_experts"]
assert model_arch_config.is_deepseek_mla == expected["is_deepseek_mla"]
dtype = model_arch_config.torch_dtype
assert str(dtype) == expected["dtype"]
# Ensure model_config methods return expected values
assert model_config.architectures == expected["architectures"] def _assert_model_config_methods(
assert model_config.get_vocab_size() == expected["vocab_size"] model_config, expected: dict, check_head_size: bool = True
assert model_config.get_hidden_size() == expected["hidden_size"] ):
"""Assert model_config methods return expected values."""
assert model_config.architectures == expected["architectures"]
assert model_config.get_vocab_size() == expected["vocab_size"]
assert model_config.get_hidden_size() == expected["hidden_size"]
assert model_config.get_total_num_kv_heads() == expected["total_num_kv_heads"]
assert model_config.get_num_experts() == expected["num_experts"]
assert (
model_config.get_total_num_hidden_layers()
== expected["total_num_hidden_layers"]
)
if check_head_size:
assert model_config.get_head_size() == expected["head_size"] assert model_config.get_head_size() == expected["head_size"]
assert model_config.get_total_num_kv_heads() == expected["total_num_kv_heads"]
assert model_config.get_num_experts() == expected["num_experts"]
assert (
model_config.get_total_num_hidden_layers()
== expected["total_num_hidden_layers"]
)
def test_draft_models(): @pytest.mark.parametrize("model", BASE_MODELS_TO_TEST)
speculative_models = [ def test_base_model_arch_config(model: str):
("JackFram/llama-68m", "abhigoyal/vllm-medusa-llama-68m-random", False), """Test model architecture config for base models."""
( groundtruth = _load_groundtruth("base_model_arch_groundtruth.json")
"luccafong/deepseek_mtp_main_random", expected = groundtruth[model]
"luccafong/deepseek_mtp_draft_random",
True,
),
("eagle618/deepseek-v3-random", "eagle618/eagle-deepseek-v3-random", True),
(
"meta-llama/Meta-Llama-3-8B-Instruct",
"yuhuili/EAGLE-LLaMA3-Instruct-8B",
True,
),
(
"meta-llama/Llama-3.1-8B-Instruct",
"yuhuili/EAGLE3-LLaMA3.1-Instruct-8B",
True,
),
]
groundtruth_path = Path(__file__).parent / "draft_model_arch_groundtruth.json" model_config = ModelConfig(
with open(groundtruth_path) as f: model, trust_remote_code=model in BASE_TRUST_REMOTE_CODE_MODELS
model_arch_groundtruth = json.load(f) )
for target_model, draft_model, trust_remote_code in speculative_models: _assert_model_arch_config(model_config.model_arch_config, expected)
print(f"testing {target_model=} {draft_model=}") _assert_model_config_methods(model_config, expected)
target_model_config = ModelConfig(
target_model, trust_remote_code=trust_remote_code
)
speculative_config = {
"model": draft_model,
"num_speculative_tokens": 1,
"target_model_config": target_model_config,
"target_parallel_config": ParallelConfig(),
}
speculative_config = SpeculativeConfig(**speculative_config)
model_config = speculative_config.draft_model_config
model_arch_config = model_config.model_arch_config @pytest.mark.parametrize(
expected = model_arch_groundtruth[draft_model] "target_model,draft_model,trust_remote_code", SPECULATIVE_MODELS
assert model_arch_config.architectures == expected["architectures"] )
assert model_arch_config.model_type == expected["model_type"] def test_draft_model_arch_config(
assert model_arch_config.text_model_type == expected["text_model_type"] target_model: str, draft_model: str, trust_remote_code: bool
assert model_arch_config.hidden_size == expected["hidden_size"] ):
assert ( """Test model architecture config for draft/speculative models."""
model_arch_config.total_num_hidden_layers groundtruth = _load_groundtruth("draft_model_arch_groundtruth.json")
== expected["total_num_hidden_layers"] expected = groundtruth[draft_model]
)
assert (
model_arch_config.total_num_attention_heads
== expected["total_num_attention_heads"]
)
assert model_arch_config.vocab_size == expected["vocab_size"] target_model_config = ModelConfig(target_model, trust_remote_code=trust_remote_code)
assert model_arch_config.total_num_kv_heads == expected["total_num_kv_heads"] speculative_config = SpeculativeConfig(
assert model_arch_config.num_experts == expected["num_experts"] model=draft_model,
assert model_arch_config.is_deepseek_mla == expected["is_deepseek_mla"] num_speculative_tokens=1,
dtype = model_arch_config.torch_dtype target_model_config=target_model_config,
assert str(dtype) == expected["dtype"] target_parallel_config=ParallelConfig(),
)
model_config = speculative_config.draft_model_config
# Ensure model_config methods return expected values # For medusa models, head_size may cause division by zero before
assert model_config.architectures == expected["architectures"] # model_arch_config was introduced, so we conditionally check it
assert model_config.get_vocab_size() == expected["vocab_size"] check_head_size = isinstance(expected["head_size"], int)
assert model_config.get_hidden_size() == expected["hidden_size"]
assert model_config.get_total_num_kv_heads() == expected["total_num_kv_heads"]
assert model_config.get_num_experts() == expected["num_experts"]
assert (
model_config.get_total_num_hidden_layers()
== expected["total_num_hidden_layers"]
)
if isinstance(expected["head_size"], int): _assert_model_arch_config(
# Before model_arch_config is introduced, get_head_size() for medusa model_config.model_arch_config, expected, check_head_size=check_head_size
# model config will throw out `integer division or modulo by zero` error. )
assert model_arch_config.head_size == expected["head_size"] _assert_model_config_methods(
assert model_config.get_head_size() == expected["head_size"] model_config, expected, check_head_size=check_head_size
)