From d1cf8214e523ce664797b3f65a26ffdc6e81f032 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Sun, 23 Nov 2025 02:22:48 +0800
Subject: [PATCH] [Bugfix] Use HF config fields as fallback when loading
 Mistral config (#29239)

Signed-off-by: DarkLight1337
---
 .buildkite/test-amd.yaml                   |  1 +
 .buildkite/test-pipeline.yaml              |  1 +
 vllm/transformers_utils/config.py          | 14 +++++++++++++-
 vllm/transformers_utils/configs/mistral.py | 13 ++++++++++---
 4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index 4ee81fdabf665..f098e23866eb3 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -754,6 +754,7 @@ steps:
   torch_nightly: true
   source_file_dependencies:
     - vllm/model_executor/models/
+    - vllm/transformers_utils/
     - tests/models/test_initialization.py
   commands:
     # Only when vLLM model source is modified - test initialization of a large
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index a5719d438eece..7a46e919f93bf 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -691,6 +691,7 @@ steps:
   torch_nightly: true
   source_file_dependencies:
     - vllm/model_executor/models/
+    - vllm/transformers_utils/
     - tests/models/test_initialization.py
   commands:
     # Only when vLLM model source is modified - test initialization of a large
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 9eac7bb50afa6..db7bf228f411d 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -204,7 +204,19 @@ class MistralConfigParser(ConfigParserBase):
 
         from vllm.transformers_utils.configs.mistral import adapt_config_dict
 
-        config = adapt_config_dict(config_dict)
+        # Get missing fields from HF config if available
+        try:
+            hf_config_dict, _ = PretrainedConfig.get_config_dict(
+                model,
+                revision=revision,
+                code_revision=code_revision,
+                token=_get_hf_token(),
+                **kwargs,
+            )
+        except OSError:  # Not found
+            hf_config_dict = {}
+
+        config = adapt_config_dict(config_dict, defaults=hf_config_dict)
 
         # Mistral configs may define sliding_window as list[int]. Convert it
         # to int and add the layer_types list[str] to make it HF compatible
diff --git a/vllm/transformers_utils/configs/mistral.py b/vllm/transformers_utils/configs/mistral.py
index 8da4ab35c56c3..966737aad0867 100644
--- a/vllm/transformers_utils/configs/mistral.py
+++ b/vllm/transformers_utils/configs/mistral.py
@@ -9,14 +9,18 @@ from vllm.logger import init_logger
 
 logger = init_logger(__name__)
 
 
-def adapt_config_dict(config_dict: dict[str, Any], **kwargs) -> PretrainedConfig:
-    config_dict.update(kwargs)
+def adapt_config_dict(
+    config_dict: dict[str, Any],
+    defaults: dict[str, Any],
+) -> PretrainedConfig:
     config_dict = _remap_general_mistral_args(config_dict)
     if bool(config_dict.get("quantization")):
         config_dict = _remap_mistral_quantization_args(config_dict)
 
-    if bool(config_dict.get("moe")):
+    if config_dict.get("model_type") == "mamba":
+        config_dict["architectures"] = ["Mamba2ForCausalLM"]
+    elif bool(config_dict.get("moe")):
         config_dict["architectures"] = ["MixtralForCausalLM"]
     else:
         config_dict["architectures"] = ["MistralForCausalLM"]
@@ -52,6 +56,9 @@ def adapt_config_dict(config_dict: dict[str, Any], **kwargs) -> PretrainedConfig
     if is_audio:
         config_dict = _remap_mistral_audio_args(config_dict)
 
+    for k, v in defaults.items():
+        config_dict.setdefault(k, v)
+
     config = PretrainedConfig.from_dict(config_dict)
 
     logger.debug("Initialized config %s", config)
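
Note (reviewer addition, not part of the patch): the core of this fix is the
setdefault-based merge at the end of adapt_config_dict, where fields already
present in the Mistral params.json always win and HF config fields only fill
in what is missing. Below is a minimal, self-contained sketch of those merge
semantics; the names merge_with_hf_defaults, mistral_cfg, and hf_cfg are
hypothetical and for illustration only.

from typing import Any


def merge_with_hf_defaults(
    config_dict: dict[str, Any],
    defaults: dict[str, Any],
) -> dict[str, Any]:
    """Fill keys missing from config_dict using HF config fields."""
    merged = dict(config_dict)  # avoid mutating the caller's dict
    for k, v in defaults.items():
        merged.setdefault(k, v)  # Mistral-native values always win
    return merged


# Hypothetical example values, for illustration only:
mistral_cfg = {"hidden_size": 4096, "head_dim": 128}
hf_cfg = {"hidden_size": 9999, "rope_theta": 1000000.0}

merged = merge_with_hf_defaults(mistral_cfg, hf_cfg)
assert merged["hidden_size"] == 4096      # kept from params.json
assert merged["rope_theta"] == 1000000.0  # filled in from the HF config

The OSError fallback in MistralConfigParser means models published only in
Mistral format (with no HF config on the Hub) keep working: the defaults dict
is simply empty and the merge is a no-op.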