From 0f85cfdc659ee120e51741bd1d69775ca0dacafa Mon Sep 17 00:00:00 2001
From: Xingyu Liu
Date: Tue, 16 Dec 2025 11:58:06 -0800
Subject: [PATCH] update with main

Signed-off-by: Xingyu Liu
---
 .../model_arch_config_convertor.py            | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/vllm/transformers_utils/model_arch_config_convertor.py b/vllm/transformers_utils/model_arch_config_convertor.py
index d785dce3d32e5..705f5207b3687 100644
--- a/vllm/transformers_utils/model_arch_config_convertor.py
+++ b/vllm/transformers_utils/model_arch_config_convertor.py
@@ -26,6 +26,9 @@ class ModelArchConfigConvertorBase:
         self.hf_config = hf_config
         self.hf_text_config = hf_text_config
 
+    def get_architectures(self) -> list[str]:
+        return getattr(self.hf_config, "architectures", [])
+
     def get_num_hidden_layers(self) -> int:
         return getattr(self.hf_text_config, "num_hidden_layers", 0)
 
@@ -240,7 +243,7 @@ class ModelArchConfigConvertorBase:
 
     def convert(self, model_id: str, revision: str | None) -> ModelArchitectureConfig:
         model_arch_config = ModelArchitectureConfig(
-            architectures=getattr(self.hf_config, "architectures", []),
+            architectures=self.get_architectures(),
             model_type=self.hf_config.model_type,
             text_model_type=getattr(self.hf_text_config, "model_type", None),
             hidden_size=self.get_hidden_size(),
@@ -331,7 +334,15 @@ class NemotronNasModelArchConfigConvertor(ModelArchConfigConvertorBase):
                         self.hf_text_config.num_attention_heads
                         // block.attention.n_heads_in_group
                     )
-        raise RuntimeError("Couldn't determine number of kv heads")
+        raise RuntimeError(
+            "Could not determine the number of key-value attention heads "
+            "from model configuration. "
+            f"Architecture: {self.get_architectures()}. "
+            "This usually indicates an unsupported model architecture or "
+            "missing configuration. "
+            "Please check if your model is supported at: "
+            "https://docs.vllm.ai/en/latest/models/supported_models.html"
+        )
 
 
 class DeepSeekMTPModelArchConfigConvertor(ModelArchConfigConvertorBase):