sync with #29788

Signed-off-by: Xingyu Liu <charlotteliu12x@gmail.com>
2026-07-04 06:47:09 +08:00 · 2025-12-23 16:02:40 -08:00 · 2025-12-23 16:02:40 -08:00 · e1b6bfa824
commit e1b6bfa824
parent a55be1ffc5
1 changed files with 6 additions and 6 deletions
--- a/vllm/transformers_utils/model_arch_config_convertor.py
+++ b/vllm/transformers_utils/model_arch_config_convertor.py
@@ -74,12 +74,12 @@ class ModelArchConfigConvertorBase:
            # For ChatGLM:
            "multi_query_group_num",
        ]
-        for attr in attributes:
+        # For non-grouped-query attention models, the number of KV heads is
-            num_kv_heads = getattr(self.hf_text_config, attr, None)
+        # equal to the number of attention heads.
-            if num_kv_heads is not None:
+        default_factory = lambda: self.hf_text_config.num_attention_heads
-                return num_kv_heads
+        return getattr_iter(
-
+            self.hf_text_config, attributes, default_factory=default_factory
-        return self.hf_text_config.num_attention_heads
+        )
    def get_num_experts(self) -> int:
        """Returns the number of experts in the model."""