From e1b6bfa8244b2e53ce23521c32983de453c38fd2 Mon Sep 17 00:00:00 2001
From: Xingyu Liu
Date: Tue, 23 Dec 2025 16:02:40 -0800
Subject: [PATCH] sync with #29788

Signed-off-by: Xingyu Liu
---
 .../model_arch_config_convertor.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vllm/transformers_utils/model_arch_config_convertor.py b/vllm/transformers_utils/model_arch_config_convertor.py
index 1aa12345ec588..dc067a09419b7 100644
--- a/vllm/transformers_utils/model_arch_config_convertor.py
+++ b/vllm/transformers_utils/model_arch_config_convertor.py
@@ -74,12 +74,12 @@ class ModelArchConfigConvertorBase:
             # For ChatGLM:
             "multi_query_group_num",
         ]
-        for attr in attributes:
-            num_kv_heads = getattr(self.hf_text_config, attr, None)
-            if num_kv_heads is not None:
-                return num_kv_heads
-
-        return self.hf_text_config.num_attention_heads
+        # For non-grouped-query attention models, the number of KV heads is
+        # equal to the number of attention heads.
+        default_factory = lambda: self.hf_text_config.num_attention_heads
+        return getattr_iter(
+            self.hf_text_config, attributes, default_factory=default_factory
+        )
 
     def get_num_experts(self) -> int:
         """Returns the number of experts in the model."""