From e1b6bfa8244b2e53ce23521c32983de453c38fd2 Mon Sep 17 00:00:00 2001
From: Xingyu Liu <charlotteliu12x@gmail.com>
Date: Tue, 23 Dec 2025 16:02:40 -0800
Subject: [PATCH] sync with #29788

Signed-off-by: Xingyu Liu <charlotteliu12x@gmail.com>
---
 .../model_arch_config_convertor.py                   | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vllm/transformers_utils/model_arch_config_convertor.py b/vllm/transformers_utils/model_arch_config_convertor.py
index 1aa12345ec588..dc067a09419b7 100644
--- a/vllm/transformers_utils/model_arch_config_convertor.py
+++ b/vllm/transformers_utils/model_arch_config_convertor.py
@@ -74,12 +74,12 @@ class ModelArchConfigConvertorBase:
             # For ChatGLM:
             "multi_query_group_num",
         ]
-        for attr in attributes:
-            num_kv_heads = getattr(self.hf_text_config, attr, None)
-            if num_kv_heads is not None:
-                return num_kv_heads
-
-        return self.hf_text_config.num_attention_heads
+        # For non-grouped-query attention models, the number of KV heads is
+        # equal to the number of attention heads.
+        default_factory = lambda: self.hf_text_config.num_attention_heads
+        return getattr_iter(
+            self.hf_text_config, attributes, default_factory=default_factory
+        )
 
     def get_num_experts(self) -> int:
         """Returns the number of experts in the model."""