mirror of https://git.datalinker.icu/vllm-project/vllm.git
fix: incorrect bigcode attention heads num (#676)
parent aa84c92ef6
commit 621980bdc0
@@ -98,9 +98,11 @@ class ModelConfig:
         # Note: for falcon, when new_decoder_architecture is True, the
         # multi_query flag is ignored and we use n_head_kv for the number of
         # KV heads.
-        if (getattr(self.hf_config, "multi_query", False) and
-            (self.hf_config.model_type == "falcon" and
-             not getattr(self.hf_config, "new_decoder_architecture", False))):
+        new_decoder_arch_falcon = (
+            self.hf_config.model_type == "falcon"
+            and getattr(self.hf_config, "new_decoder_architecture", False))
+        if not new_decoder_arch_falcon and getattr(self.hf_config,
+                                                   "multi_query", False):
             # Multi-query attention, only one KV head.
             return 1
         # For Falcon:
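For context, the old condition only returned a single KV head for pre-new-architecture Falcon models, so GPTBigCode checkpoints (which also set multi_query=True) fell through and were counted as having a full set of KV heads. Below is a minimal, self-contained sketch of the corrected logic, assuming a Hugging Face-style config object; the standalone num_kv_heads helper and the mock configs are illustrative only, not vLLM's API (the real check lives on ModelConfig in vllm/config.py):

from types import SimpleNamespace


def num_kv_heads(hf_config) -> int:
    """Sketch of the fixed rule: any multi-query model has one KV head,
    unless it is a Falcon model with the new decoder architecture,
    which ignores the multi_query flag and uses n_head_kv instead."""
    new_decoder_arch_falcon = (
        hf_config.model_type == "falcon"
        and getattr(hf_config, "new_decoder_architecture", False))
    if not new_decoder_arch_falcon and getattr(hf_config, "multi_query",
                                               False):
        # Multi-query attention: a single KV head shared by all
        # query heads.
        return 1
    # Simplified fallback for this sketch: use a per-model KV-head
    # attribute if present, else the full attention head count.
    return getattr(hf_config, "n_head_kv", hf_config.num_attention_heads)


# GPTBigCode (e.g. StarCoder) uses multi-query attention; with the old
# falcon-only guard it never hit the "return 1" branch.
bigcode = SimpleNamespace(model_type="gpt_bigcode", multi_query=True,
                          num_attention_heads=48)
assert num_kv_heads(bigcode) == 1

# New-architecture Falcon ignores multi_query and uses n_head_kv.
falcon = SimpleNamespace(model_type="falcon", multi_query=True,
                         new_decoder_architecture=True, n_head_kv=8,
                         num_attention_heads=128)
assert num_kv_heads(falcon) == 8

Inverting the condition also reads closer to the intent stated in the comment: multi-query is the general case, and new-decoder-architecture Falcon is the one exception.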