Use hidden_size_per_head as head_size fallback (#24221)

Signed-off-by: nopperl <54780682+nopperl@users.noreply.github.com>
2025-12-17 07:05:01 +08:00 · 2025-09-04 20:59:16 +09:00 · 2025-09-04 20:59:16 +09:00 · 2b30afa442
commit 2b30afa442
parent eafa8dcde6
1 changed files with 5 additions and 0 deletions
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -1426,6 +1426,11 @@ class ModelConfig:
        if getattr(self.hf_text_config, "head_dim", None) is not None:
            return self.hf_text_config.head_dim
        # NOTE: Some models (such as PLaMo2.1) use `hidden_size_per_head`
        if getattr(self.hf_text_config, "hidden_size_per_head",
                   None) is not None:
            return self.hf_text_config.hidden_size_per_head
        # FIXME(woosuk): This may not be true for all models.
        return (self.hf_text_config.hidden_size //
                self.hf_text_config.num_attention_heads)