diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 7c2b49702265..941aff8919a9 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -1426,6 +1426,11 @@ class ModelConfig: if getattr(self.hf_text_config, "head_dim", None) is not None: return self.hf_text_config.head_dim + # NOTE: Some models (such as PLaMo2.1) use `hidden_size_per_head` + if getattr(self.hf_text_config, "hidden_size_per_head", + None) is not None: + return self.hf_text_config.hidden_size_per_head + # FIXME(woosuk): This may not be true for all models. return (self.hf_text_config.hidden_size // self.hf_text_config.num_attention_heads)