From 0c48b37c310254e83cd2906230e87af97cb148ba Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Tue, 13 Feb 2024 18:01:15 -0800
Subject: [PATCH] Fix internlm after
 https://github.com/vllm-project/vllm/pull/2860 (#2861)

---
 vllm/model_executor/models/llama.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
index 6202e81fffa7..1d0353d7d396 100644
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -175,7 +175,8 @@ class LlamaDecoderLayer(nn.Module):
         self.self_attn = LlamaAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
-            num_kv_heads=config.num_key_value_heads,
+            num_kv_heads=getattr(config, "num_key_value_heads",
+                                 config.num_attention_heads),
             rope_theta=rope_theta,
             rope_scaling=rope_scaling,
             max_position_embeddings=max_position_embeddings,
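For context: the patch replaces a direct attribute access with a getattr fallback, because InternLM's config (which reuses this Llama model code in vLLM) does not define num_key_value_heads; for such models the number of KV heads defaults to the number of query heads, i.e. standard multi-head attention rather than grouped-query attention. A minimal sketch of the fallback behavior; resolve_num_kv_heads and the SimpleNamespace stand-ins are illustrative, not part of vLLM:

    from types import SimpleNamespace

    def resolve_num_kv_heads(config):
        # Fall back to num_attention_heads (plain multi-head attention)
        # when the config does not define num_key_value_heads.
        return getattr(config, "num_key_value_heads",
                       config.num_attention_heads)

    # Llama-style config with grouped-query attention: 32 query heads, 8 KV heads.
    gqa = SimpleNamespace(num_attention_heads=32, num_key_value_heads=8)
    assert resolve_num_kv_heads(gqa) == 8

    # InternLM-style config without the attribute: falls back to 32 (MHA).
    mha = SimpleNamespace(num_attention_heads=32)
    assert resolve_num_kv_heads(mha) == 32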