From 1afa9948f5931109cbe5201fee4fd7614ff7f904 Mon Sep 17 00:00:00 2001
From: Brayden Zhong
Date: Tue, 24 Jun 2025 22:42:53 -0400
Subject: [PATCH] [Llama4] Update `attn_temperature_tuning` (#19997)

Signed-off-by: Brayden Zhong
---
 vllm/model_executor/models/llama4.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/llama4.py b/vllm/model_executor/models/llama4.py
index 9fb73261cd89..0c9baab1f2e4 100644
--- a/vllm/model_executor/models/llama4.py
+++ b/vllm/model_executor/models/llama4.py
@@ -148,9 +148,8 @@ class Llama4Attention(nn.Module):
         self.q_size = self.num_heads * self.head_dim
         self.kv_size = self.num_kv_heads * self.head_dim
         self.scaling = self.head_dim**-0.5
-        # TODO: attn_temperature_tuning should be a bool in huggingface
         self.attn_temperature_tuning = self.nope and \
-            config.attn_temperature_tuning > 0
+            config.attn_temperature_tuning
         self.floor_scale = getattr(config, "floor_scale", 8192.0)
         self.attn_scale = getattr(config, "attn_scale", 0.1)
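
For context, `attn_temperature_tuning` gates a per-position scaling of the query
vectors on NoPE layers, parameterized by the `floor_scale` and `attn_scale`
values read from the config above. The snippet below is a minimal sketch of that
scaling under the published Llama 4 formulation, not the vLLM implementation;
the function name and defaults are illustrative only.

import torch


def attn_temperature_scale(positions: torch.Tensor,
                           floor_scale: float = 8192.0,
                           attn_scale: float = 0.1) -> torch.Tensor:
    # Per-token scale applied to queries when attn_temperature_tuning is
    # enabled: 1.0 for early positions, growing logarithmically with the
    # position's "floor bucket" for very long contexts.
    return attn_scale * torch.log(
        torch.floor((positions + 1.0) / floor_scale) + 1.0) + 1.0


# Example: the scale stays at 1.0 below floor_scale and increases afterwards.
positions = torch.tensor([0.0, 8191.0, 8192.0, 1_000_000.0])
print(attn_temperature_scale(positions))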