[Minor] Zero-initialize attn output buffer (#19784)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2025-12-13 20:04:58 +08:00 · 2025-06-17 23:59:27 -07:00 · 2025-06-17 23:59:27 -07:00 · f04d604567
commit f04d604567
parent 19a53b2783
1 changed file with 1 addition and 1 deletion
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -209,7 +209,7 @@ class Attention(nn.Module):
        if self.use_output:
            output_shape = (output_shape
                            if output_shape is not None else query.shape)
-            output = torch.empty(output_shape,
+            output = torch.zeros(output_shape,
                                 dtype=query.dtype,
                                 device=query.device)
            hidden_size = output_shape[-1]