From 7f83f40dee2e92ff005d44a262a3cf42c87c2082 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Date: Fri, 28 Jun 2024 18:55:17 -0700
Subject: [PATCH] [Bugfix][TPU] Fix pad slot id (#5977)

---
 vllm/worker/tpu_model_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/worker/tpu_model_runner.py b/vllm/worker/tpu_model_runner.py
index 7827f7c7447a1..dd08536efc5fb 100644
--- a/vllm/worker/tpu_model_runner.py
+++ b/vllm/worker/tpu_model_runner.py
@@ -19,7 +19,7 @@ from vllm.utils import make_tensor_with_pad
 
 logger = init_logger(__name__)
 
-_PAD_SLOT_ID = 0  # FIXME(woosuk)
+_PAD_SLOT_ID = -1  # NOTE(woosuk): In PyTorch XLA, index -1 is ignored.
 # FIXME(woosuk): Temporarily disabled top-p sampling since it's too slow.
 _ENABLE_TOP_P = False
 # FIXME(woosuk): A temporary hack to support `n > 1`.