diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index 00869467be34..62d8354f4f9d 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -410,6 +410,9 @@ class TPUModelRunner: # Do the padding and copy the tensors to the TPU. padded_total_num_scheduled_tokens = _get_padded_token_len( total_num_scheduled_tokens) + # Zero out to avoid spurious values from prev iteration (last cp chunk) + self.input_ids_cpu[ + total_num_scheduled_tokens:padded_total_num_scheduled_tokens] = 0 self.input_ids = self.input_ids_cpu[: padded_total_num_scheduled_tokens].to( self.device)