mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 04:45:01 +08:00
[TPU][V1][Bugfix] Fix chunked prefill with padding (#15037)
Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
parent
3b457143d2
commit
af35d3a3cc
@@ -410,6 +410,9 @@ class TPUModelRunner:
         # Do the padding and copy the tensors to the TPU.
         padded_total_num_scheduled_tokens = _get_padded_token_len(
             total_num_scheduled_tokens)
+        # Zero out to avoid spurious values from prev iteration (last cp chunk)
+        self.input_ids_cpu[
+            total_num_scheduled_tokens:padded_total_num_scheduled_tokens] = 0
         self.input_ids = self.input_ids_cpu[:
             padded_total_num_scheduled_tokens].to(
                 self.device)
Loading…
x
Reference in New Issue
Block a user