[Bugfix][TPU] Use np array when updating cache slot_mapping (#17971)

Signed-off-by: Siyuan Liu <lsiyuan@google.com>
This commit is contained in:
Siyuan Liu 2025-05-11 21:58:33 -07:00 committed by GitHub
parent 19a3c78d1f
commit 430783018c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -531,7 +531,7 @@ class TPUModelRunner(LoRAModelRunnerMixin):
         np.add(block_numbers * self.block_size,
                block_offsets,
                out=self.input_batch.block_table.
-               slot_mapping_cpu[:total_num_scheduled_tokens])
+               slot_mapping_np[:total_num_scheduled_tokens])
         # Prepare the attention metadata.
         self.query_start_loc_np[0] = 0