mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 11:16:45 +08:00
[Bugfix][TPU] Use np array when updating cache slot_mapping (#17971)
Signed-off-by: Siyuan Liu <lsiyuan@google.com>
This commit is contained in:
parent
19a3c78d1f
commit
430783018c
@@ -531,7 +531,7 @@ class TPUModelRunner(LoRAModelRunnerMixin):
         np.add(block_numbers * self.block_size,
                block_offsets,
                out=self.input_batch.block_table.
-               slot_mapping_cpu[:total_num_scheduled_tokens])
+               slot_mapping_np[:total_num_scheduled_tokens])
 
         # Prepare the attention metadata.
         self.query_start_loc_np[0] = 0
Loading…
x
Reference in New Issue
Block a user