mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-18 00:16:01 +08:00
[BugFix] Fix full cuda graph slot_mapping (#21228)
Signed-off-by: fhl2000 <63384265+fhl2000@users.noreply.github.com>
This commit is contained in:
parent
752c6ade2e
commit
2e8cbb58f3
@@ -2079,7 +2079,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
 block_table_tensor=self.input_batch.block_table[
 kv_cache_group_id].get_device_tensor()[:num_reqs],
 slot_mapping=self.input_batch.
-block_table[kv_cache_group_id].slot_mapping[:num_reqs])
+block_table[kv_cache_group_id].slot_mapping[:num_tokens])
 
 attn_metadata_i = self.attn_metadata_builders[
 kv_cache_group_id].build_for_cudagraph_capture(
Loading…
x
Reference in New Issue
Block a user