[BugFix] Fix full cuda graph slot_mapping (#21228)

Signed-off-by: fhl2000 <63384265+fhl2000@users.noreply.github.com>
2026-03-18 07:37:06 +08:00 · 2025-07-20 05:13:18 +08:00 · 2025-07-20 05:13:18 +08:00 · 2e8cbb58f3
commit 2e8cbb58f3
parent 752c6ade2e
1 changed files with 1 additions and 1 deletions
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2079,7 +2079,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
                    block_table_tensor=self.input_batch.block_table[
                        kv_cache_group_id].get_device_tensor()[:num_reqs],
                    slot_mapping=self.input_batch.
-                    block_table[kv_cache_group_id].slot_mapping[:num_reqs])
+                    block_table[kv_cache_group_id].slot_mapping[:num_tokens])

                attn_metadata_i = self.attn_metadata_builders[
                    kv_cache_group_id].build_for_cudagraph_capture(