diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 1ee9c070226c0..670e653929cea 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2079,7 +2079,7 @@ class GPUModelRunner(LoRAModelRunnerMixin): block_table_tensor=self.input_batch.block_table[ kv_cache_group_id].get_device_tensor()[:num_reqs], slot_mapping=self.input_batch. - block_table[kv_cache_group_id].slot_mapping[:num_reqs]) + block_table[kv_cache_group_id].slot_mapping[:num_tokens]) attn_metadata_i = self.attn_metadata_builders[ kv_cache_group_id].build_for_cudagraph_capture(