diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 99205b34fb8b5..15f40bcef8969 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -2122,7 +2122,7 @@ class CUDAGraphRunner(nn.Module): **kwargs, ) -> torch.Tensor: attn_metadata: AttentionMetadata = get_forward_context().attn_metadata - + # Copy the input tensors to the input buffers. self.input_buffers["input_ids"].copy_(input_ids, non_blocking=True) if positions is not None: