From 0a02744dc8468d70f8e11c1c914616f338e3e98d Mon Sep 17 00:00:00 2001 From: Alexander Matveev Date: Fri, 31 Jan 2025 01:18:56 +0000 Subject: [PATCH] fix TP --- vllm/worker/model_runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 0f654576aa67d..398f122356f44 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -1975,7 +1975,8 @@ class CUDAGraphRunner(nn.Module): # Copy the input tensors to the input buffers. self.input_buffers["input_ids"].copy_(input_ids, non_blocking=True) - self.input_buffers["positions"].copy_(positions, non_blocking=True) + if positions is not None: + self.input_buffers["positions"].copy_(positions, non_blocking=True) if self.backend_name != "NO_ATTENTION": self.input_buffers["slot_mapping"].copy_(