From aec572f39d8b703e4772965b99fc8dd261f71081 Mon Sep 17 00:00:00 2001
From: zhuhaoran
Date: Fri, 12 Dec 2025 10:56:06 +0800
Subject: [PATCH] lint: fix mypy error

Signed-off-by: zhuhaoran
---
 vllm/v1/worker/gpu_model_runner.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 0c7d8d2b2c435..51812c87227de 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -591,7 +591,6 @@ class GPUModelRunner(
         # with dedicated stream for overlapping and event for coordination.
         self.valid_sampled_token_count_event: torch.Event | None = None
         self.valid_sampled_token_count_copy_stream: torch.cuda.Stream | None = None
-        self.valid_sampled_token_count_cpu: torch.Tensor | None = None
         # Pre-allocated tensor for copying draft token ids to CPU,
         # with dedicated stream for overlapping and event for coordination.
         self.draft_token_ids_copy_event: torch.Event | None = None
@@ -600,12 +599,6 @@ class GPUModelRunner(
         if self.use_async_scheduling and self.num_spec_tokens:
             self.valid_sampled_token_count_event = torch.Event()
             self.valid_sampled_token_count_copy_stream = torch.cuda.Stream()
-            self.valid_sampled_token_count_cpu = torch.empty(
-                self.max_num_reqs,
-                dtype=torch.int64,
-                device="cpu",
-                pin_memory=self.pin_memory,
-            )
             self.draft_token_ids_copy_event = torch.Event()
             self.draft_token_ids_copy_stream = torch.cuda.Stream()
             self.draft_token_ids_cpu = torch.empty(
@@ -615,6 +608,12 @@ class GPUModelRunner(
                 pin_memory=self.pin_memory,
             )
             self._prev_copy_draft_num_reqs: int = 0
+        self.valid_sampled_token_count_cpu = torch.empty(
+            self.max_num_reqs,
+            dtype=torch.int64,
+            device="cpu",
+            pin_memory=self.pin_memory,
+        )
 
         # Ephemeral state transferred between execute_model() and sample_tokens().
         self.execute_model_state: ExecuteModelState | None = None
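
Note (illustration, not part of the applied patch): the fix follows the usual
pattern for this class of mypy error. Declaring the attribute as
"torch.Tensor | None" and allocating it only under a condition forces every
later use through a None check; allocating the pinned CPU buffer
unconditionally lets mypy infer a plain torch.Tensor. Below is a minimal
runnable sketch of the pattern; the Runner class and its parameters are
stand-ins for illustration, not vLLM's real GPUModelRunner API, and the mypy
complaint described in the comments is paraphrased.

    import torch

    class Runner:
        # Minimal stand-in for the constructor logic patched above.
        def __init__(self, max_num_reqs: int, pin_memory: bool) -> None:
            self.max_num_reqs = max_num_reqs
            self.pin_memory = pin_memory

            # Pre-patch shape of the problem:
            #     self.valid_sampled_token_count_cpu: torch.Tensor | None = None
            #     if some_condition:
            #         self.valid_sampled_token_count_cpu = torch.empty(...)
            # Any unguarded use such as self.valid_sampled_token_count_cpu[:n]
            # then makes mypy report that the value may be None.
            #
            # Post-patch shape: allocate the pinned CPU staging buffer
            # unconditionally, so the inferred type is plain torch.Tensor.
            self.valid_sampled_token_count_cpu = torch.empty(
                max_num_reqs,
                dtype=torch.int64,
                device="cpu",
                pin_memory=pin_memory,
            )

    # Pinning host memory requires a CUDA runtime, so gate it on availability.
    runner = Runner(max_num_reqs=4, pin_memory=torch.cuda.is_available())
    print(runner.valid_sampled_token_count_cpu.shape)  # torch.Size([4])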