[Minor] Group async_scheduling related fields in model runner init (#26736)

Signed-off-by: Nick Hill <nhill@redhat.com>
2026-07-18 05:07:10 +08:00 · 2025-10-14 14:46:37 -07:00 · 2025-10-14 14:46:37 -07:00 · ff4810ba73
commit ff4810ba73
parent 9d6964926e
1 changed files with 9 additions and 11 deletions
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -375,9 +375,15 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
        )
        self.use_async_scheduling = self.scheduler_config.async_scheduling
-        self.async_output_copy_stream = (
+        # Separate cuda stream for overlapping transfer of sampled token ids from
-            torch.cuda.Stream() if self.use_async_scheduling else None
+        # GPU to CPU when async scheduling is enabled.
-        )
+        self.async_output_copy_stream: torch.cuda.Stream | None = None
+        # cuda event to synchronize use of reused CPU tensors between steps
+        # when async scheduling is enabled.
+        self.prepare_inputs_event: torch.cuda.Event | None = None
+        if self.use_async_scheduling:
+            self.async_output_copy_stream = torch.cuda.Stream()
+            self.prepare_inputs_event = torch.cuda.Event()
        # TODO(woosuk): Provide an option to tune the max cudagraph batch size.
        # The convention is different.
@@ -444,14 +450,6 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                (3, self.max_num_tokens + 1), dtype=torch.int64
            )
-        # CUDA event to synchronize use of reused CPU tensors between steps
-        # when async scheduling is enabled.
-        self.prepare_inputs_event: torch.cuda.Event | None = None
-        if self.use_async_scheduling:
-            self.prepare_inputs_event = torch.cuda.Event()
-            # Start in a completed state.
-            self.prepare_inputs_event.record(torch.cuda.default_stream())
        # None in the first PP rank. The rest are set after load_model.
        self.intermediate_tensors: IntermediateTensors | None = None