From 18f7bfb501726abbd4f155ee4fd5792b058163b6 Mon Sep 17 00:00:00 2001 From: Sage Moore Date: Wed, 2 Jul 2025 22:22:41 +0000 Subject: [PATCH] ubatching fix Signed-off-by: Sage Moore --- vllm/v1/worker/gpu_model_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 94a433c36b872..c43917baaa9eb 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -1716,7 +1716,7 @@ class GPUModelRunner(LoRAModelRunnerMixin): # num_tokens = ubatch_slices[1][1].stop print(f"RUNNING UBATCH {ubatch_slices} is_dummy_run: {is_dummy_run} num_tokens_across_dp{num_tokens_across_dp}") # assert not is_dummy_run - compute_stream = torch.cuda.Stream(device=self.device) + compute_stream = torch.cuda.current_stream() ubatch_metadata = _make_ubatch_metadata( ubatch_slices=ubatch_slices, attn_metadata=attn_metadata,