From 0e0d51c9c6c2473d731420f94f79e2c549df09f4 Mon Sep 17 00:00:00 2001 From: Sage Moore Date: Wed, 24 Sep 2025 12:02:08 -0700 Subject: [PATCH] Suppress benign cuBLAS warning when capturing cudagraphs with DBO (#25596) Signed-off-by: Sage Moore Signed-off-by: yewentao256 --- vllm/v1/worker/gpu_ubatch_wrapper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/v1/worker/gpu_ubatch_wrapper.py b/vllm/v1/worker/gpu_ubatch_wrapper.py index d636e7af72ea1..8f6044e594184 100644 --- a/vllm/v1/worker/gpu_ubatch_wrapper.py +++ b/vllm/v1/worker/gpu_ubatch_wrapper.py @@ -104,6 +104,7 @@ class UBatchWrapper: self.graph_pool = current_platform.get_global_graph_pool() self.sm_control = self._create_sm_control_context(vllm_config) + self.device = device @staticmethod def _create_sm_control_context(vllm_config: VllmConfig): @@ -168,6 +169,7 @@ class UBatchWrapper: @torch.inference_mode() def _capture_ubatch_thread(results, ubatch_metadata): + torch.cuda.set_device(self.device) ubatch_context = ubatch_metadata.context with torch.cuda.stream(ubatch_context.compute_stream): _ = torch.cuda.current_blas_handle()