From 0e0d51c9c6c2473d731420f94f79e2c549df09f4 Mon Sep 17 00:00:00 2001
From: Sage Moore <sage@neuralmagic.com>
Date: Wed, 24 Sep 2025 12:02:08 -0700
Subject: [PATCH] Suppress benign cuBLAS warning when capturing cudagraphs with
 DBO (#25596)

Signed-off-by: Sage Moore <sage@neuralmagic.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 vllm/v1/worker/gpu_ubatch_wrapper.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vllm/v1/worker/gpu_ubatch_wrapper.py b/vllm/v1/worker/gpu_ubatch_wrapper.py
index d636e7af72ea1..8f6044e594184 100644
--- a/vllm/v1/worker/gpu_ubatch_wrapper.py
+++ b/vllm/v1/worker/gpu_ubatch_wrapper.py
@@ -104,6 +104,7 @@ class UBatchWrapper:
             self.graph_pool = current_platform.get_global_graph_pool()
 
         self.sm_control = self._create_sm_control_context(vllm_config)
+        self.device = device
 
     @staticmethod
     def _create_sm_control_context(vllm_config: VllmConfig):
@@ -168,6 +169,7 @@ class UBatchWrapper:
 
         @torch.inference_mode()
         def _capture_ubatch_thread(results, ubatch_metadata):
+            torch.cuda.set_device(self.device)
             ubatch_context = ubatch_metadata.context
             with torch.cuda.stream(ubatch_context.compute_stream):
                 _ = torch.cuda.current_blas_handle()