Suppress benign cuBLAS warning when capturing cudagraphs with DBO (#25596)

Signed-off-by: Sage Moore <sage@neuralmagic.com>
2026-01-23 12:44:27 +08:00 · 2025-09-24 12:02:08 -07:00 · 2025-09-24 12:02:08 -07:00 · f84a472a03
commit f84a472a03
parent 54e42b72db
1 changed file with 2 additions and 0 deletions
--- a/vllm/v1/worker/gpu_ubatch_wrapper.py
+++ b/vllm/v1/worker/gpu_ubatch_wrapper.py
@@ -104,6 +104,7 @@ class UBatchWrapper:
            self.graph_pool = current_platform.get_global_graph_pool()

        self.sm_control = self._create_sm_control_context(vllm_config)
+        self.device = device

    @staticmethod
    def _create_sm_control_context(vllm_config: VllmConfig):
@@ -168,6 +169,7 @@ class UBatchWrapper:

        @torch.inference_mode()
        def _capture_ubatch_thread(results, ubatch_metadata):
+            torch.cuda.set_device(self.device)
            ubatch_context = ubatch_metadata.context
            with torch.cuda.stream(ubatch_context.compute_stream):
                _ = torch.cuda.current_blas_handle()