From dd39baf7175c5e79faef071c67bb318eadb7752f Mon Sep 17 00:00:00 2001
From: Kunshang Ji <kunshang.ji@intel.com>
Date: Wed, 17 Sep 2025 14:45:25 +0800
Subject: [PATCH] [XPU] Fix xpu model runner call torch.cuda APIs (#25011)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
---
 vllm/v1/worker/xpu_model_runner.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vllm/v1/worker/xpu_model_runner.py b/vllm/v1/worker/xpu_model_runner.py
index fb892211f19d..7becdd392498 100644
--- a/vllm/v1/worker/xpu_model_runner.py
+++ b/vllm/v1/worker/xpu_model_runner.py
@@ -45,8 +45,12 @@ def _torch_cuda_wrapper():
             self.synchronize = lambda: None
 
     try:
-        # replace cuda Event with xpu Event, this should work by default
+        # replace cuda APIs with xpu APIs; this should work by default
         torch.cuda.Event = torch.xpu.Event
+        torch.cuda.Stream = torch.xpu.Stream
+        torch.cuda.default_stream = torch.xpu.current_stream
+        torch.cuda.current_stream = torch.xpu.current_stream
+        torch.cuda.stream = torch.xpu.stream
         yield
     finally:
         # if anything goes wrong, just patch it with a placeholder