[XPU] Fix CUDA event usage in XPU model runner (#23708)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
2026-05-27 02:41:19 +08:00 · 2025-08-27 15:27:14 +08:00 · 2025-08-27 15:27:14 +08:00 · 6446677839
commit 6446677839
parent 69244e67e6
1 changed file with 21 additions and 1 deletion
--- a/vllm/v1/worker/xpu_model_runner.py
+++ b/vllm/v1/worker/xpu_model_runner.py
@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from contextlib import contextmanager
 from typing import TYPE_CHECKING
 import torch
@ -22,7 +23,8 @@ class XPUModelRunner(GPUModelRunner):
        vllm_config: VllmConfig,
        device: torch.device,
    ):
-        super().__init__(vllm_config, device)
+        with _torch_cuda_wrapper():
            super().__init__(vllm_config, device)
        # FIXME: To be verified.
        self.cascade_attn_enabled = False
@ -31,3 +33,21 @@ class XPUModelRunner(GPUModelRunner):
    def _sync_device(self) -> None:
        """Synchronize the device by calling ``torch.xpu.synchronize()``."""
        torch.xpu.synchronize()
@contextmanager
 def _torch_cuda_wrapper():
    class _EventPlaceholder:
        def __init__(self, *args, **kwargs) -> None:
            self.record = lambda: None
            self.synchronize = lambda: None
    try:
        # replace cuda Event with xpu Event, this should work by default
        torch.cuda.Event = torch.xpu.Event
        yield
    finally:
        # if anything goes wrong, just patch it with a placeholder
        torch.cuda.Event = _EventPlaceholder