From 88016c372a5962eb98f4dfc71243ccd64433710e Mon Sep 17 00:00:00 2001
From: "Li, Jiang"
Date: Fri, 22 Aug 2025 17:47:17 +0800
Subject: [PATCH] [Bugfix] Fix pooling models on CPU backend (#23392)

Signed-off-by: jiang1.li
---
 vllm/utils/__init__.py | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index 1eefb32eaa90b..7079bfb8dbcee 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -1440,6 +1440,12 @@ def _patched_set_stream(stream: torch.cuda.Stream) -> None:
     torch.cuda.set_stream = _patched_set_stream
 
 
+class _StreamPlaceholder:
+
+    def __init__(self):
+        self.synchronize = lambda: None
+
+
 def current_stream() -> torch.cuda.Stream:
     """
     replace `torch.cuda.current_stream()` with `vllm.utils.current_stream()`.
@@ -1459,8 +1465,18 @@ def current_stream() -> torch.cuda.Stream:
         # On ROCm using the default 0 stream in combination with RCCL
         # is hurting performance. Therefore creating a dedicated stream
         # per process
-        _current_stream_tls.value = torch.cuda.Stream(
-        ) if current_platform.is_rocm() else torch.cuda.current_stream()
+        if current_platform.is_rocm():
+            _current_stream_tls.value = torch.cuda.Stream()
+        elif current_platform.is_cpu():
+            _current_stream_tls.value = _StreamPlaceholder()
+        else:
+            current_stream = current_platform.current_stream
+            if current_stream is not None:
+                _current_stream_tls.value = current_stream()
+            else:
+                raise ValueError(
+                    "Failed to set the current stream: the current platform "
+                    "may not support current_stream via the torch API")
     return _current_stream_tls.value
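
For context, here is a minimal standalone sketch of the pattern this patch introduces. It is illustrative only: the `backend` argument and the module layout are hypothetical stand-ins for `current_platform.is_cpu()` and the thread-local caching in vllm/utils/__init__.py, not vLLM's real API. The idea is that on stream-less backends, `current_stream()` hands back a placeholder whose `synchronize()` is a no-op, so callers such as pooling-model code can synchronize unconditionally without a CPU special case.

# Illustrative sketch; names here are hypothetical, not vLLM's actual API.
import threading


class _StreamPlaceholder:
    """Stands in for torch.cuda.Stream on stream-less backends such as CPU."""

    def __init__(self):
        # Synchronizing a CPU "stream" is a no-op, so callers may invoke
        # current_stream().synchronize() unconditionally.
        self.synchronize = lambda: None


_tls = threading.local()


def current_stream(backend: str = "cpu"):
    # Cache one stream object per thread, mirroring the thread-local
    # caching that vllm.utils.current_stream() performs.
    if getattr(_tls, "value", None) is None:
        if backend == "cpu":
            _tls.value = _StreamPlaceholder()
        else:
            import torch
            _tls.value = torch.cuda.current_stream()
    return _tls.value


# Caller-side: no branch on the backend is needed anymore.
current_stream().synchronize()  # no-op on CPU, blocks on a real CUDA stream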