From 843af7f7fc6c9395da02da0f2f5fa2cfdcf92e81 Mon Sep 17 00:00:00 2001
From: "Li, Jiang"
Date: Wed, 22 Oct 2025 19:02:27 +0800
Subject: [PATCH] [Bugfix][CPU] Disable dual stream execution for experts on
 CPU (#27320)

Signed-off-by: jiang1.li
---
 vllm/platforms/cpu.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py
index 69f2b1079aa4d..699a56be5cc4d 100644
--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@@ -297,6 +297,9 @@ class CpuPlatform(Platform):
         # Disable torch async compiling which won't work with daemonic processes
         os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1"
 
+        # Disable multi-stream for shared experts as no Stream on CPU
+        os.environ["VLLM_DISABLE_SHARED_EXPERTS_STREAM"] = "0"
+
         # Intel OpenMP setting
         ld_prealod_str = os.getenv("LD_PRELOAD", "")
         if "libiomp5.so" in ld_prealod_str:
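
For context, below is a minimal sketch of how an environment flag like VLLM_DISABLE_SHARED_EXPERTS_STREAM might gate dual-stream execution of shared experts. The helper names, the parsing rule, and the stream-selection logic are illustrative assumptions, not code taken from vLLM or from this patch; the only facts carried over are that the flag is exported on CPU and that CPU has no CUDA stream to run the shared experts on.

# Illustrative sketch only: env-gated dual-stream execution of shared experts.
# Assumption: the consumer treats the flag as "set means disabled", which is
# consistent with this patch exporting the literal string "0" on CPU.
import os

import torch


def _shared_experts_stream_disabled() -> bool:
    # Hypothetical check: any non-empty value counts as a disable request.
    return bool(os.getenv("VLLM_DISABLE_SHARED_EXPERTS_STREAM"))


def run_shared_experts(shared_experts: torch.nn.Module,
                       hidden_states: torch.Tensor) -> torch.Tensor:
    # Fall back to single-stream execution when the flag is set or when no
    # CUDA device exists (torch.cuda.Stream is unavailable on CPU).
    if _shared_experts_stream_disabled() or not torch.cuda.is_available():
        return shared_experts(hidden_states)

    # Hypothetical dual-stream path: overlap shared-expert compute with the
    # routed-expert work queued on the current stream.
    side_stream = torch.cuda.Stream()
    side_stream.wait_stream(torch.cuda.current_stream())
    with torch.cuda.stream(side_stream):
        out = shared_experts(hidden_states)
    torch.cuda.current_stream().wait_stream(side_stream)
    return out

Setting the variable in CpuPlatform's setup, as the patch does, keeps model code free of CPU-specific branches: any consumer that honors the flag automatically takes the single-stream path on CPU.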