[Bugfix][CPU] Disable dual stream execution for experts on CPU (#27320)

Signed-off-by: jiang1.li <jiang1.li@intel.com>
2026-03-17 15:27:19 +08:00 · 2025-10-22 19:02:27 +08:00 · 2025-10-22 19:02:27 +08:00 · 843af7f7fc
commit 843af7f7fc
parent 1f633b8632
1 changed files with 3 additions and 0 deletions
--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@@ -297,6 +297,9 @@ class CpuPlatform(Platform):
        # Disable torch async compiling which won't work with daemonic processes
        os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1"

+        # Disable multi-stream for shared experts as no Stream on CPU
+        os.environ["VLLM_DISABLE_SHARED_EXPERTS_STREAM"] = "0"
+
        # Intel OpenMP setting
        ld_prealod_str = os.getenv("LD_PRELOAD", "")
        if "libiomp5.so" in ld_prealod_str: