From 843af7f7fc6c9395da02da0f2f5fa2cfdcf92e81 Mon Sep 17 00:00:00 2001
From: "Li, Jiang"
Date: Wed, 22 Oct 2025 19:02:27 +0800
Subject: [PATCH] [Bugfix][CPU] Disable dual stream execution for experts on
 CPU (#27320)

Signed-off-by: jiang1.li
---
 vllm/platforms/cpu.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py
index 69f2b1079aa4d..699a56be5cc4d 100644
--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@@ -297,6 +297,9 @@ class CpuPlatform(Platform):
         # Disable torch async compiling which won't work with daemonic processes
         os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1"
 
+        # Disable multi-stream for shared experts as no Stream on CPU
+        os.environ["VLLM_DISABLE_SHARED_EXPERTS_STREAM"] = "0"
+
         # Intel OpenMP setting
         ld_prealod_str = os.getenv("LD_PRELOAD", "")
         if "libiomp5.so" in ld_prealod_str:
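
For context, below is a minimal sketch of how an environment flag like VLLM_DISABLE_SHARED_EXPERTS_STREAM might gate dual-stream execution of shared experts. The helper names, the parsing rule, and the stream-selection logic are illustrative assumptions, not code taken from vLLM or from this patch; the only facts carried over are that the flag is exported on CPU and that CPU has no CUDA stream to run the shared experts on.

# Illustrative sketch only: env-gated dual-stream execution of shared experts.
# Assumption: the consumer treats the flag as "set means disabled", which is
# consistent with this patch exporting the literal string "0" on CPU.
import os

import torch


def _shared_experts_stream_disabled() -> bool:
    # Hypothetical check: any non-empty value counts as a disable request.
    return bool(os.getenv("VLLM_DISABLE_SHARED_EXPERTS_STREAM"))


def run_shared_experts(shared_experts: torch.nn.Module,
                       hidden_states: torch.Tensor) -> torch.Tensor:
    # Fall back to single-stream execution when the flag is set or when no
    # CUDA device exists (torch.cuda.Stream is unavailable on CPU).
    if _shared_experts_stream_disabled() or not torch.cuda.is_available():
        return shared_experts(hidden_states)

    # Hypothetical dual-stream path: overlap shared-expert compute with the
    # routed-expert work queued on the current stream.
    side_stream = torch.cuda.Stream()
    side_stream.wait_stream(torch.cuda.current_stream())
    with torch.cuda.stream(side_stream):
        out = shared_experts(hidden_states)
    torch.cuda.current_stream().wait_stream(side_stream)
    return out

Setting the variable in CpuPlatform's setup, as the patch does, keeps model code free of CPU-specific branches: any consumer that honors the flag automatically takes the single-stream path on CPU.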