From d3ab240f39219df0175ec662416f630d7bf273d8 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Tue, 28 Oct 2025 19:53:12 -0400
Subject: [PATCH] [Bug] Fix deepep low latency use nvlink by default (#27677)

Signed-off-by: yewentao256
---
 vllm/envs.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/envs.py b/vllm/envs.py
index 73bb2678ea85e..018af0e5bba8f 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -205,7 +205,7 @@ if TYPE_CHECKING:
     VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME: str = "VLLM_OBJECT_STORAGE_SHM_BUFFER"
     VLLM_DEEPEP_BUFFER_SIZE_MB: int = 1024
     VLLM_DEEPEP_HIGH_THROUGHPUT_FORCE_INTRA_NODE: bool = False
-    VLLM_DEEPEP_LOW_LATENCY_ALLOW_NVLINK: bool = False
+    VLLM_DEEPEP_LOW_LATENCY_ALLOW_NVLINK: bool = True
     VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL: bool = False
     VLLM_DBO_COMM_SMS: int = 20
     GPT_OSS_SYSTEM_TOOL_MCP_LABELS: list[str] = []
@@ -1362,7 +1362,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
     # Allow DeepEP to use nvlink for internode_ll kernel, turn this on for
     # better latency on GB200 like system
     "VLLM_DEEPEP_LOW_LATENCY_ALLOW_NVLINK": lambda: bool(
-        int(os.getenv("VLLM_DEEPEP_LOW_LATENCY_ALLOW_NVLINK", "0"))
+        int(os.getenv("VLLM_DEEPEP_LOW_LATENCY_ALLOW_NVLINK", "1"))
     ),
     # Allow DeepEP to use MNNVL (multi-node nvlink) for internode_ll kernel,
     # turn this for better latency on GB200 like system