From b4a80dad98052624364a2b896d102a77559433c0 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Wed, 24 Sep 2025 18:43:06 -0400 Subject: [PATCH] [Logging] Improve log for when DeepEP HT disables CUDA Graphs (#25531) Signed-off-by: Tyler Michael Smith Signed-off-by: yewentao256 --- vllm/platforms/cuda.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index d5f3599acb1cc..4aa4ca057f451 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -186,11 +186,12 @@ class CudaPlatformBase(Platform): # if torch compile cache key issue fixed # See https://github.com/vllm-project/vllm/pull/25093 logger.info( - "Data Parallel: disabling cudagraphs since DP " - "with DeepEP high-throughput kernels are not CUDA Graph " - "compatible. The DeepEP low-latency kernels are CUDA Graph " - "compatible. Set the all_to_all backend to deepep_low_latency " - "to use those kernels instead.") + "WideEP: Disabling CUDA Graphs since DeepEP high-throughput " + "kernels are optimized for prefill and are incompatible with " + "CUDA Graphs. " + "In order to use CUDA Graphs for decode-optimized workloads, " + "set VLLM_ALL2ALL_BACKEND to another option, such as " + "deepep_low_latency, pplx, or allgather_reducescatter.") compilation_config.cudagraph_mode = CUDAGraphMode.NONE @classmethod