mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-06 14:34:07 +08:00
[Logging] Improve log for when DeepEP HT disables CUDA Graphs (#25531)
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
61a6443bc3
commit
b4a80dad98
@@ -186,11 +186,12 @@ class CudaPlatformBase(Platform):
|
||||
# if torch compile cache key issue fixed
|
||||
# See https://github.com/vllm-project/vllm/pull/25093
|
||||
logger.info(
|
||||
"Data Parallel: disabling cudagraphs since DP "
|
||||
"with DeepEP high-throughput kernels are not CUDA Graph "
|
||||
"compatible. The DeepEP low-latency kernels are CUDA Graph "
|
||||
"compatible. Set the all_to_all backend to deepep_low_latency "
|
||||
"to use those kernels instead.")
|
||||
"WideEP: Disabling CUDA Graphs since DeepEP high-throughput "
|
||||
"kernels are optimized for prefill and are incompatible with "
|
||||
"CUDA Graphs. "
|
||||
"In order to use CUDA Graphs for decode-optimized workloads, "
|
||||
"set VLLM_ALL2ALL_BACKEND to another option, such as "
|
||||
"deepep_low_latency, pplx, or allgather_reducescatter.")
|
||||
compilation_config.cudagraph_mode = CUDAGraphMode.NONE
|
||||
|
||||
@classmethod
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user