From b91d8db873a5f4d639a5cb57288cd94ed1614bb0 Mon Sep 17 00:00:00 2001 From: Jaya Yuan Date: Sun, 12 Oct 2025 17:58:38 +0800 Subject: [PATCH] [Bugfix][DCP] Set default CUDAGraphMode to PIECEWISE for DCP (#26574) Signed-off-by: FENP <32334296+FENP@users.noreply.github.com> --- vllm/config/vllm.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index e6cfcad3d696..9d156dd8d9de 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -350,6 +350,15 @@ class VllmConfig: or self.model_config.is_encoder_decoder ): self.compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE + + # Decode context parallel (DCP) does not support full CUDA graphs yet. + if self.parallel_config.decode_context_parallel_size > 1: + logger.warning( + "Decode context parallel (DCP) is enabled, which is " + "incompatible with full CUDA graphs. Set " + "cudagraph_mode to PIECEWISE." + ) + self.compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE else: self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE