[Bugfix][DCP] Set default CUDAGraphMode to PIECEWISE for DCP (#26574)

Signed-off-by: FENP <32334296+FENP@users.noreply.github.com>
2026-01-23 11:24:31 +08:00 · 2025-10-12 17:58:38 +08:00 · 2025-10-12 17:58:38 +08:00 · b91d8db873
commit b91d8db873
parent 045b396d09
1 changed files with 9 additions and 0 deletions
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -350,6 +350,15 @@ class VllmConfig:
                        or self.model_config.is_encoder_decoder
                    ):
                        self.compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
+
+                    # Decode context parallel does not support full CUDA graphs yet.
+                    if self.parallel_config.decode_context_parallel_size > 1:
+                        logger.warning(
+                            "Decode context parallel (DCP) is enabled, which is "
+                            "incompatible with full CUDA graphs. Set "
+                            "cudagraph_mode to PIECEWISE."
+                        )
+                        self.compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
                else:
                    self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE