From 95501a70ec69d182b124774ff708c3050ab4e91e Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson
Date: Tue, 9 Dec 2025 13:51:19 -0500
Subject: [PATCH] [BugFix] Fix DeepSeek-R1 hang with DP and MTP (#30119)

Signed-off-by: Lucas Wilkinson
Signed-off-by: Lucas Wilkinson
Co-authored-by: Tyler Michael Smith
Co-authored-by: Matthew Bonanni
---
 vllm/v1/worker/gpu_model_runner.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 766c2acd0e1d8..7398defd74a38 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -4168,10 +4168,19 @@ class GPUModelRunner(
 
         if self.speculative_config and self.speculative_config.use_eagle():
             assert isinstance(self.drafter, EagleProposer)
+            # Eagle currently only supports PIECEWISE cudagraphs.
+            # Therefore only use cudagraphs if the main model uses PIECEWISE
+            # NOTE(lucas): this is a hack, need to clean up.
             use_cudagraphs = (
-                cudagraph_runtime_mode.has_mode(CUDAGraphMode.PIECEWISE)
-                and not self.speculative_config.enforce_eager
-            )
+                (
+                    is_graph_capturing
+                    and cudagraph_runtime_mode == CUDAGraphMode.PIECEWISE
+                )
+                or (
+                    not is_graph_capturing
+                    and cudagraph_runtime_mode != CUDAGraphMode.NONE
+                )
+            ) and not self.speculative_config.enforce_eager
 
             # Note(gnovack) - We need to disable cudagraphs for one of the two
             # lora cases when cudagraph_specialize_lora is enabled. This is a