[BugFix] Fix DeepSeek-R1 hang with DP and MTP (#30119)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
Lucas Wilkinson 2025-12-09 13:51:19 -05:00 committed by GitHub
parent e858bfe051
commit 95501a70ec
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -4168,10 +4168,19 @@ class GPUModelRunner(
if self.speculative_config and self.speculative_config.use_eagle():
assert isinstance(self.drafter, EagleProposer)
# Eagle currently only supports PIECEWISE cudagraphs.
# Therefore only use cudagraphs if the main model uses PIECEWISE
# NOTE(lucas): this is a hack, need to clean up.
use_cudagraphs = (
cudagraph_runtime_mode.has_mode(CUDAGraphMode.PIECEWISE)
and not self.speculative_config.enforce_eager
)
(
is_graph_capturing
and cudagraph_runtime_mode == CUDAGraphMode.PIECEWISE
)
or (
not is_graph_capturing
and cudagraph_runtime_mode != CUDAGraphMode.NONE
)
) and not self.speculative_config.enforce_eager
# Note(gnovack) - We need to disable cudagraphs for one of the two
# lora cases when cudagraph_specialize_lora is enabled. This is a