mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-30 07:05:16 +08:00
[BugFix] Fix DeepSeek-R1 hang with DP and MTP (#30119)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
parent
e858bfe051
commit
95501a70ec
@ -4168,10 +4168,19 @@ class GPUModelRunner(
|
||||
|
||||
if self.speculative_config and self.speculative_config.use_eagle():
|
||||
assert isinstance(self.drafter, EagleProposer)
|
||||
# Eagle currently only supports PIECEWISE cudagraphs.
|
||||
# Therefore only use cudagraphs if the main model uses PIECEWISE
|
||||
# NOTE(lucas): this is a hack, need to clean up.
|
||||
use_cudagraphs = (
|
||||
cudagraph_runtime_mode.has_mode(CUDAGraphMode.PIECEWISE)
|
||||
and not self.speculative_config.enforce_eager
|
||||
)
|
||||
(
|
||||
is_graph_capturing
|
||||
and cudagraph_runtime_mode == CUDAGraphMode.PIECEWISE
|
||||
)
|
||||
or (
|
||||
not is_graph_capturing
|
||||
and cudagraph_runtime_mode != CUDAGraphMode.NONE
|
||||
)
|
||||
) and not self.speculative_config.enforce_eager
|
||||
|
||||
# Note(gnovack) - We need to disable cudagraphs for one of the two
|
||||
# lora cases when cudagraph_specialize_lora is enabled. This is a
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user