mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-31 16:27:09 +08:00
[BugFix] Fix DeepSeek-R1 hang with DP and MTP (#30119)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com> Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com> Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com> Co-authored-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
parent
e858bfe051
commit
95501a70ec
@ -4168,10 +4168,19 @@ class GPUModelRunner(
|
|||||||
|
|
||||||
if self.speculative_config and self.speculative_config.use_eagle():
|
if self.speculative_config and self.speculative_config.use_eagle():
|
||||||
assert isinstance(self.drafter, EagleProposer)
|
assert isinstance(self.drafter, EagleProposer)
|
||||||
|
# Eagle currently only supports PIECEWISE cudagraphs.
|
||||||
|
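# Therefore, while capturing graphs, only use cudagraphs if the runtime
# mode is exactly PIECEWISE; outside of capture, use them whenever the
# runtime mode is not NONE. Either way, enforce_eager disables them.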
|
||||||
|
# NOTE(lucas): this is a hack; it needs to be cleaned up.
|
||||||
use_cudagraphs = (
|
use_cudagraphs = (
|
||||||
cudagraph_runtime_mode.has_mode(CUDAGraphMode.PIECEWISE)
|
(
|
||||||
and not self.speculative_config.enforce_eager
|
is_graph_capturing
|
||||||
)
|
and cudagraph_runtime_mode == CUDAGraphMode.PIECEWISE
|
||||||
|
)
|
||||||
|
or (
|
||||||
|
not is_graph_capturing
|
||||||
|
and cudagraph_runtime_mode != CUDAGraphMode.NONE
|
||||||
|
)
|
||||||
|
) and not self.speculative_config.enforce_eager
|
||||||
|
|
||||||
# Note(gnovack) - We need to disable cudagraphs for one of the two
|
# Note(gnovack) - We need to disable cudagraphs for one of the two
|
||||||
# lora cases when cudagraph_specialize_lora is enabled. This is a
|
# lora cases when cudagraph_specialize_lora is enabled. This is a
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user