[Bugfix] DeepSeek V3.2 MTP metadata & CUDA graph issues (#26779)

Signed-off-by: xiaohajiayou <923390377@qq.com>
This commit is contained in:
Haco 2025-11-01 22:52:43 +08:00 committed by GitHub
parent 30a14b034f
commit d811b442d3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -109,6 +109,7 @@ class EagleProposer:
  else []
  )
+ self.use_cuda_graph = self.use_cuda_graph and bool(self.cudagraph_batch_sizes)
  # persistent buffers for cuda graph
  self.input_ids = torch.zeros(
  self.max_num_tokens, dtype=torch.int32, device=device
@@ -939,7 +940,7 @@ class EagleProposer:
  self.vllm_config, DeepseekV32IndexerCache
  )
  draft_indexer_layer_names = indexer_layers.keys() - target_indexer_layer_names
- self.attn_layer_names = list(draft_attn_layer_names)
+ self.attn_layer_names = list(draft_attn_layer_names - draft_indexer_layer_names)
  self.indexer_layer_names = list(draft_indexer_layer_names)
  if self.indexer_layer_names:
@@ -1050,16 +1051,18 @@ class EagleProposer:
  num_tokens: int,
  use_cudagraphs=True,
  ) -> None:
- if use_cudagraphs and num_tokens <= self.cudagraph_batch_sizes[-1]:
+ # Determine if CUDA graphs should be used for this run.
+ cudagraphs_enabled = use_cudagraphs and self.use_cuda_graph
+ if cudagraphs_enabled and num_tokens <= self.cudagraph_batch_sizes[-1]:
  num_tokens = self.vllm_config.pad_for_cudagraph(num_tokens)
  with set_forward_context(
  None,
  self.vllm_config,
  num_tokens=num_tokens,
- cudagraph_runtime_mode=CUDAGraphMode.PIECEWISE
- if use_cudagraphs
- else CUDAGraphMode.NONE,
+ cudagraph_runtime_mode=(
+ CUDAGraphMode.PIECEWISE if cudagraphs_enabled else CUDAGraphMode.NONE
+ ),
  ):
  if self.supports_mm_inputs:
  input_ids = None