From 094eaef7b31f5c0bfd6c200a972ad332be374fcf Mon Sep 17 00:00:00 2001 From: Yu Gong Date: Tue, 23 Dec 2025 20:00:19 +0000 Subject: [PATCH] Remove the max_loras lookup constraint for speculative decoding (SD) Signed-off-by: Yu Gong --- vllm/v1/cudagraph_dispatcher.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/vllm/v1/cudagraph_dispatcher.py b/vllm/v1/cudagraph_dispatcher.py index 9d731bde644d3..85cd3105571d8 100644 --- a/vllm/v1/cudagraph_dispatcher.py +++ b/vllm/v1/cudagraph_dispatcher.py @@ -194,15 +194,7 @@ class CudagraphDispatcher: ): return CUDAGraphMode.NONE, BatchDescriptor(num_tokens) - # When speculative decoding is enabled, always use max_loras for lookup - # since we only capture graphs with max_loras effective_num_active_loras = num_active_loras - if ( - self.vllm_config.speculative_config is not None - and self.vllm_config.lora_config is not None - and has_lora - ): - effective_num_active_loras = self.vllm_config.lora_config.max_loras batch_desc = self._create_padded_batch_descriptor( num_tokens, uniform_decode, has_lora, effective_num_active_loras