Remove the speculative decoding (SD) constraint on LoRA cudagraph lookup

Signed-off-by: Yu Gong <yu3.gong@gmail.com>
2026-05-21 22:44:31 +08:00 · 2025-12-23 20:00:19 +00:00 · 2025-12-23 20:00:19 +00:00 · 094eaef7b3
commit 094eaef7b3
parent e95e2145c5
1 changed file with 0 additions and 8 deletions
--- a/vllm/v1/cudagraph_dispatcher.py
+++ b/vllm/v1/cudagraph_dispatcher.py
@@ -194,15 +194,7 @@ class CudagraphDispatcher:
        ):
            return CUDAGraphMode.NONE, BatchDescriptor(num_tokens)
        # When speculative decoding is enabled, always use max_loras for lookup
        # since we only capture graphs with max_loras
        effective_num_active_loras = num_active_loras
        if (
            self.vllm_config.speculative_config is not None
            and self.vllm_config.lora_config is not None
            and has_lora
        ):
            effective_num_active_loras = self.vllm_config.lora_config.max_loras
        batch_desc = self._create_padded_batch_descriptor(
            num_tokens, uniform_decode, has_lora, effective_num_active_loras