From 094eaef7b31f5c0bfd6c200a972ad332be374fcf Mon Sep 17 00:00:00 2001
From: Yu Gong <yu3.gong@gmail.com>
Date: Tue, 23 Dec 2025 20:00:19 +0000
Subject: [PATCH] Remove the forced max_loras CUDA graph lookup for speculative decoding with LoRA

Signed-off-by: Yu Gong <yu3.gong@gmail.com>
---
 vllm/v1/cudagraph_dispatcher.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/vllm/v1/cudagraph_dispatcher.py b/vllm/v1/cudagraph_dispatcher.py
index 9d731bde644d3..85cd3105571d8 100644
--- a/vllm/v1/cudagraph_dispatcher.py
+++ b/vllm/v1/cudagraph_dispatcher.py
@@ -194,15 +194,7 @@ class CudagraphDispatcher:
         ):
             return CUDAGraphMode.NONE, BatchDescriptor(num_tokens)
 
-        # When speculative decoding is enabled, always use max_loras for lookup
-        # since we only capture graphs with max_loras
         effective_num_active_loras = num_active_loras
-        if (
-            self.vllm_config.speculative_config is not None
-            and self.vllm_config.lora_config is not None
-            and has_lora
-        ):
-            effective_num_active_loras = self.vllm_config.lora_config.max_loras
 
         batch_desc = self._create_padded_batch_descriptor(
             num_tokens, uniform_decode, has_lora, effective_num_active_loras