mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-31 08:37:04 +08:00
Remove the constraint for speculative decoding (SD)
Signed-off-by: Yu Gong <yu3.gong@gmail.com>
This commit is contained in:
parent
e95e2145c5
commit
094eaef7b3
@@ -194,15 +194,7 @@ class CudagraphDispatcher:
|
||||
):
|
||||
return CUDAGraphMode.NONE, BatchDescriptor(num_tokens)
|
||||
|
||||
# When speculative decoding is enabled, always use max_loras for lookup
|
||||
# since we only capture graphs with max_loras
|
||||
effective_num_active_loras = num_active_loras
|
||||
if (
|
||||
self.vllm_config.speculative_config is not None
|
||||
and self.vllm_config.lora_config is not None
|
||||
and has_lora
|
||||
):
|
||||
effective_num_active_loras = self.vllm_config.lora_config.max_loras
|
||||
|
||||
batch_desc = self._create_padded_batch_descriptor(
|
||||
num_tokens, uniform_decode, has_lora, effective_num_active_loras
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user