mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-21 22:44:31 +08:00
Remove the constraint that forces max_loras for CUDA graph lookup when speculative decoding (SD) is enabled
Signed-off-by: Yu Gong <yu3.gong@gmail.com>
This commit is contained in:
parent
e95e2145c5
commit
094eaef7b3
@@ -194,15 +194,7 @@ class CudagraphDispatcher:
|
|||||||
):
|
):
|
||||||
return CUDAGraphMode.NONE, BatchDescriptor(num_tokens)
|
return CUDAGraphMode.NONE, BatchDescriptor(num_tokens)
|
||||||
|
|
||||||
# When speculative decoding is enabled, always use max_loras for lookup
|
|
||||||
# since we only capture graphs with max_loras
|
|
||||||
effective_num_active_loras = num_active_loras
|
effective_num_active_loras = num_active_loras
|
||||||
if (
|
|
||||||
self.vllm_config.speculative_config is not None
|
|
||||||
and self.vllm_config.lora_config is not None
|
|
||||||
and has_lora
|
|
||||||
):
|
|
||||||
effective_num_active_loras = self.vllm_config.lora_config.max_loras
|
|
||||||
|
|
||||||
batch_desc = self._create_padded_batch_descriptor(
|
batch_desc = self._create_padded_batch_descriptor(
|
||||||
num_tokens, uniform_decode, has_lora, effective_num_active_loras
|
num_tokens, uniform_decode, has_lora, effective_num_active_loras
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user