mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-17 00:04:35 +08:00
[misc] Do not allow to use lora with chunked prefill. (#5538)
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
parent
81fbb3655f
commit
e691918e3b
@@ -1092,6 +1092,8 @@ class LoRAConfig:
                 "Due to limitations of the custom LoRA CUDA kernel, "
                 "max_num_batched_tokens must be <= 65528 when "
                 "LoRA is enabled.")
+        if scheduler_config.chunked_prefill_enabled:
+            raise ValueError("LoRA is not supported with chunked prefill yet.")


 @dataclass
Loading…
x
Reference in New Issue
Block a user