[misc] Do not allow using LoRA with chunked prefill. (#5538)

Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
2026-01-17 00:04:35 +08:00 · 2024-06-15 23:59:36 +09:00 · 2024-06-15 23:59:36 +09:00 · e691918e3b
commit e691918e3b
parent 81fbb3655f
1 changed files with 2 additions and 0 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1092,6 +1092,8 @@ class LoRAConfig:
                "Due to limitations of the custom LoRA CUDA kernel, "
                "max_num_batched_tokens must be <= 65528 when "
                "LoRA is enabled.")
+        if scheduler_config.chunked_prefill_enabled:
+            raise ValueError("LoRA is not supported with chunked prefill yet.")


@dataclass