[Misc] V1 LoRA support CPU offload (#15843)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
2025-12-24 15:06:26 +08:00 · 2025-04-02 23:04:43 +08:00 · 2025-04-02 23:04:43 +08:00 · 58e234a754
commit 58e234a754
parent e86c414d6a
1 changed file with 3 additions and 3 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -2434,9 +2434,9 @@ class LoRAConfig:
                f"max_loras ({self.max_loras})")

    def verify_with_cache_config(self, cache_config: CacheConfig):
-        # TODO LoRA supports CPU offload.
-        if cache_config.cpu_offload_gb > 0:
-            raise ValueError("CPU offload is not supported with LoRA yet.")
+        if cache_config.cpu_offload_gb > 0 and not envs.VLLM_USE_V1:
+            raise ValueError(
+                "V0 LoRA does not support CPU offload, please use V1.")

    def verify_with_model_config(self, model_config: ModelConfig):
        if self.lora_dtype in (None, "auto"):