From 58e234a7542f6df2e769eaab4f1661a3a7494441 Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Wed, 2 Apr 2025 23:04:43 +0800
Subject: [PATCH] [Misc] V1 LoRA support CPU offload (#15843)

Signed-off-by: Jee Jee Li
---
 vllm/config.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index ba20e3fd75125..1255d716a2e4c 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -2434,9 +2434,9 @@ class LoRAConfig:
                 f"max_loras ({self.max_loras})")
 
     def verify_with_cache_config(self, cache_config: CacheConfig):
-        # TODO LoRA supports CPU offload.
-        if cache_config.cpu_offload_gb > 0:
-            raise ValueError("CPU offload is not supported with LoRA yet.")
+        if cache_config.cpu_offload_gb > 0 and not envs.VLLM_USE_V1:
+            raise ValueError(
+                "V0 LoRA does not support CPU offload, please use V1.")
 
     def verify_with_model_config(self, model_config: ModelConfig):
         if self.lora_dtype in (None, "auto"):