diff --git a/vllm/config.py b/vllm/config.py
index ba20e3fd75125..1255d716a2e4c 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -2434,9 +2434,9 @@ class LoRAConfig:
                 f"max_loras ({self.max_loras})")
 
     def verify_with_cache_config(self, cache_config: CacheConfig):
-        # TODO LoRA supports CPU offload.
-        if cache_config.cpu_offload_gb > 0:
-            raise ValueError("CPU offload is not supported with LoRA yet.")
+        if cache_config.cpu_offload_gb > 0 and not envs.VLLM_USE_V1:
+            raise ValueError(
+                "V0 LoRA does not support CPU offload, please use V1.")
 
     def verify_with_model_config(self, model_config: ModelConfig):
         if self.lora_dtype in (None, "auto"):
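
For context, a minimal sketch (not part of the diff) of how the relaxed check behaves from the user-facing LLM entrypoint; the model name and offload size below are illustrative:

    import os

    # Opt in to the V1 engine before importing vLLM. Under V0
    # (VLLM_USE_V1 unset or "0"), combining LoRA with CPU offload
    # still raises the ValueError shown above.
    os.environ["VLLM_USE_V1"] = "1"

    from vllm import LLM

    llm = LLM(
        model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model choice
        enable_lora=True,    # builds a LoRAConfig, which runs verify_with_cache_config
        cpu_offload_gb=4,    # previously rejected whenever LoRA was enabled
    )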