[Misc] V1 LoRA support CPU offload (#15843)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Jee Jee Li 2025-04-02 23:04:43 +08:00 committed by GitHub
parent e86c414d6a
commit 58e234a754
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -2434,9 +2434,9 @@ class LoRAConfig:
f"max_loras ({self.max_loras})")
def verify_with_cache_config(self, cache_config: CacheConfig):
# Checks cache_config.cpu_offload_gb and raises ValueError for settings
# that are rejected in combination with LoRA.
#
# NOTE(review): this span looks like a rendered diff hunk whose +/- markers
# and indentation were stripped. Presumably the TODO plus the first
# check/raise are the removed lines and the second check/raise is their
# replacement -- confirm against the real file before treating both
# checks as live code. Read literally, the first condition already covers
# every case the second one matches, so the second raise could never fire.
# TODO LoRA supports CPU offload.
# First check: any cpu_offload_gb greater than zero is rejected outright.
if cache_config.cpu_offload_gb > 0:
raise ValueError("CPU offload is not supported with LoRA yet.")
# Second check: a cpu_offload_gb greater than zero is rejected only when
# envs.VLLM_USE_V1 is falsy; the message directs the user to V1.
if cache_config.cpu_offload_gb > 0 and not envs.VLLM_USE_V1:
raise ValueError(
"V0 LoRA does not support CPU offload, please use V1.")
def verify_with_model_config(self, model_config: ModelConfig):
if self.lora_dtype in (None, "auto"):