diff --git a/vllm/config.py b/vllm/config.py
index ff9579a4bb1e6..d24082799d00b 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -2573,6 +2573,11 @@ class LoRAConfig:
             logger.warning("LoRA with chunked prefill is still experimental "
                            "and may be unstable.")
 
+    def verify_lora_support(self):
+        if self.long_lora_scaling_factors is not None and envs.VLLM_USE_V1:
+            raise ValueError(
+                "V1 LoRA does not support long LoRA, please use V0.")
+
 
 @dataclass
 class PromptAdapterConfig:
@@ -3672,6 +3677,7 @@ class VllmConfig:
             self.lora_config.verify_with_model_config(self.model_config)
             self.lora_config.verify_with_scheduler_config(
                 self.scheduler_config)
+            self.lora_config.verify_lora_support()
         if self.prompt_adapter_config:
             self.prompt_adapter_config.verify_with_model_config(
                 self.model_config)
diff --git a/vllm/lora/models.py b/vllm/lora/models.py
index 8164d919ca8b4..81e0741a03cf7 100644
--- a/vllm/lora/models.py
+++ b/vllm/lora/models.py
@@ -364,7 +364,7 @@ class LoRAModelManager(AdapterModelManager):
         self._last_mapping: Optional[LoRAMapping] = None
         self._create_lora_modules()
         self.model.lora_manager = self
-        self.adapter_type = 'LoRa'
+        self.adapter_type = 'LoRA'
 
     @property
     def capacity(self) -> int:
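
For context, here is a minimal, self-contained sketch of the behavior this patch introduces: when the V1 engine is selected and `long_lora_scaling_factors` is set, config validation now fails fast with a ValueError instead of proceeding. `LoRAConfigSketch` and the `VLLM_USE_V1` stand-in below are illustrative only; the real check lives on `vllm.config.LoRAConfig.verify_lora_support`, reads `envs.VLLM_USE_V1`, and is invoked from `VllmConfig` as shown in the diff.

```python
import os
from dataclasses import dataclass
from typing import Optional, Tuple

# Stand-in for envs.VLLM_USE_V1 (assumption for this sketch only).
VLLM_USE_V1 = os.environ.get("VLLM_USE_V1", "0") == "1"


@dataclass
class LoRAConfigSketch:
    # Mirrors the field consulted by the new check; the real LoRAConfig
    # has many more fields.
    long_lora_scaling_factors: Optional[Tuple[float, ...]] = None

    def verify_lora_support(self) -> None:
        # Long-context LoRA is only supported on the V0 engine, so reject
        # the combination up front during config validation.
        if self.long_lora_scaling_factors is not None and VLLM_USE_V1:
            raise ValueError(
                "V1 LoRA does not support long LoRA, please use V0.")


if __name__ == "__main__":
    cfg = LoRAConfigSketch(long_lora_scaling_factors=(4.0,))
    try:
        cfg.verify_lora_support()
        print("Accepted: V0 path or no long-LoRA scaling factors set.")
    except ValueError as exc:
        print(f"Rejected at config time: {exc}")
```

With `VLLM_USE_V1=1` in the environment, the sketch prints the rejection message; otherwise the config passes, which matches the intent of wiring the check into `VllmConfig` so the error surfaces before engine startup.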