diff --git a/vllm/lora/models.py b/vllm/lora/models.py index 97739d96bea32..27a85a3ca2297 100644 --- a/vllm/lora/models.py +++ b/vllm/lora/models.py @@ -13,7 +13,12 @@ from torch import nn from vllm.config.lora import LoRAConfig, ModelConfig from vllm.logger import init_logger -from vllm.lora.layers import BaseLayerWithLoRA, LoRAMapping, LoRAMappingType +from vllm.lora.layers import ( + BaseLayerWithLoRA, + FusedMoEWithLoRA, + LoRAMapping, + LoRAMappingType, +) from vllm.lora.lora_weights import LoRALayerWeights, PackedLoRALayerWeights from vllm.lora.peft_helper import PEFTHelper from vllm.lora.punica_wrapper import PunicaWrapperBase, get_punica_wrapper @@ -25,7 +30,6 @@ from vllm.lora.utils import ( is_moe_model, is_regex_target_modules, parse_fine_tuned_lora_name, - process_packed_modules_mapping, replace_submodule, ) from vllm.model_executor.layers.fused_moe import FusedMoE @@ -36,7 +40,6 @@ from vllm.model_executor.models.module_mapping import MultiModelKeys from vllm.model_executor.models.utils import PPMissingLayer, WeightsMapper from vllm.model_executor.utils import get_packed_modules_mapping from vllm.multimodal import MULTIMODAL_REGISTRY -from vllm.utils import is_pin_memory_available from vllm.utils.cache import LRUCache from vllm.utils.platform_utils import is_pin_memory_available diff --git a/vllm/v1/worker/lora_model_runner_mixin.py b/vllm/v1/worker/lora_model_runner_mixin.py index 2e40de8491217..ed6b5525fa1a7 100644 --- a/vllm/v1/worker/lora_model_runner_mixin.py +++ b/vllm/v1/worker/lora_model_runner_mixin.py @@ -175,10 +175,10 @@ class LoRAModelRunnerMixin: } self._set_active_loras( - tuple(sample_lora_mapping), - tuple(token_lora_mapping), - lora_requests, - mapping_type + tuple(sample_lora_mapping), + tuple(token_lora_mapping), + lora_requests, + mapping_type, ) yield @@ -196,7 +196,11 @@ class LoRAModelRunnerMixin: with ( self.maybe_setup_dummy_loras(lora_config, remove_lora), self.maybe_select_dummy_loras( - lora_config, num_scheduled_tokens, mapping_type, num_sampled_tokens, activate_lora + lora_config, + num_scheduled_tokens, + mapping_type, + num_sampled_tokens, + activate_lora, ), ): yield