diff --git a/vllm/entrypoints/openai/speech_to_text.py b/vllm/entrypoints/openai/speech_to_text.py
index e7589a3804c3c..09b346dcef6b8 100644
--- a/vllm/entrypoints/openai/speech_to_text.py
+++ b/vllm/entrypoints/openai/speech_to_text.py
@@ -24,7 +24,6 @@ from vllm.entrypoints.openai.serving_engine import (OpenAIServing,
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.inputs.data import PromptType
 from vllm.logger import init_logger
-from vllm.model_executor.model_loader import get_model_cls
 from vllm.model_executor.models import SupportsTranscription
 from vllm.outputs import RequestOutput
 from vllm.utils import PlaceholderModule
@@ -78,6 +77,7 @@ class OpenAISpeechToText(OpenAIServing):
 
     @cached_property
     def model_cls(self) -> type[SupportsTranscription]:
+        from vllm.model_executor.model_loader import get_model_cls
         model_cls = get_model_cls(self.model_config)
         return cast(type[SupportsTranscription], model_cls)
 
diff --git a/vllm/lora/utils.py b/vllm/lora/utils.py
index ee196e3f689a2..6b3291e9c92fa 100644
--- a/vllm/lora/utils.py
+++ b/vllm/lora/utils.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import os
-from typing import Optional, Union
+from typing import TYPE_CHECKING, Optional, Union
 
 import huggingface_hub
 import regex as re
@@ -31,10 +31,14 @@ from vllm.lora.layers import (BaseLayerWithLoRA, ColumnParallelLinearWithLoRA,
                               RowParallelLinearWithLoRA,
                               VocabParallelEmbeddingWithLoRA)
 from vllm.model_executor.layers.linear import LinearBase
+
 # yapf: enable
-from vllm.model_executor.layers.logits_processor import LogitsProcessor
-from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
-from vllm.model_executor.models.utils import WeightsMapper
+
+if TYPE_CHECKING:
+    from vllm.model_executor.layers.logits_processor import LogitsProcessor
+    from vllm.model_executor.layers.vocab_parallel_embedding import (
+        ParallelLMHead)
+    from vllm.model_executor.models.utils import WeightsMapper
 
 logger = init_logger(__name__)
 
@@ -75,8 +79,8 @@ def from_layer(layer: nn.Module,
 
 
 def from_layer_logits_processor(
-        layer: LogitsProcessor,
-        lm_head: ParallelLMHead,
+        layer: "LogitsProcessor",
+        lm_head: "ParallelLMHead",
         max_loras: int,
         lora_config: LoRAConfig,
         model_config: Optional[PretrainedConfig] = None,
@@ -98,8 +102,8 @@ def replace_submodule(model: nn.Module, module_name: str,
 
 
 def parse_fine_tuned_lora_name(
-        name: str,
-        weights_mapper: Optional[WeightsMapper] = None
+        name: str,
+        weights_mapper: Optional["WeightsMapper"] = None
 ) -> tuple[str, bool, bool]:
     """Parse the name of lora weights.