diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 7047b23bbe27f..cd7fc5fdfcee1 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -13,7 +13,6 @@ from vllm.engine.ray_utils import initialize_ray_cluster from vllm.executor.executor_base import ExecutorBase from vllm.logger import init_logger from vllm.lora.request import LoRARequest -from vllm.model_executor.model_loader import get_architecture_class_name from vllm.outputs import RequestOutput from vllm.sampling_params import SamplingParams from vllm.sequence import (MultiModalData, SamplerOutput, Sequence, @@ -115,6 +114,8 @@ class LLMEngine: # If usage stat is enabled, collect relevant info. if is_usage_stats_enabled(): + from vllm.model_executor.model_loader import ( + get_architecture_class_name) usage_message.report_usage( get_architecture_class_name(model_config), usage_context,