Fix early CUDA init via get_architecture_class_name import (#3770)

Signed-off-by: Lei Wen <wenlei03@qiyi.com>
Co-authored-by: Lei Wen <wenlei03@qiyi.com>
leiwen83 2024-04-03 02:56:26 +08:00 committed by GitHub
parent 205b94942e
commit ad6eca408b


@@ -13,7 +13,6 @@ from vllm.engine.ray_utils import initialize_ray_cluster
 from vllm.executor.executor_base import ExecutorBase
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
-from vllm.model_executor.model_loader import get_architecture_class_name
 from vllm.outputs import RequestOutput
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import (MultiModalData, SamplerOutput, Sequence,
@@ -115,6 +114,8 @@ class LLMEngine:
         # If usage stat is enabled, collect relevant info.
         if is_usage_stats_enabled():
+            from vllm.model_executor.model_loader import (
+                get_architecture_class_name)
             usage_message.report_usage(
                 get_architecture_class_name(model_config),
                 usage_context,
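
For context: the removed module-level import pulls in vllm.model_executor.model_loader (and, transitively, model code) as soon as the engine module is imported, which can trigger CUDA initialization earlier than intended. The commit defers that import to the only branch that uses it. Below is a minimal, self-contained sketch of the same deferred-import pattern; it is not vLLM code, and "heavy_gpu_module" is a hypothetical stand-in for a module whose import has side effects such as CUDA init.

# Eager style: the side effects run whenever this file is imported.
#   from heavy_gpu_module import get_architecture_class_name

def maybe_report_usage(model_config, usage_stats_enabled: bool) -> None:
    """Report usage only when enabled, importing the heavy module lazily."""
    if not usage_stats_enabled:
        return  # the heavy module is never imported on this path
    # Deferred style: the import (and any side effects it triggers) happens
    # only here, after the caller has decided it is actually needed.
    from heavy_gpu_module import get_architecture_class_name  # hypothetical
    print(get_architecture_class_name(model_config))

The trade-off is a slightly later (and per-call) import lookup in exchange for keeping import of the engine module free of GPU side effects.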