From 3f532cb6a69e51a6578b85642fcba34ac348f7a4 Mon Sep 17 00:00:00 2001
From: "wang.yuqi"
Date: Thu, 27 Mar 2025 17:21:23 +0800
Subject: [PATCH] [Misc] Use model_redirect to redirect the model name to a
 local folder. (#14116)

---
 vllm/config.py                   | 10 +++++++---
 vllm/envs.py                     |  5 +++++
 vllm/transformers_utils/utils.py | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 62800afc3e699..687c8b56ec126 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -38,7 +38,7 @@ from vllm.transformers_utils.config import (
     get_sentence_transformer_tokenizer_config, is_encoder_decoder,
     try_get_generation_config, uses_mrope)
 from vllm.transformers_utils.s3_utils import S3Model
-from vllm.transformers_utils.utils import is_s3
+from vllm.transformers_utils.utils import is_s3, maybe_model_redirect
 from vllm.utils import (GiB_bytes, LayerBlockType, cuda_device_count_stateless,
                         get_cpu_memory, random_uuid,
                         resolve_obj_by_qualname)
@@ -266,9 +266,13 @@ class ModelConfig:
         override_generation_config: Optional[dict[str, Any]] = None,
         model_impl: Union[str, ModelImpl] = ModelImpl.AUTO,
     ) -> None:
-        self.model = model
+        self.model = maybe_model_redirect(model)
+        self.tokenizer = maybe_model_redirect(tokenizer)
+
         self.hf_config_path = hf_config_path
-        self.tokenizer = tokenizer
+        if isinstance(hf_config_path, str):
+            self.hf_config_path = maybe_model_redirect(hf_config_path)
+
         self.tokenizer_mode = tokenizer_mode
         self.trust_remote_code = trust_remote_code
         self.allowed_local_media_path = allowed_local_media_path
diff --git a/vllm/envs.py b/vllm/envs.py
index e16753191c6e2..23c304f124d36 100644
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -22,6 +22,7 @@ if TYPE_CHECKING:
     S3_ACCESS_KEY_ID: Optional[str] = None
     S3_SECRET_ACCESS_KEY: Optional[str] = None
     S3_ENDPOINT_URL: Optional[str] = None
+    VLLM_MODEL_REDIRECT_PATH: Optional[str] = None
     VLLM_CACHE_ROOT: str = os.path.expanduser("~/.cache/vllm")
     VLLM_CONFIG_ROOT: str = os.path.expanduser("~/.config/vllm")
     VLLM_USAGE_STATS_SERVER: str = "https://stats.vllm.ai"
@@ -635,6 +636,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_CI_USE_S3":
     lambda: os.environ.get("VLLM_CI_USE_S3", "0") == "1",
 
+    # Path to a file that maps model names to local folders (model redirect).
+    "VLLM_MODEL_REDIRECT_PATH":
+    lambda: os.environ.get("VLLM_MODEL_REDIRECT_PATH", None),
+
     # Whether to use atomicAdd reduce in gptq/awq marlin kernel.
     "VLLM_MARLIN_USE_ATOMIC_ADD":
     lambda: os.environ.get("VLLM_MARLIN_USE_ATOMIC_ADD", "0") == "1",
diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py
index 87e446f894384..bae487b75588e 100644
--- a/vllm/transformers_utils/utils.py
+++ b/vllm/transformers_utils/utils.py
@@ -1,9 +1,15 @@
 # SPDX-License-Identifier: Apache-2.0
 
+from functools import cache
 from os import PathLike
 from pathlib import Path
 from typing import List, Optional, Union
 
+from vllm.envs import VLLM_MODEL_REDIRECT_PATH
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
 
 def is_s3(model_or_path: str) -> bool:
     return model_or_path.lower().startswith('s3://')
@@ -38,3 +44,35 @@ def modelscope_list_repo_files(
         if file['Type'] == 'blob'
     ]
     return files
+
+
+@cache
+def maybe_model_redirect(model: str) -> str:
+    """
+    Redirect the model name to a local folder via VLLM_MODEL_REDIRECT_PATH.
+
+    :param model: Hugging Face model name or path
+    :return: the mapped local folder, or the original model name
+    """
+
+    model_redirect_path = VLLM_MODEL_REDIRECT_PATH
+
+    if not model_redirect_path:
+        return model
+
+    if not Path(model_redirect_path).exists():
+        return model
+
+    with open(model_redirect_path) as f:
+        for line in f:
+            try:
+                model_name, redirect_name = line.split("\t")
+                if model == model_name:
+                    redirect_name = redirect_name.strip()
+                    logger.info("model redirect: [ %s ] -> [ %s ]", model,
+                                redirect_name)
+                    return redirect_name
+            except ValueError:
+                pass  # skip lines that are not a single tab-separated pair
+
+    return model
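
--
A minimal usage sketch (not part of the commit): the redirect file is plain
text with one "<model name><TAB><local folder>" pair per line. The mapping,
file path, and local folder below are hypothetical. Note that the value of
VLLM_MODEL_REDIRECT_PATH is bound when vllm.transformers_utils.utils is first
imported, so set the variable before importing vLLM:

    import os
    os.environ["VLLM_MODEL_REDIRECT_PATH"] = "/tmp/model_redirect.txt"

    # Hypothetical mapping: redirect facebook/opt-125m to a local folder.
    with open("/tmp/model_redirect.txt", "w") as f:
        f.write("facebook/opt-125m\t/models/opt-125m\n")

    from vllm.transformers_utils.utils import maybe_model_redirect

    print(maybe_model_redirect("facebook/opt-125m"))  # -> /models/opt-125m
    print(maybe_model_redirect("unmapped/model"))     # -> unmapped/model

Because maybe_model_redirect is wrapped in functools.cache, repeated lookups
for the same model name do not reread the mapping file.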