[Misc][ModelScope] Change to use runtime VLLM_USE_MODELSCOPE (#18655)

Signed-off-by: Mengqing Cao <cmq0113@163.com>
Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
Mengqing Cao 2025-05-25 12:51:21 +08:00 committed by GitHub
parent cebc22f3b6
commit 6ab681bcbe
5 changed files with 19 additions and 17 deletions
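
What the change buys: `from vllm.envs import VLLM_USE_MODELSCOPE` copies the flag's value into the importing module once, at import time, so anything that sets the environment variable afterwards (for example the test below calling `monkeypatch.setenv`) has no effect. Accessing `envs.VLLM_USE_MODELSCOPE` goes through the `vllm.envs` module on every read, so the variable is resolved at runtime. A minimal, self-contained sketch of the difference; this illustrates the pattern, it is not vLLM's actual `envs.py`:

# Minimal sketch of lazy env-var resolution (illustrative only).
import os
import types

envs = types.ModuleType("envs")

def _lazy_getattr(name: str):
    # Re-read the environment on every attribute access
    # (module-level __getattr__ hook, PEP 562 style).
    if name == "VLLM_USE_MODELSCOPE":
        return os.environ.get("VLLM_USE_MODELSCOPE", "False").lower() == "true"
    raise AttributeError(name)

envs.__getattr__ = _lazy_getattr

frozen = envs.VLLM_USE_MODELSCOPE          # like `from vllm.envs import ...`: copied once
os.environ["VLLM_USE_MODELSCOPE"] = "True"

print(frozen)                              # False: the copy never sees the update
print(envs.VLLM_USE_MODELSCOPE)            # True: resolved at access time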

View File

@@ -60,6 +60,9 @@ def test_model_from_modelscope(monkeypatch: pytest.MonkeyPatch):
     # model: https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_MODELSCOPE", "True")
+        # Don't use HF_TOKEN for ModelScope repos, otherwise it will fail
+        # with 400 Client Error: Bad Request.
+        m.setenv("HF_TOKEN", "")
         llm = LLM(model="qwen/Qwen1.5-0.5B-Chat")
         prompts = [
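
Because the flag is now read at call time, the same switch can be flipped outside pytest as well, as long as the environment is set before the engine is constructed. A rough non-test sketch (assumes network access to ModelScope and the same Qwen checkpoint the test uses):

import os

# Runtime toggle: effective even if vllm is already imported, because the
# flag is read when the engine is built, not when the module is loaded.
os.environ["VLLM_USE_MODELSCOPE"] = "True"
# Mirror the test: do not send a HuggingFace token to ModelScope repos.
os.environ["HF_TOKEN"] = ""

from vllm import LLM  # noqa: E402

llm = LLM(model="qwen/Qwen1.5-0.5B-Chat")
outputs = llm.generate(["San Francisco is a"])
print(outputs[0].outputs[0].text)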

View File

@@ -11,8 +11,8 @@ import torch
 from torch import nn
 from transformers.utils import SAFE_WEIGHTS_INDEX_NAME
 
+from vllm import envs
 from vllm.config import LoadConfig, LoadFormat, ModelConfig, VllmConfig
-from vllm.envs import VLLM_USE_MODELSCOPE
 from vllm.logger import init_logger
 from vllm.model_executor.model_loader.base_loader import BaseModelLoader
 from vllm.model_executor.model_loader.utils import (
@@ -64,7 +64,7 @@ class DefaultModelLoader(BaseModelLoader):
         Returns the path to the downloaded model, or None if the model is not
         downloaded from ModelScope."""
 
-        if VLLM_USE_MODELSCOPE:
+        if envs.VLLM_USE_MODELSCOPE:
             # download model from ModelScope hub,
             # lazy import so that modelscope is not required for normal use.
             # pylint: disable=C.
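
For context, the branch this hunk touches is the ModelScope download path; modelscope itself is imported inside the branch so that users staying on the HuggingFace Hub never need the package. A hedged sketch of that shape follows; the helper name and the exact snapshot_download arguments are assumptions, not the loader's real code:

import os
from typing import Optional

from vllm import envs

def maybe_download_from_modelscope(model: str,
                                   revision: Optional[str] = None) -> Optional[str]:
    """Sketch of a ModelScope-gated download helper (hypothetical)."""
    if not envs.VLLM_USE_MODELSCOPE:
        return None  # caller falls back to the HuggingFace Hub path

    # Lazy import: modelscope is only required when the flag is enabled.
    from modelscope.hub.snapshot_download import snapshot_download

    if os.path.exists(model):
        return model  # already a local directory, nothing to download
    return snapshot_download(model_id=model, revision=revision)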

View File

@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
-from vllm.envs import VLLM_USE_MODELSCOPE
+from vllm import envs
 
-if VLLM_USE_MODELSCOPE:
+if envs.VLLM_USE_MODELSCOPE:
     try:
         # Patch here, before each import happens
         import modelscope
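
The rest of the `try` block is cut off in this hunk. The comment refers to a patch that has to run before anything else imports huggingface_hub-backed loaders; a plausible reconstruction, with the `patch_hub` helper from `modelscope.utils.hf_util` named here as an assumption about the modelscope API rather than a quotation of the file:

from vllm import envs

if envs.VLLM_USE_MODELSCOPE:
    try:
        # Patch here, before each import happens.
        import modelscope  # ensures the package is importable
        # Assumption: recent modelscope releases expose patch_hub(), which
        # reroutes huggingface_hub downloads to the ModelScope hub.
        from modelscope.utils.hf_util import patch_hub

        patch_hub()
    except ImportError as err:
        raise ImportError(
            "Please install modelscope to download models from ModelScope"
        ) from err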

View File

@@ -24,7 +24,7 @@ from transformers.models.auto.modeling_auto import (
     MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)
 from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME
 
-from vllm.envs import VLLM_USE_MODELSCOPE
+from vllm import envs
 from vllm.logger import init_logger
 # yapf conflicts with isort for this block
 # yapf: disable
@@ -45,13 +45,12 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, Cohere2Config,
 from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import resolve_obj_by_qualname
 
-if VLLM_USE_MODELSCOPE:
+if envs.VLLM_USE_MODELSCOPE:
     from modelscope import AutoConfig
 else:
     from transformers import AutoConfig
 
 MISTRAL_CONFIG_NAME = "params.json"
-HF_TOKEN = os.getenv('HF_TOKEN', None)
 
 logger = init_logger(__name__)
@@ -130,7 +129,7 @@ def list_repo_files(
         ]
     # if model is remote, use hf_hub api to list files
     try:
-        if VLLM_USE_MODELSCOPE:
+        if envs.VLLM_USE_MODELSCOPE:
            from vllm.transformers_utils.utils import (
                modelscope_list_repo_files)
            return modelscope_list_repo_files(repo_id,
@@ -185,7 +184,7 @@ def file_or_path_exists(model: Union[str, Path], config_name: str,
         return file_exists(str(model),
                            config_name,
                            revision=revision,
-                           token=HF_TOKEN)
+                           token=os.getenv('HF_TOKEN', None))
 
 
 def patch_rope_scaling(config: PretrainedConfig) -> None:
@@ -312,7 +311,7 @@ def get_config(
             model,
             revision=revision,
             code_revision=code_revision,
-            token=HF_TOKEN,
+            token=os.getenv('HF_TOKEN', None),
             **kwargs,
         )
@@ -324,7 +323,7 @@ def get_config(
                 model,
                 revision=revision,
                 code_revision=code_revision,
-                token=HF_TOKEN,
+                token=os.getenv('HF_TOKEN', None),
                 **kwargs,
             )
         else:
@@ -334,7 +333,7 @@ def get_config(
                 trust_remote_code=trust_remote_code,
                 revision=revision,
                 code_revision=code_revision,
-                token=HF_TOKEN,
+                token=os.getenv('HF_TOKEN', None),
                 **kwargs,
             )
     except ValueError as e:
@@ -352,7 +351,7 @@ def get_config(
             raise e
     elif config_format == ConfigFormat.MISTRAL:
-        config = load_params_config(model, revision, token=HF_TOKEN, **kwargs)
+        config = load_params_config(model, revision, **kwargs)
     else:
         supported_formats = [
             fmt.value for fmt in ConfigFormat if fmt != ConfigFormat.AUTO
@@ -561,7 +560,7 @@ def get_sentence_transformer_tokenizer_config(model: str,
         # If model is on HuggingfaceHub, get the repo files
         repo_files = list_repo_files(model,
                                      revision=revision,
-                                     token=HF_TOKEN)
+                                     token=os.getenv('HF_TOKEN', None))
     except Exception:
         repo_files = []
@@ -768,7 +767,7 @@ def get_hf_image_processor_config(
     **kwargs,
 ) -> dict[str, Any]:
     # ModelScope does not provide an interface for image_processor
-    if VLLM_USE_MODELSCOPE:
+    if envs.VLLM_USE_MODELSCOPE:
         return dict()
     # Separate model folder from file path for GGUF models
     if check_gguf_file(model):
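
Besides the flag, this file drops the module-level `HF_TOKEN = os.getenv('HF_TOKEN', None)` constant in favor of reading the variable at each call site. The motivation is the same: a value captured at import time cannot be cleared by the test's `m.setenv("HF_TOKEN", "")`. A minimal illustration (not vLLM code):

import os

os.environ["HF_TOKEN"] = "hf_example"      # hypothetical token, just for the demo

HF_TOKEN = os.getenv("HF_TOKEN", None)     # captured once, like the old module constant

os.environ["HF_TOKEN"] = ""                # what the ModelScope test now does

print(HF_TOKEN)                            # 'hf_example': stale, would still be sent
print(os.getenv("HF_TOKEN", None))         # '': the per-call read sees the cleared value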

View File

@@ -13,7 +13,7 @@ import huggingface_hub
 from transformers import (AutoTokenizer, PreTrainedTokenizer,
                           PreTrainedTokenizerFast)
 
-from vllm.envs import VLLM_USE_MODELSCOPE
+from vllm import envs
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.transformers_utils.tokenizer_base import (TokenizerBase,
@@ -168,7 +168,7 @@ def get_tokenizer(
 ) -> AnyTokenizer:
     """Gets a tokenizer for the given model name via HuggingFace or ModelScope.
     """
-    if VLLM_USE_MODELSCOPE:
+    if envs.VLLM_USE_MODELSCOPE:
         # download model from ModelScope hub,
         # lazy import so that modelscope is not required for normal use.
         # pylint: disable=C.