[Misc][ModelScope] Change to use runtime VLLM_USE_MODELSCOPE (#18655)
Signed-off-by: Mengqing Cao <cmq0113@163.com>
Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
parent cebc22f3b6
commit 6ab681bcbe
@@ -60,6 +60,9 @@ def test_model_from_modelscope(monkeypatch: pytest.MonkeyPatch):
     # model: https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_MODELSCOPE", "True")
+        # Don't use HF_TOKEN for ModelScope repos, otherwise it will fail
+        # with 400 Client Error: Bad Request.
+        m.setenv("HF_TOKEN", "")
         llm = LLM(model="qwen/Qwen1.5-0.5B-Chat")

         prompts = [

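Note: the added m.setenv("HF_TOKEN", "") only takes effect because the token is now read at call time rather than snapshotted at import. A minimal sketch of the difference, using illustrative names (not code from this diff):

import os

# Frozen at import: later environment changes are never observed.
FLAG_AT_IMPORT = os.getenv("VLLM_USE_MODELSCOPE", "False") == "True"

def flag_at_runtime() -> bool:
    # Re-read on every call, so monkeypatch.setenv takes effect.
    return os.getenv("VLLM_USE_MODELSCOPE", "False") == "True"

os.environ["VLLM_USE_MODELSCOPE"] = "True"  # roughly what m.setenv does
assert flag_at_runtime()   # True: sees the updated environment
assert not FLAG_AT_IMPORT  # still False (assuming the var was unset at import)
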
@@ -11,8 +11,8 @@ import torch
 from torch import nn
 from transformers.utils import SAFE_WEIGHTS_INDEX_NAME

+from vllm import envs
 from vllm.config import LoadConfig, LoadFormat, ModelConfig, VllmConfig
-from vllm.envs import VLLM_USE_MODELSCOPE
 from vllm.logger import init_logger
 from vllm.model_executor.model_loader.base_loader import BaseModelLoader
 from vllm.model_executor.model_loader.utils import (

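The switch from `from vllm.envs import VLLM_USE_MODELSCOPE` to `from vllm import envs` works because vllm/envs.py resolves variables lazily on attribute access. A simplified sketch of that pattern (PEP 562 module-level __getattr__); the real module covers many more variables and parsing details:

import os
from typing import Any, Callable

environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_USE_MODELSCOPE":
    lambda: os.environ.get("VLLM_USE_MODELSCOPE", "False").lower() == "true",
}

def __getattr__(name: str) -> Any:
    # Runs on every `envs.<NAME>` access, so the environment is consulted
    # at use time instead of being cached when the module is imported.
    if name in environment_variables:
        return environment_variables[name]()
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
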
@@ -64,7 +64,7 @@ class DefaultModelLoader(BaseModelLoader):

         Returns the path to the downloaded model, or None if the model is not
         downloaded from ModelScope."""
-        if VLLM_USE_MODELSCOPE:
+        if envs.VLLM_USE_MODELSCOPE:
             # download model from ModelScope hub,
             # lazy import so that modelscope is not required for normal use.
             # pylint: disable=C.

@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0

-from vllm.envs import VLLM_USE_MODELSCOPE
+from vllm import envs

-if VLLM_USE_MODELSCOPE:
+if envs.VLLM_USE_MODELSCOPE:
     try:
         # Patch here, before each import happens
         import modelscope

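This file gates the modelscope import behind the runtime flag so the patch runs before anything else imports from the hub. A sketch of the guard's shape, assuming a simple error wrapper (the actual except/raise handling in the file may differ):

from vllm import envs

if envs.VLLM_USE_MODELSCOPE:
    try:
        # Patch here, before each import happens
        import modelscope  # noqa: F401  (imported for its side effects)
    except ImportError as err:
        raise RuntimeError(
            "VLLM_USE_MODELSCOPE is set but `modelscope` is not installed; "
            "install it with `pip install modelscope`") from err
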
@@ -24,7 +24,7 @@ from transformers.models.auto.modeling_auto import (
     MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)
 from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME

-from vllm.envs import VLLM_USE_MODELSCOPE
+from vllm import envs
 from vllm.logger import init_logger
 # yapf conflicts with isort for this block
 # yapf: disable

@@ -45,13 +45,12 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, Cohere2Config,
 from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import resolve_obj_by_qualname

-if VLLM_USE_MODELSCOPE:
+if envs.VLLM_USE_MODELSCOPE:
     from modelscope import AutoConfig
 else:
     from transformers import AutoConfig

 MISTRAL_CONFIG_NAME = "params.json"
-HF_TOKEN = os.getenv('HF_TOKEN', None)

 logger = init_logger(__name__)

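Removing the module-level HF_TOKEN snapshot is what lets the test's m.setenv("HF_TOKEN", "") work: every call site below now queries the environment directly. An illustrative helper (current_hf_token is not a vLLM function):

import os
from typing import Optional

def current_hf_token() -> Optional[str]:
    # Mirrors the inline os.getenv('HF_TOKEN', None) calls in this diff:
    # the value always reflects the environment at call time.
    return os.getenv('HF_TOKEN', None)

os.environ['HF_TOKEN'] = ''  # as the test above sets it
assert current_hf_token() == ''
# An empty string is falsy, so downstream Hugging Face calls should treat
# it as "no token" (an assumption about huggingface_hub, not stated here).
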
@@ -130,7 +129,7 @@ def list_repo_files(
     ]
     # if model is remote, use hf_hub api to list files
     try:
-        if VLLM_USE_MODELSCOPE:
+        if envs.VLLM_USE_MODELSCOPE:
             from vllm.transformers_utils.utils import (
                 modelscope_list_repo_files)
             return modelscope_list_repo_files(repo_id,

@@ -185,7 +184,7 @@ def file_or_path_exists(model: Union[str, Path], config_name: str,
         return file_exists(str(model),
                            config_name,
                            revision=revision,
-                           token=HF_TOKEN)
+                           token=os.getenv('HF_TOKEN', None))


 def patch_rope_scaling(config: PretrainedConfig) -> None:

@@ -312,7 +311,7 @@ def get_config(
             model,
             revision=revision,
             code_revision=code_revision,
-            token=HF_TOKEN,
+            token=os.getenv('HF_TOKEN', None),
             **kwargs,
         )

@@ -324,7 +323,7 @@ def get_config(
             model,
             revision=revision,
             code_revision=code_revision,
-            token=HF_TOKEN,
+            token=os.getenv('HF_TOKEN', None),
             **kwargs,
         )
     else:

@@ -334,7 +333,7 @@ def get_config(
             trust_remote_code=trust_remote_code,
             revision=revision,
             code_revision=code_revision,
-            token=HF_TOKEN,
+            token=os.getenv('HF_TOKEN', None),
             **kwargs,
         )
     except ValueError as e:

@@ -352,7 +351,7 @@ def get_config(
             raise e

     elif config_format == ConfigFormat.MISTRAL:
-        config = load_params_config(model, revision, token=HF_TOKEN, **kwargs)
+        config = load_params_config(model, revision, **kwargs)
     else:
         supported_formats = [
             fmt.value for fmt in ConfigFormat if fmt != ConfigFormat.AUTO

@@ -561,7 +560,7 @@ def get_sentence_transformer_tokenizer_config(model: str,
         # If model is on HuggingfaceHub, get the repo files
         repo_files = list_repo_files(model,
                                      revision=revision,
-                                     token=HF_TOKEN)
+                                     token=os.getenv('HF_TOKEN', None))
     except Exception:
         repo_files = []

@@ -768,7 +767,7 @@ def get_hf_image_processor_config(
     **kwargs,
 ) -> dict[str, Any]:
     # ModelScope does not provide an interface for image_processor
-    if VLLM_USE_MODELSCOPE:
+    if envs.VLLM_USE_MODELSCOPE:
         return dict()
     # Separate model folder from file path for GGUF models
     if check_gguf_file(model):

@@ -13,7 +13,7 @@ import huggingface_hub
 from transformers import (AutoTokenizer, PreTrainedTokenizer,
                           PreTrainedTokenizerFast)

-from vllm.envs import VLLM_USE_MODELSCOPE
+from vllm import envs
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.transformers_utils.tokenizer_base import (TokenizerBase,

@@ -168,7 +168,7 @@ def get_tokenizer(
 ) -> AnyTokenizer:
     """Gets a tokenizer for the given model name via HuggingFace or ModelScope.
     """
-    if VLLM_USE_MODELSCOPE:
+    if envs.VLLM_USE_MODELSCOPE:
         # download model from ModelScope hub,
         # lazy import so that modelscope is not required for normal use.
         # pylint: disable=C.

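The "lazy import" comment is what keeps modelscope an optional dependency. A sketch of the pattern, assuming ModelScope exposes snapshot_download at this path (check the modelscope docs for the exact signature):

def download_from_modelscope_sketch(model: str) -> str:
    # Importing inside the function means users who never enable
    # VLLM_USE_MODELSCOPE never need modelscope installed, and the import
    # cost is paid only when the feature is actually used.
    from modelscope.hub.snapshot_download import snapshot_download
    return snapshot_download(model_id=model)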