diff --git a/tests/test_regression.py b/tests/test_regression.py
index cb68e9ecfc06..5d27d3579301 100644
--- a/tests/test_regression.py
+++ b/tests/test_regression.py
@@ -53,6 +53,27 @@ def test_gc():
     assert allocated < 50 * 1024 * 1024
 
 
+def test_model_from_modelscope(monkeypatch):
+    # model: https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary
+    MODELSCOPE_MODEL_NAME = "qwen/Qwen1.5-0.5B-Chat"
+    monkeypatch.setenv("VLLM_USE_MODELSCOPE", "True")
+    try:
+        llm = LLM(model=MODELSCOPE_MODEL_NAME)
+
+        prompts = [
+            "Hello, my name is",
+            "The president of the United States is",
+            "The capital of France is",
+            "The future of AI is",
+        ]
+        sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+
+        outputs = llm.generate(prompts, sampling_params)
+        assert len(outputs) == 4
+    finally:
+        monkeypatch.delenv("VLLM_USE_MODELSCOPE", raising=False)
+
+
 if __name__ == "__main__":
     import pytest
     pytest.main([__file__])
diff --git a/vllm/config.py b/vllm/config.py
index 3a14c391b7f0..4efdb6cab52c 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -113,7 +113,11 @@ class ModelConfig:
         self.revision = revision
         self.code_revision = code_revision
         self.rope_scaling = rope_scaling
-        self.tokenizer_revision = tokenizer_revision
+        # By default, the tokenizer revision tracks the model revision.
+        if tokenizer_revision is None:
+            self.tokenizer_revision = revision
+        else:
+            self.tokenizer_revision = tokenizer_revision
         self.quantization = quantization
         self.quantization_param_path = quantization_param_path
         self.enforce_eager = enforce_eager
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 044eec6410a5..970645987885 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -1,7 +1,8 @@
 from typing import Dict, Optional
 
-from transformers import AutoConfig, PretrainedConfig
+from transformers import PretrainedConfig
 
+from vllm.envs import VLLM_USE_MODELSCOPE
 from vllm.logger import init_logger
 from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
                                              JAISConfig, MPTConfig, RWConfig)
@@ -24,6 +25,10 @@ def get_config(model: str,
                code_revision: Optional[str] = None,
                rope_scaling: Optional[dict] = None) -> PretrainedConfig:
     try:
+        if VLLM_USE_MODELSCOPE:
+            from modelscope import AutoConfig
+        else:
+            from transformers import AutoConfig
         config = AutoConfig.from_pretrained(
             model,
             trust_remote_code=trust_remote_code,
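
Usage sketch (not part of the diff): a minimal example of the path these changes enable, assuming the `modelscope` package is installed and reusing the model ID and sampling settings from the test above. Because `vllm/transformers_utils/config.py` binds the flag via `from vllm.envs import VLLM_USE_MODELSCOPE` at import time, the environment variable must be set before vllm is imported.

    import os

    # Must be set before importing vllm; the flag is read when vllm's
    # modules are first imported (see the vllm.envs import in this diff).
    os.environ["VLLM_USE_MODELSCOPE"] = "True"

    from vllm import LLM, SamplingParams

    # With the flag set, the model ID is resolved against modelscope.cn
    # instead of the Hugging Face Hub.
    llm = LLM(model="qwen/Qwen1.5-0.5B-Chat")
    outputs = llm.generate(["The capital of France is"],
                           SamplingParams(temperature=0.8, top_p=0.95))
    print(outputs[0].outputs[0].text)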