mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 13:36:12 +08:00
35 lines
940 B
Python
35 lines
940 B
Python
"""Ensure that a text-only Qwen model can be run without throwing an error.
|
|
We explicitly test this because Qwen is implemented as a multimodal and
|
|
supports a visual encoder for models like Qwen-VL.
|
|
"""
|
|
from typing import List, Type
|
|
|
|
import pytest
|
|
|
|
from ....conftest import VllmRunner
|
|
|
|
models = [
|
|
"Qwen/Qwen-7B-Chat" # Has no visual encoder
|
|
]
|
|
|
|
|
|
@pytest.mark.parametrize("model", models)
|
|
@pytest.mark.parametrize("dtype", ["bfloat16"])
|
|
@pytest.mark.parametrize("max_tokens", [32])
|
|
@pytest.mark.parametrize("num_logprobs", [5])
|
|
def test_text_only_qwen_model_can_be_loaded_and_run(
|
|
vllm_runner: Type[VllmRunner],
|
|
example_prompts: List[str],
|
|
model: str,
|
|
*,
|
|
dtype: str,
|
|
max_tokens: int,
|
|
num_logprobs: int,
|
|
):
|
|
with vllm_runner(model, dtype=dtype) as vllm_model:
|
|
vllm_model.generate_greedy_logprobs(
|
|
example_prompts,
|
|
max_tokens,
|
|
num_logprobs=num_logprobs,
|
|
)
|