[CI] Skip the pooling models that do not support transformers v4.55 (#22411)

Signed-off-by: wang.yuqi <noooop@126.com>

parent 4be02a3776
commit 2a4c825523
@@ -7,7 +7,7 @@ import pytest
 from vllm.config import PoolerConfig
 from vllm.platforms import current_platform
 
-from ...utils import check_embeddings_close
+from ...utils import check_embeddings_close, check_transformers_version
 
 
 @pytest.fixture(autouse=True)
@@ -56,6 +56,9 @@ def test_models(
     model,
     monkeypatch,
 ) -> None:
+    if model == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
+        check_transformers_version(model, max_transformers_version="4.53.2")
+
     if model == "BAAI/bge-multilingual-gemma2" and current_platform.is_rocm():
         # ROCm Triton FA does not currently support sliding window attention
         # switch to use ROCm CK FA backend
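The same guard recurs in the remaining hunks of this commit. For readers skimming the diff, below is a minimal, self-contained sketch of the pattern: gate a parametrized test on the installed transformers release before any heavyweight model loading runs. The pin at 4.53.2 keeps these models running on the 4.53 line while skipping them on v4.54/v4.55 and newer. The helper name skip_if_unsupported and the MAX_SUPPORTED table are illustrative stand-ins, not part of the commit; the suite itself routes this through check_transformers_version, which is added in the final hunk.

    # Illustrative sketch only: the real tests call check_transformers_version,
    # which delegates to _HfExamplesInfo (see the utils hunk at the end).
    import pytest
    from packaging.version import Version
    from transformers import __version__ as TRANSFORMERS_VERSION

    # Last transformers release each model is known to work with (assumed table).
    MAX_SUPPORTED = {"Alibaba-NLP/gte-Qwen2-1.5B-instruct": "4.53.2"}

    def skip_if_unsupported(model: str) -> None:
        """Skip the calling test when transformers is too new for the model."""
        max_version = MAX_SUPPORTED.get(model)
        if max_version is not None and Version(TRANSFORMERS_VERSION) > Version(max_version):
            pytest.skip(f"{model} needs transformers<={max_version}, "
                        f"installed {TRANSFORMERS_VERSION}")

    @pytest.mark.parametrize("model", ["Alibaba-NLP/gte-Qwen2-1.5B-instruct"])
    def test_embedding_smoke(model: str) -> None:
        skip_if_unsupported(model)  # must run before any model loading
        # ... model setup and assertions would follow here ...

Calling the guard first means the skip reason is reported by pytest instead of the test failing deep inside model initialization.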
@@ -4,6 +4,7 @@ from typing import Any
 
 import pytest
 
+from ...utils import check_transformers_version
 from .embed_utils import EmbedModelInfo, correctness_test_embed_models
 from .mteb_utils import mteb_test_embed_models
 
@@ -60,6 +61,10 @@ MODELS = [
 @pytest.mark.parametrize("model_info", MODELS)
 def test_embed_models_mteb(hf_runner, vllm_runner,
                            model_info: EmbedModelInfo) -> None:
+    if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
+        check_transformers_version(model_info.name,
+                                   max_transformers_version="4.53.2")
+
     vllm_extra_kwargs: dict[str, Any] = {}
     if model_info.architecture == "GteNewModel":
         vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}
@@ -72,6 +77,10 @@ def test_embed_models_mteb(hf_runner, vllm_runner,
 def test_embed_models_correctness(hf_runner, vllm_runner,
                                   model_info: EmbedModelInfo,
                                   example_prompts) -> None:
+    if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
+        check_transformers_version(model_info.name,
+                                   max_transformers_version="4.53.2")
+
     vllm_extra_kwargs: dict[str, Any] = {}
     if model_info.architecture == "GteNewModel":
         vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}
@@ -10,6 +10,7 @@ from transformers import AutoModel
 from vllm.platforms import current_platform
 
 from ....conftest import HfRunner
+from ...utils import check_transformers_version
 
 
 @pytest.fixture(autouse=True)
@@ -86,6 +87,9 @@ def test_prm_models(
     dtype: str,
     monkeypatch,
 ) -> None:
+    check_transformers_version("Qwen/Qwen2.5-Math-PRM-7B",
+                               max_transformers_version="4.53.2")
+
     if current_platform.is_cpu() and os.environ.get("VLLM_USE_V1", "0") == "0":
         pytest.skip("CPU only supports V1")
 
@@ -412,3 +412,14 @@ def dummy_hf_overrides(
         })
 
     return hf_config
+
+
+def check_transformers_version(model: str,
+                               min_transformers_version: Optional[str] = None,
+                               max_transformers_version: Optional[str] = None):
+    from .registry import _HfExamplesInfo
+
+    return _HfExamplesInfo(model,
+                           min_transformers_version=min_transformers_version,
+                           max_transformers_version=max_transformers_version
+                           ).check_transformers_version(on_fail="skip")
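For context, the new helper defers to _HfExamplesInfo from the sibling registry module (from .registry import _HfExamplesInfo), whose check_transformers_version(on_fail="skip") is expected to compare the installed transformers release against the optional bounds and skip the test when it falls outside them. A hedged re-implementation of that contract follows; the bounds are treated as inclusive (so the 4.53.2 pin itself still runs), and every name apart from pytest and packaging is illustrative, not the registry's actual API.

    # Hedged sketch of the skip contract; the real logic lives in the
    # registry module's _HfExamplesInfo.check_transformers_version.
    from typing import Optional

    import pytest
    from packaging.version import Version
    from transformers import __version__ as TRANSFORMERS_VERSION

    def check_version_bounds(model: str,
                             min_version: Optional[str] = None,
                             max_version: Optional[str] = None) -> None:
        """Skip the calling test if transformers is outside [min, max]."""
        installed = Version(TRANSFORMERS_VERSION)
        if min_version is not None and installed < Version(min_version):
            pytest.skip(f"{model} needs transformers>={min_version}")
        if max_version is not None and installed > Version(max_version):
            pytest.skip(f"{model} needs transformers<={max_version}")

Centralizing the check keeps each call site to one or two lines and makes the skip reason surface uniformly in CI reports.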