mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-23 08:44:27 +08:00
53 lines
1.5 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
|
|
import pytest
|
|
|
|
from tests.models.language.pooling_mteb_test.mteb_embed_utils import (
|
|
mteb_test_embed_models,
|
|
)
|
|
from tests.models.language.pooling_mteb_test.mteb_score_utils import (
|
|
mteb_test_rerank_models,
|
|
)
|
|
from tests.models.utils import (
|
|
EmbedModelInfo,
|
|
RerankModelInfo,
|
|
)
|
|
|
|
# Embedding models under test, each paired with the reference MTEB score the
# vLLM run is checked against (see mteb_test_embed_models).
EMBEDDING_MODELS = [
    EmbedModelInfo(
        "nvidia/llama-nemotron-embed-1b-v2",
        architecture="LlamaBidirectionalModel",
        # Expected MTEB score for this exact checkpoint.
        mteb_score=0.689164662128673,
        pooling_type="MEAN",
        # Bidirectional (encoder-only) attention; prefix caching and chunked
        # prefill are flagged unsupported — presumably incompatible with
        # non-causal attention (NOTE(review): confirm against model impl).
        attn_type="encoder_only",
        is_prefix_caching_supported=False,
        is_chunked_prefill_supported=False,
    )
]
|
|
|
|
# Rerank (sequence-classification) models under test, each paired with the
# reference MTEB score the vLLM run is checked against
# (see mteb_test_rerank_models).
RERANK_MODELS = [
    RerankModelInfo(
        "nvidia/llama-nemotron-rerank-1b-v2",
        architecture="LlamaBidirectionalForSequenceClassification",
        # Chat template file used to format inputs for this reranker.
        chat_template_name="nemotron-rerank.jinja",
        # Expected MTEB score for this exact checkpoint.
        mteb_score=0.33994,
        pooling_type="MEAN",
        # Bidirectional (encoder-only) attention; prefix caching and chunked
        # prefill are flagged unsupported — presumably incompatible with
        # non-causal attention (NOTE(review): confirm against model impl).
        attn_type="encoder_only",
        is_prefix_caching_supported=False,
        is_chunked_prefill_supported=False,
    ),
]
|
|
|
|
|
|
@pytest.mark.parametrize("model_info", EMBEDDING_MODELS)
def test_embed_models_mteb(hf_runner, vllm_runner, model_info: EmbedModelInfo) -> None:
    """Run the MTEB embedding check for one entry of EMBEDDING_MODELS.

    Delegates to the shared harness, which compares the vLLM score against
    the model's recorded baseline.
    """
    runners = (hf_runner, vllm_runner)
    mteb_test_embed_models(*runners, model_info)
|
|
|
|
|
|
@pytest.mark.parametrize("model_info", RERANK_MODELS)
def test_rerank_models_mteb(
    hf_runner, vllm_runner, model_info: RerankModelInfo
) -> None:
    """Run the MTEB rerank check for one entry of RERANK_MODELS.

    Delegates to the shared harness, which compares the vLLM score against
    the model's recorded baseline.
    """
    runners = (hf_runner, vllm_runner)
    mteb_test_rerank_models(*runners, model_info)
|