[CI] Add mteb testing to test the accuracy of the embedding model (#17175)

commit 86847700d7 (parent d6c86d09ae)
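This change adds mteb to the test requirements, regenerates the pinned lockfile (pulling in eval-type-backport, polars, and pytrec-eval-terrier as new transitive dependencies), adds an end-to-end MTEB accuracy test against the OpenAI-compatible embeddings server, and re-enables three previously skipped MTEB tests in the pooling model suite. Assuming a standard vLLM development checkout, the new test would presumably be invoked as: pytest tests/entrypoints/openai/correctness/test_mteb.py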
requirements/test.in
@@ -33,6 +33,7 @@ num2words # required for smolvlm test
 opencv-python-headless >= 4.11.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
 lm-eval[api]==0.4.8 # required for model evaluation test
+mteb>=1.38.11, <2 # required for mteb test
 transformers==4.51.3
 tokenizers==0.21.1
 huggingface-hub[hf_xet]>=0.30.0 # Required for Xet downloads.
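The compiled lockfile below appears to be generated in pip-compile style: the indented "# via" annotations under each pin record which requirements pull that package in, so every added "#   mteb" line marks an existing dependency that mteb now also requires.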
requirements/test.txt
@@ -99,6 +99,7 @@ datasets==3.0.2
     # via
     #   evaluate
     #   lm-eval
+    #   mteb
 decorator==5.1.1
     # via librosa
 dill==0.3.8
@@ -124,6 +125,8 @@ email-validator==2.2.0
     # via pydantic
 encodec==0.1.1
     # via vocos
+eval-type-backport==0.2.2
+    # via mteb
 evaluate==0.4.3
     # via lm-eval
 fastparquet==2024.11.0
@@ -291,6 +294,8 @@ msgpack==1.1.0
     # via
     #   librosa
     #   ray
+mteb==1.38.11
+    # via -r requirements/test.in
 multidict==6.1.0
     # via
     #   aiohttp
@@ -331,6 +336,7 @@ numpy==1.26.4
     #   librosa
     #   matplotlib
     #   mistral-common
+    #   mteb
     #   numba
     #   numexpr
     #   opencv-python-headless
@@ -443,6 +449,8 @@ plotly==5.24.1
     # via genai-perf
 pluggy==1.5.0
     # via pytest
+polars==1.29.0
+    # via mteb
 pooch==1.8.2
     # via librosa
 portalocker==2.10.1
@@ -476,6 +484,7 @@ pydantic==2.9.2
     # via
     #   datamodel-code-generator
     #   mistral-common
+    #   mteb
 pydantic-core==2.23.4
     # via pydantic
 pygments==2.18.0
@@ -522,6 +531,8 @@ python-dateutil==2.9.0.post0
     #   typepy
 python-rapidjson==1.20
     # via tritonclient
+pytrec-eval-terrier==0.5.7
+    # via mteb
 pytz==2024.2
     # via
     #   pandas
@@ -564,6 +575,7 @@ requests==2.32.3
     #   huggingface-hub
     #   lm-eval
     #   mistral-common
+    #   mteb
     #   pooch
     #   ray
     #   responses
@@ -580,6 +592,7 @@ rfc3987==1.3.8
 rich==13.9.4
     # via
     #   genai-perf
+    #   mteb
     #   typer
 rouge-score==0.1.2
     # via lm-eval
@@ -607,16 +620,20 @@ scikit-learn==1.5.2
     # via
     #   librosa
     #   lm-eval
+    #   mteb
     #   sentence-transformers
 scipy==1.13.1
     # via
     #   librosa
+    #   mteb
     #   scikit-learn
     #   sentence-transformers
     #   statsmodels
     #   vocos
 sentence-transformers==3.2.1
-    # via -r requirements/test.in
+    # via
+    #   -r requirements/test.in
+    #   mteb
 sentencepiece==0.2.0
     # via mistral-common
 setuptools==77.0.3
@@ -696,6 +713,7 @@ torch==2.7.0+cu128
     #   fastsafetensors
     #   lm-eval
     #   mamba-ssm
+    #   mteb
     #   peft
     #   runai-model-streamer
     #   sentence-transformers
@@ -720,6 +738,7 @@ tqdm==4.66.6
     #   evaluate
     #   huggingface-hub
     #   lm-eval
+    #   mteb
     #   nltk
     #   peft
     #   pqdm
@@ -759,6 +778,7 @@ typing-extensions==4.12.2
     #   huggingface-hub
     #   librosa
     #   mistral-common
+    #   mteb
     #   pqdm
     #   pydantic
     #   pydantic-core
tests/entrypoints/openai/correctness/test_mteb.py (new file, 42 lines)
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: Apache-2.0
+import math
+import os
+
+import pytest
+
+from tests.models.language.pooling.mteb_utils import (MTEB_EMBED_TASKS,
+                                                      OpenAIClientMtebEncoder,
+                                                      run_mteb_embed_task,
+                                                      run_mteb_embed_task_st)
+from tests.utils import RemoteOpenAIServer
+
+os.environ["VLLM_LOGGING_LEVEL"] = "WARNING"
+
+MODEL_NAME = "BAAI/bge-m3"
+DTYPE = "float16"
+MAIN_SCORE = 0.7873427091972599
+
+
+@pytest.fixture(scope="module")
+def server():
+    args = [
+        "--task", "embed", "--dtype", DTYPE, "--enforce-eager",
+        "--max-model-len", "512"
+    ]
+
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+        yield remote_server
+
+
+def test_mteb(server):
+    client = server.get_client()
+    encoder = OpenAIClientMtebEncoder(MODEL_NAME, client)
+    vllm_main_score = run_mteb_embed_task(encoder, MTEB_EMBED_TASKS)
+    st_main_score = MAIN_SCORE or run_mteb_embed_task_st(
+        MODEL_NAME, MTEB_EMBED_TASKS)
+
+    print("VLLM main score: ", vllm_main_score)
+    print("SentenceTransformer main score: ", st_main_score)
+    print("Difference: ", st_main_score - vllm_main_score)
+
+    assert math.isclose(st_main_score, vllm_main_score, rel_tol=1e-4)
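Note the short-circuit in test_mteb: because MAIN_SCORE is a non-zero float, MAIN_SCORE or run_mteb_embed_task_st(...) never recomputes the SentenceTransformers baseline and simply reuses the pinned score; clearing MAIN_SCORE (e.g. setting it to 0) would trigger a live baseline run. Below is a minimal sketch of the final tolerance check, with a hypothetical vLLM score standing in for a live server run:

    import math

    st_main_score = 0.7873427091972599  # pinned SentenceTransformers baseline
    vllm_main_score = 0.78738           # hypothetical score from the vLLM server

    # rel_tol=1e-4 tolerates a relative difference of up to 0.01% between scores.
    assert math.isclose(st_main_score, vllm_main_score, rel_tol=1e-4)

The remaining hunks re-enable MTEB tests in the pooling model suite that were previously skipped unconditionally: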
@@ -58,8 +58,6 @@ MODELS = [
 @pytest.mark.parametrize("model_info", MODELS)
 def test_models_mteb(hf_runner, vllm_runner,
                      model_info: EmbedModelInfo) -> None:
-    pytest.skip("Skipping mteb test.")
-
     from .mteb_utils import mteb_test_embed_models

     vllm_extra_kwargs: dict[str, Any] = {}
@@ -23,7 +23,6 @@ MODELS = [
 @pytest.mark.parametrize("model_info", MODELS)
 def test_models_mteb(hf_runner, vllm_runner,
                      model_info: EmbedModelInfo) -> None:
-    pytest.skip("Skipping mteb test.")
     from .mteb_utils import mteb_test_embed_models
     mteb_test_embed_models(hf_runner, vllm_runner, model_info)

@@ -46,7 +46,6 @@ def test_models_mteb(
     vllm_runner,
     model_info: EmbedModelInfo,
 ) -> None:
-    pytest.skip("Skipping mteb test.")
     from .mteb_utils import mteb_test_embed_models
     mteb_test_embed_models(hf_runner, vllm_runner, model_info)
