mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-04 20:22:15 +08:00
Use smaller embedding model when not testing model specifically (#13891)
This commit is contained in:
parent
b9e41734c5
commit
76c89fcadd
@ -8,7 +8,7 @@ import pytest
|
|||||||
from vllm import LLM, PoolingParams, PoolingRequestOutput
|
from vllm import LLM, PoolingParams, PoolingRequestOutput
|
||||||
from vllm.distributed import cleanup_dist_env_and_memory
|
from vllm.distributed import cleanup_dist_env_and_memory
|
||||||
|
|
||||||
MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
|
MODEL_NAME = "intfloat/multilingual-e5-small"
|
||||||
|
|
||||||
PROMPTS = [
|
PROMPTS = [
|
||||||
"Hello, my name is",
|
"Hello, my name is",
|
||||||
|
|||||||
@ -13,7 +13,7 @@ from vllm.transformers_utils.tokenizer import get_tokenizer
|
|||||||
|
|
||||||
from ...utils import RemoteOpenAIServer
|
from ...utils import RemoteOpenAIServer
|
||||||
|
|
||||||
MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
|
MODEL_NAME = "intfloat/multilingual-e5-small"
|
||||||
DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}""" # noqa: E501
|
DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}""" # noqa: E501
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -282,7 +282,7 @@ async def test_metrics_exist(server: RemoteOpenAIServer,
|
|||||||
def test_metrics_exist_run_batch(use_v1: bool):
|
def test_metrics_exist_run_batch(use_v1: bool):
|
||||||
if use_v1:
|
if use_v1:
|
||||||
pytest.skip("Skipping test on vllm V1")
|
pytest.skip("Skipping test on vllm V1")
|
||||||
input_batch = """{"custom_id": "request-0", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are a helpful assistant."}}""" # noqa: E501
|
input_batch = """{"custom_id": "request-0", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "You are a helpful assistant."}}""" # noqa: E501
|
||||||
|
|
||||||
base_url = "0.0.0.0"
|
base_url = "0.0.0.0"
|
||||||
port = "8001"
|
port = "8001"
|
||||||
@ -302,7 +302,7 @@ def test_metrics_exist_run_batch(use_v1: bool):
|
|||||||
"-o",
|
"-o",
|
||||||
output_file.name,
|
output_file.name,
|
||||||
"--model",
|
"--model",
|
||||||
"intfloat/e5-mistral-7b-instruct",
|
"intfloat/multilingual-e5-small",
|
||||||
"--enable-metrics",
|
"--enable-metrics",
|
||||||
"--url",
|
"--url",
|
||||||
base_url,
|
base_url,
|
||||||
|
|||||||
@ -18,10 +18,10 @@ INPUT_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/c
|
|||||||
INVALID_INPUT_BATCH = """{"invalid_field": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
|
INVALID_INPUT_BATCH = """{"invalid_field": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
|
||||||
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
|
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
|
||||||
|
|
||||||
INPUT_EMBEDDING_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are a helpful assistant."}}
|
INPUT_EMBEDDING_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "You are a helpful assistant."}}
|
||||||
{"custom_id": "request-2", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are an unhelpful assistant."}}
|
{"custom_id": "request-2", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "You are an unhelpful assistant."}}
|
||||||
|
|
||||||
{"custom_id": "request-3", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "Hello world!"}}
|
{"custom_id": "request-3", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "Hello world!"}}
|
||||||
{"custom_id": "request-4", "method": "POST", "url": "/v1/embeddings", "body": {"model": "NonExistModel", "input": "Hello world!"}}"""
|
{"custom_id": "request-4", "method": "POST", "url": "/v1/embeddings", "body": {"model": "NonExistModel", "input": "Hello world!"}}"""
|
||||||
|
|
||||||
INPUT_SCORE_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "text_1": "What is the capital of France?", "text_2": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
|
INPUT_SCORE_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "text_1": "What is the capital of France?", "text_2": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
|
||||||
@ -37,7 +37,7 @@ def test_empty_file():
|
|||||||
proc = subprocess.Popen([
|
proc = subprocess.Popen([
|
||||||
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i",
|
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i",
|
||||||
input_file.name, "-o", output_file.name, "--model",
|
input_file.name, "-o", output_file.name, "--model",
|
||||||
"intfloat/e5-mistral-7b-instruct"
|
"intfloat/multilingual-e5-small"
|
||||||
], )
|
], )
|
||||||
proc.communicate()
|
proc.communicate()
|
||||||
proc.wait()
|
proc.wait()
|
||||||
@ -97,7 +97,7 @@ def test_embeddings():
|
|||||||
proc = subprocess.Popen([
|
proc = subprocess.Popen([
|
||||||
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i",
|
sys.executable, "-m", "vllm.entrypoints.openai.run_batch", "-i",
|
||||||
input_file.name, "-o", output_file.name, "--model",
|
input_file.name, "-o", output_file.name, "--model",
|
||||||
"intfloat/e5-mistral-7b-instruct"
|
"intfloat/multilingual-e5-small"
|
||||||
], )
|
], )
|
||||||
proc.communicate()
|
proc.communicate()
|
||||||
proc.wait()
|
proc.wait()
|
||||||
|
|||||||
@ -14,7 +14,7 @@ MODEL_NAME = os.environ.get("MODEL_NAME", "BAAI/bge-base-en-v1.5")
|
|||||||
REVISION = os.environ.get("REVISION", "main")
|
REVISION = os.environ.get("REVISION", "main")
|
||||||
|
|
||||||
MODEL_NAME_ROBERTA = os.environ.get("MODEL_NAME",
|
MODEL_NAME_ROBERTA = os.environ.get("MODEL_NAME",
|
||||||
"intfloat/multilingual-e5-large")
|
"intfloat/multilingual-e5-small")
|
||||||
REVISION_ROBERTA = os.environ.get("REVISION", "main")
|
REVISION_ROBERTA = os.environ.get("REVISION", "main")
|
||||||
|
|
||||||
|
|
||||||
@ -83,7 +83,7 @@ def test_roberta_model_loading_with_params(vllm_runner):
|
|||||||
assert model_config.pooler_config.pooling_norm
|
assert model_config.pooler_config.pooling_norm
|
||||||
|
|
||||||
# asserts on the tokenizer loaded
|
# asserts on the tokenizer loaded
|
||||||
assert model_tokenizer.tokenizer_id == "intfloat/multilingual-e5-large"
|
assert model_tokenizer.tokenizer_id == "intfloat/multilingual-e5-small"
|
||||||
assert not model_tokenizer.tokenizer_config["do_lower_case"]
|
assert not model_tokenizer.tokenizer_config["do_lower_case"]
|
||||||
|
|
||||||
def check_model(model):
|
def check_model(model):
|
||||||
|
|||||||
@ -17,7 +17,7 @@ from ..utils import check_embeddings_close
|
|||||||
pytest.param("BAAI/bge-base-en-v1.5",
|
pytest.param("BAAI/bge-base-en-v1.5",
|
||||||
marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
|
marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
|
||||||
pytest.param("sentence-transformers/all-MiniLM-L12-v2"),
|
pytest.param("sentence-transformers/all-MiniLM-L12-v2"),
|
||||||
pytest.param("intfloat/multilingual-e5-large"),
|
pytest.param("intfloat/multilingual-e5-small"),
|
||||||
# [Decoder-only]
|
# [Decoder-only]
|
||||||
pytest.param("BAAI/bge-multilingual-gemma2",
|
pytest.param("BAAI/bge-multilingual-gemma2",
|
||||||
marks=[pytest.mark.core_model]),
|
marks=[pytest.mark.core_model]),
|
||||||
|
|||||||
@ -211,7 +211,7 @@ _EMBEDDING_EXAMPLE_MODELS = {
|
|||||||
"Qwen2ForSequenceClassification": _HfExamplesInfo("jason9693/Qwen2.5-1.5B-apeach"), # noqa: E501
|
"Qwen2ForSequenceClassification": _HfExamplesInfo("jason9693/Qwen2.5-1.5B-apeach"), # noqa: E501
|
||||||
"RobertaModel": _HfExamplesInfo("sentence-transformers/stsb-roberta-base-v2"), # noqa: E501
|
"RobertaModel": _HfExamplesInfo("sentence-transformers/stsb-roberta-base-v2"), # noqa: E501
|
||||||
"RobertaForMaskedLM": _HfExamplesInfo("sentence-transformers/all-roberta-large-v1"), # noqa: E501
|
"RobertaForMaskedLM": _HfExamplesInfo("sentence-transformers/all-roberta-large-v1"), # noqa: E501
|
||||||
"XLMRobertaModel": _HfExamplesInfo("intfloat/multilingual-e5-large"),
|
"XLMRobertaModel": _HfExamplesInfo("intfloat/multilingual-e5-small"),
|
||||||
# [Multimodal]
|
# [Multimodal]
|
||||||
"LlavaNextForConditionalGeneration": _HfExamplesInfo("royokong/e5-v"),
|
"LlavaNextForConditionalGeneration": _HfExamplesInfo("royokong/e5-v"),
|
||||||
"Phi3VForCausalLM": _HfExamplesInfo("TIGER-Lab/VLM2Vec-Full",
|
"Phi3VForCausalLM": _HfExamplesInfo("TIGER-Lab/VLM2Vec-Full",
|
||||||
|
|||||||
@ -13,7 +13,7 @@ from vllm.platforms import current_platform
|
|||||||
("model_id", "expected_runner_type", "expected_task"),
|
("model_id", "expected_runner_type", "expected_task"),
|
||||||
[
|
[
|
||||||
("distilbert/distilgpt2", "generate", "generate"),
|
("distilbert/distilgpt2", "generate", "generate"),
|
||||||
("intfloat/e5-mistral-7b-instruct", "pooling", "embed"),
|
("intfloat/multilingual-e5-small", "pooling", "embed"),
|
||||||
("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify"),
|
("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify"),
|
||||||
("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "score"),
|
("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "score"),
|
||||||
("Qwen/Qwen2.5-Math-RM-72B", "pooling", "reward"),
|
("Qwen/Qwen2.5-Math-RM-72B", "pooling", "reward"),
|
||||||
|
|||||||
@ -28,7 +28,7 @@ MODELS_ON_S3 = [
|
|||||||
"HuggingFaceM4/Idefics3-8B-Llama3",
|
"HuggingFaceM4/Idefics3-8B-Llama3",
|
||||||
"internlm/internlm2-1_8b-reward",
|
"internlm/internlm2-1_8b-reward",
|
||||||
"intfloat/e5-mistral-7b-instruct",
|
"intfloat/e5-mistral-7b-instruct",
|
||||||
"intfloat/multilingual-e5-large",
|
"intfloat/multilingual-e5-small",
|
||||||
"jason9693/Qwen2.5-1.5B-apeach",
|
"jason9693/Qwen2.5-1.5B-apeach",
|
||||||
"llava-hf/llava-1.5-7b-hf",
|
"llava-hf/llava-1.5-7b-hf",
|
||||||
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
|
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user