vllm/tests/entrypoints/openai/test_score.py

# SPDX-License-Identifier: Apache-2.0

import pytest
import requests

from vllm.entrypoints.openai.protocol import ScoreResponse

from ...utils import RemoteOpenAIServer

MODEL_NAME = "BAAI/bge-reranker-v2-m3"


@pytest.fixture(scope="module")
def server():
    args = ["--enforce-eager", "--max-model-len", "100"]

    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
        yield remote_server


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_text_1_str_text_2_list(server: RemoteOpenAIServer, model_name: str):
    text_1 = "What is the capital of France?"
    text_2 = [
        "The capital of Brazil is Brasilia.", "The capital of France is Paris."
    ]

    score_response = requests.post(server.url_for("score"),
                                   json={
                                       "model": model_name,
                                       "text_1": text_1,
                                       "text_2": text_2,
                                   })
    score_response.raise_for_status()
    score = ScoreResponse.model_validate(score_response.json())

    assert score.id is not None
    assert score.data is not None
    assert len(score.data) == 2
    assert score.data[0].score <= 0.01
    assert score.data[1].score >= 0.9


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_text_1_list_text_2_list(server: RemoteOpenAIServer, model_name: str):
    text_1 = [
        "What is the capital of the United States?",
        "What is the capital of France?"
    ]
    text_2 = [
        "The capital of Brazil is Brasilia.", "The capital of France is Paris."
    ]

    score_response = requests.post(server.url_for("score"),
                                   json={
                                       "model": model_name,
                                       "text_1": text_1,
                                       "text_2": text_2,
                                   })
    score_response.raise_for_status()
    score = ScoreResponse.model_validate(score_response.json())

    assert score.id is not None
    assert score.data is not None
    assert len(score.data) == 2
    assert score.data[0].score <= 0.01
    assert score.data[1].score >= 0.9


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_text_1_str_text_2_str(server: RemoteOpenAIServer, model_name: str):
    text_1 = "What is the capital of France?"
    text_2 = "The capital of France is Paris."

    score_response = requests.post(server.url_for("score"),
                                   json={
                                       "model": model_name,
                                       "text_1": text_1,
                                       "text_2": text_2,
                                   })
    score_response.raise_for_status()
    score = ScoreResponse.model_validate(score_response.json())

    assert score.id is not None
    assert score.data is not None
    assert len(score.data) == 1
    assert score.data[0].score >= 0.9


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_score_max_model_len(server: RemoteOpenAIServer, model_name: str):

    text_1 = "What is the capital of France?" * 20
    text_2 = [
        "The capital of Brazil is Brasilia.", "The capital of France is Paris."
    ]

    score_response = requests.post(server.url_for("score"),
                                   json={
                                       "model": model_name,
                                       "text_1": text_1,
                                       "text_2": text_2,
                                   })
    assert score_response.status_code == 400
    # Assert just a small fragments of the response
    assert "Please reduce the length of the input." in \
        score_response.text

    # Test truncation
    score_response = requests.post(server.url_for("score"),
                                   json={
                                       "model": model_name,
                                       "text_1": text_1,
                                       "text_2": text_2,
                                       "truncate_prompt_tokens": 101
                                   })
    assert score_response.status_code == 400
    assert "Please, select a smaller truncation size." in \
        score_response.text