[Misc] Clean up test docstrings and names (#17521)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Authored by Cyrus Leung on 2025-05-01 20:19:32 +08:00; committed by GitHub
parent 1903c0b8a3
commit 48e925fab5
19 changed files with 51 additions and 115 deletions

View File

@@ -395,10 +395,8 @@ steps:
   - csrc/
   - vllm/model_executor/layers/quantization
   - tests/quantization
-  - tests/models/quantization
   commands:
   - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization
-  - pytest -v -s models/quantization

 - label: LM Eval Small Models # 53min
   working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
@@ -509,6 +507,14 @@ steps:
   - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
   - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model'

+- label: Quantized Models Test
+  #mirror_hardwares: [amd]
+  source_file_dependencies:
+  - vllm/model_executor/layers/quantization
+  - tests/models/quantization
+  commands:
+  - pytest -v -s models/quantization
+
 # This test is used only in PR development phase to test individual models and should never run on main
 - label: Custom Models Test
   mirror_hardwares: [amd]

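For reference, a minimal sketch of how the relocated quantization-model tests could be invoked locally from the tests/ directory, assuming a working vLLM development install; the path simply mirrors the new `tests/models/quantization` entry added above.

# Hedged sketch: run the moved test group the same way the new pipeline step
# does ("pytest -v -s models/quantization"), but via pytest's Python API.
import sys

import pytest

if __name__ == "__main__":
    # Equivalent to `pytest -v -s models/quantization` run from the tests/ folder.
    sys.exit(pytest.main(["-v", "-s", "models/quantization"]))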
View File

@@ -1,9 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the outputs of HF and vLLM when using greedy sampling.
-
-Run `pytest tests/models/test_models.py`.
-"""
-
 import pytest
 import torch

View File

@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the outputs of HF and vLLM for Granite models using greedy sampling.
-
-Run `pytest tests/models/test_granite.py`.
-"""
 import pytest

 from ...utils import check_logprobs_close

View File

@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
-
-Run `pytest tests/models/test_mistral.py`.
-"""
 import copy
 import json

View File

@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the outputs of HF and vLLM for moe models using greedy sampling.
-
-Run `pytest tests/models/test_phimoe.py`.
-"""
 import pytest
 import torch

View File

@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the classification outputs of HF and vLLM models.
-
-Run `pytest tests/models/test_cls_models.py`.
-"""
 import pytest
 import torch
 from transformers import AutoModelForSequenceClassification
@@ -19,7 +15,7 @@ from vllm.platforms import current_platform
 )
 @pytest.mark.parametrize("dtype",
                          ["half"] if current_platform.is_rocm() else ["float"])
-def test_classification_models(
+def test_models(
     hf_runner,
     vllm_runner,
     example_prompts,

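As context for the platform-conditional parametrization kept above, here is a minimal, self-contained sketch of the same pattern; the `is_rocm` helper is a hypothetical stand-in for vLLM's `current_platform.is_rocm()`.

import pytest

def is_rocm() -> bool:
    # Hypothetical stand-in for current_platform.is_rocm(); always False here.
    return False

# ROCm runs get "half", every other platform gets "float": exactly one case each.
@pytest.mark.parametrize("dtype", ["half"] if is_rocm() else ["float"])
def test_models(dtype: str) -> None:
    assert dtype in ("half", "float")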
View File

@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the embedding outputs of HF and vLLM models.
-
-Run `pytest tests/models/embedding/language/test_embedding.py`.
-"""
 import pytest

 from vllm.config import PoolerConfig

View File

@@ -1,9 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-# ruff: noqa: E501
-"""Compare the scoring outputs of HF and vLLM models.
-
-Run `pytest tests/models/embedding/language/test_jina.py`.
-"""
 import math

 import pytest
@@ -22,9 +17,9 @@ TEXTS_2 = [
     "Organic skincare for sensitive skin with aloe vera and chamomile.",
     "New makeup trends focus on bold colors and innovative techniques",
     "Bio-Hautpflege für empfindliche Haut mit Aloe Vera und Kamille",
-    "Neue Make-up-Trends setzen auf kräftige Farben und innovative Techniken",
-    "Cuidado de la piel orgánico para piel sensible con aloe vera y manzanilla",
-    "Las nuevas tendencias de maquillaje se centran en colores vivos y técnicas innovadoras",
+    "Neue Make-up-Trends setzen auf kräftige Farben und innovative Techniken",  # noqa: E501
+    "Cuidado de la piel orgánico para piel sensible con aloe vera y manzanilla",  # noqa: E501
+    "Las nuevas tendencias de maquillaje se centran en colores vivos y técnicas innovadoras",  # noqa: E501
     "针对敏感肌专门设计的天然有机护肤产品",
     "新的化妆趋势注重鲜艳的颜色和创新的技巧",
     "敏感肌のために特別に設計された天然有機スキンケア製品",

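A small sketch of the lint-suppression change above, assuming Ruff/flake8-style checks: the file-wide `# ruff: noqa: E501` directive is dropped, and only the individual over-length lines carry a trailing `# noqa: E501`, so any new long line elsewhere in the file is still flagged.

# Only this deliberately long test string is exempted from the E501 line-length
# check; the rest of the module remains subject to it.
LONG_TEXT = "Las nuevas tendencias de maquillaje se centran en colores vivos y técnicas innovadoras"  # noqa: E501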
View File

@@ -1,15 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the scoring outputs of HF and vLLM models.
-
-Run `pytest tests/models/embedding/language/test_scoring.py`.
-"""
 import math

 import pytest
 import torch
 import torch.nn.functional as F

-MODELS = [
+CROSS_ENCODER_MODELS = [
     "cross-encoder/ms-marco-MiniLM-L-6-v2",  # Bert
     "BAAI/bge-reranker-v2-m3",  # Roberta
 ]
@@ -28,21 +24,21 @@ TEXTS_2 = [
     "The capital of Germany is Berlin.",
 ]

+DTYPE = "half"

-@pytest.fixture(scope="module", params=MODELS)
+@pytest.fixture(scope="module", params=CROSS_ENCODER_MODELS)
 def model_name(request):
     yield request.param

-@pytest.mark.parametrize("dtype", ["half"])
-def test_llm_1_to_1(vllm_runner, hf_runner, model_name, dtype: str):
+def test_cross_encoder_1_to_1(vllm_runner, hf_runner, model_name):
     text_pair = [TEXTS_1[0], TEXTS_2[0]]

-    with hf_runner(model_name, dtype=dtype, is_cross_encoder=True) as hf_model:
+    with hf_runner(model_name, dtype=DTYPE, is_cross_encoder=True) as hf_model:
         hf_outputs = hf_model.predict([text_pair]).tolist()

-    with vllm_runner(model_name, task="score", dtype=dtype,
+    with vllm_runner(model_name, task="score", dtype=DTYPE,
                      max_model_len=None) as vllm_model:
         vllm_outputs = vllm_model.score(text_pair[0], text_pair[1])
@@ -52,18 +48,16 @@ def test_llm_1_to_1(vllm_runner, hf_runner, model_name, dtype: str):
     assert math.isclose(hf_outputs[0], vllm_outputs[0], rel_tol=0.01)

-@pytest.mark.parametrize("dtype", ["half"])
-def test_llm_1_to_N(vllm_runner, hf_runner, model_name, dtype: str):
+def test_cross_encoder_1_to_N(vllm_runner, hf_runner, model_name):
     text_pairs = [
         [TEXTS_1[0], TEXTS_2[0]],
         [TEXTS_1[0], TEXTS_2[1]],
     ]

-    with hf_runner(model_name, dtype=dtype, is_cross_encoder=True) as hf_model:
+    with hf_runner(model_name, dtype=DTYPE, is_cross_encoder=True) as hf_model:
         hf_outputs = hf_model.predict(text_pairs).tolist()

-    with vllm_runner(model_name, task="score", dtype=dtype,
+    with vllm_runner(model_name, task="score", dtype=DTYPE,
                      max_model_len=None) as vllm_model:
         vllm_outputs = vllm_model.score(TEXTS_1[0], TEXTS_2)
@@ -74,18 +68,16 @@ def test_llm_1_to_N(vllm_runner, hf_runner, model_name, dtype: str):
     assert math.isclose(hf_outputs[1], vllm_outputs[1], rel_tol=0.01)

-@pytest.mark.parametrize("dtype", ["half"])
-def test_llm_N_to_N(vllm_runner, hf_runner, model_name, dtype: str):
+def test_cross_encoder_N_to_N(vllm_runner, hf_runner, model_name):
     text_pairs = [
         [TEXTS_1[0], TEXTS_2[0]],
         [TEXTS_1[1], TEXTS_2[1]],
     ]

-    with hf_runner(model_name, dtype=dtype, is_cross_encoder=True) as hf_model:
+    with hf_runner(model_name, dtype=DTYPE, is_cross_encoder=True) as hf_model:
         hf_outputs = hf_model.predict(text_pairs).tolist()

-    with vllm_runner(model_name, task="score", dtype=dtype,
+    with vllm_runner(model_name, task="score", dtype=DTYPE,
                      max_model_len=None) as vllm_model:
         vllm_outputs = vllm_model.score(TEXTS_1, TEXTS_2)
@@ -101,13 +93,10 @@ def emb_model_name(request):
     yield request.param

-@pytest.mark.parametrize("dtype", ["half"])
-def test_llm_1_to_1_embedding(vllm_runner, hf_runner, emb_model_name,
-                              dtype: str):
+def test_embedding_1_to_1(vllm_runner, hf_runner, emb_model_name):
     text_pair = [TEXTS_1[0], TEXTS_2[0]]

-    with hf_runner(emb_model_name, dtype=dtype,
+    with hf_runner(emb_model_name, dtype=DTYPE,
                    is_sentence_transformer=True) as hf_model:
         hf_embeddings = hf_model.encode(text_pair)
         hf_outputs = [
@@ -116,7 +105,7 @@ def test_llm_1_to_1_embedding(vllm_runner, hf_runner, emb_model_name,
     with vllm_runner(emb_model_name,
                      task="embed",
-                     dtype=dtype,
+                     dtype=DTYPE,
                      max_model_len=None) as vllm_model:
         vllm_outputs = vllm_model.score(text_pair[0], text_pair[1])
@@ -126,16 +115,13 @@ def test_llm_1_to_1_embedding(vllm_runner, hf_runner, emb_model_name,
     assert math.isclose(hf_outputs[0], vllm_outputs[0], rel_tol=0.01)

-@pytest.mark.parametrize("dtype", ["half"])
-def test_llm_1_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
-                              dtype: str):
+def test_embedding_1_to_N(vllm_runner, hf_runner, emb_model_name):
     text_pairs = [
         [TEXTS_1[0], TEXTS_2[0]],
         [TEXTS_1[0], TEXTS_2[1]],
     ]

-    with hf_runner(emb_model_name, dtype=dtype,
+    with hf_runner(emb_model_name, dtype=DTYPE,
                    is_sentence_transformer=True) as hf_model:
         hf_embeddings = [
             hf_model.encode(text_pair) for text_pair in text_pairs
@@ -147,7 +133,7 @@ def test_llm_1_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
     with vllm_runner(emb_model_name,
                      task="embed",
-                     dtype=dtype,
+                     dtype=DTYPE,
                      max_model_len=None) as vllm_model:
         vllm_outputs = vllm_model.score(TEXTS_1[0], TEXTS_2)
@@ -158,16 +144,13 @@ def test_llm_1_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
     assert math.isclose(hf_outputs[1], vllm_outputs[1], rel_tol=0.01)

-@pytest.mark.parametrize("dtype", ["half"])
-def test_llm_N_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
-                              dtype: str):
+def test_embedding_N_to_N(vllm_runner, hf_runner, emb_model_name):
     text_pairs = [
         [TEXTS_1[0], TEXTS_2[0]],
         [TEXTS_1[1], TEXTS_2[1]],
     ]

-    with hf_runner(emb_model_name, dtype=dtype,
+    with hf_runner(emb_model_name, dtype=DTYPE,
                    is_sentence_transformer=True) as hf_model:
         hf_embeddings = [
             hf_model.encode(text_pair) for text_pair in text_pairs
@@ -179,7 +162,7 @@ def test_llm_N_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
     with vllm_runner(emb_model_name,
                      task="embed",
-                     dtype=dtype,
+                     dtype=DTYPE,
                      max_model_len=None) as vllm_model:
         vllm_outputs = vllm_model.score(TEXTS_1, TEXTS_2)

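A minimal sketch of the refactor pattern applied above, with placeholder model names: a single-valued `@pytest.mark.parametrize("dtype", ["half"])` becomes a module-level `DTYPE` constant, and the fixture is renamed so it says what it parametrizes over.

import pytest

CROSS_ENCODER_MODELS = ["model-a", "model-b"]  # placeholder names, not real checkpoints
DTYPE = "half"

@pytest.fixture(scope="module", params=CROSS_ENCODER_MODELS)
def model_name(request):
    yield request.param

def test_cross_encoder_1_to_1(model_name):
    # The dtype is read from the module constant instead of a test parameter.
    assert model_name in CROSS_ENCODER_MODELS
    assert DTYPE == "half"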
View File

@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the embedding outputs of HF and vLLM models.
-
-Run `pytest tests/models/embedding/language/test_snowflake_arctic_embed.py`.
-"""
 import pytest

 from ...utils import EmbedModelInfo, check_embeddings_close

View File

@@ -5,18 +5,18 @@ MODEL_NAME = "sentence-transformers/all-MiniLM-L12-v2"
 max_model_len = 128

 input_str = """Immerse yourself in the enchanting chronicle of calculus, a
 mathematical domain that has radically transformed our comprehension of
 change and motion. Despite its roots in ancient civilizations, the
 formal birth of calculus predominantly occurred in the 17th century,
 primarily under the influential guidance of Sir Isaac Newton and Gottfried
 Wilhelm Leibniz. The earliest traces of calculus concepts are found in
 ancient Greek mathematics,most notably in the works of Eudoxus and
 Archimedes, around 300 BCE. They utilized the 'method of exhaustion'a
 technique for computing areas and volumes through the use of finite sums.
 This methodology laid crucial foundational work for integral calculus.
 In the 17th century, both Newton and Leibniz independently pioneered
 calculus, each contributing unique perspectives that would shape this new
 field."""

 def test_smaller_truncation_size(vllm_runner,

View File

@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
-
-Run `pytest tests/models/test_mistral.py`.
-"""
 import json
 from dataclasses import asdict
 from typing import TYPE_CHECKING, Any, Optional

View File

@@ -119,10 +119,10 @@ def run_test(
         assert output.outputs[0].text == expected

-@create_new_process_for_each_test("spawn")
 @pytest.mark.core_model
 @pytest.mark.parametrize(
     "model", ["openai/whisper-small", "openai/whisper-large-v3-turbo"])
+@create_new_process_for_each_test()
 def test_models(vllm_runner, model) -> None:
     run_test(
         vllm_runner,
@@ -131,11 +131,11 @@ def test_models(vllm_runner, model) -> None:
     )

-@create_new_process_for_each_test("spawn")
 @multi_gpu_test(num_gpus=2)
 @pytest.mark.core_model
 @pytest.mark.parametrize("model", ["openai/whisper-large-v3-turbo"])
 @pytest.mark.parametrize("distributed_executor_backend", ["ray", "mp"])
+@create_new_process_for_each_test()
 def test_models_distributed(
     vllm_runner,
     model,

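The decorator reordering above moves `create_new_process_for_each_test()` next to the test function. As a hedged illustration of why ordering matters (with `log_calls` as a toy stand-in, not vLLM's decorator): stacked decorators apply bottom-up, so the decorator written closest to the function wraps it first, and the `@pytest.mark.*` decorators then attach their marks to that wrapped function.

import functools

import pytest

def log_calls(fn):
    # Toy wrapper standing in for a heavier-weight decorator.
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        print(f"running {fn.__name__}")
        return fn(*args, **kwargs)
    return wrapper

@pytest.mark.parametrize("model", ["openai/whisper-small"])
@log_calls  # innermost: wraps the raw test before the parametrize mark is attached
def test_models(model: str) -> None:
    assert model.startswith("openai/")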
View File

@@ -1,9 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the outputs of a AQLM model between vLLM and HF Transformers
-
-Run `pytest tests/models/test_aqlm.py`.
-"""
-
 import pytest

 from tests.quantization.utils import is_quant_method_supported

View File

@@ -8,8 +8,6 @@ bitblas/GPTQ models are in the top 3 selections of each other.
 Note: bitblas internally uses locks to synchronize the threads. This can
 result in very slight nondeterminism for bitblas. As a result, we re-run the
 test up to 3 times to see if we pass.
-
-Run `pytest tests/models/test_bitblas.py`.
 """

 from dataclasses import dataclass

View File

@@ -8,8 +8,6 @@ bitblas/GPTQ models are in the top 3 selections of each other.
 Note: bitblas internally uses locks to synchronize the threads. This can
 result in very slight nondeterminism for bitblas. As a result, we re-run the
 test up to 3 times to see if we pass.
-
-Run `pytest tests/models/test_bitblas.py`.
 """

 from dataclasses import dataclass

View File

@@ -1,13 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compares the outputs of gptq vs gptq_marlin
+"""Compares the outputs of gptq vs gptq_marlin.

 Note: GPTQ and Marlin do not have bitwise correctness.
 As a result, in this test, we just confirm that the top selected tokens of the
 Marlin/GPTQ models are in the top 5 selections of each other.

 Note: Marlin internally uses locks to synchronize the threads. This can
 result in very slight nondeterminism for Marlin. As a result, we re-run the test
 up to 3 times to see if we pass.
-Run `pytest tests/models/test_gptq_marlin.py`.
 """
 import os

View File

@@ -4,8 +4,6 @@
 Note: GPTQ and Marlin_24 do not have bitwise correctness.
 As a result, in this test, we just confirm that the top selected tokens of the
 Marlin/GPTQ models are in the top 3 selections of each other.
-
-Run `pytest tests/models/test_marlin_24.py`.
 """

 from dataclasses import dataclass

View File

@@ -1,8 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Test the functionality of the Transformers backend.
-
-Run `pytest tests/models/test_transformers.py`.
-"""
+"""Test the functionality of the Transformers backend."""
 import pytest

 from ..conftest import HfRunner, VllmRunner