From ce30dca5c44353f278dc114bd6f03b11700088eb Mon Sep 17 00:00:00 2001
From: Aziz
Date: Tue, 2 Sep 2025 12:49:32 +0200
Subject: [PATCH] [CI]: reduce HTTP calls inside entrypoints openai tests
 (#23646)

Signed-off-by: AzizCode92
Signed-off-by: Aziz
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 tests/entrypoints/conftest.py                 | 29 +++++++++++++++++++
 tests/entrypoints/openai/test_chat.py         |  2 --
 tests/entrypoints/openai/test_completion.py   | 26 -----------------
 .../test_completion_with_prompt_embeds.py     | 27 +----------------
 .../entrypoints/openai/test_lora_adapters.py  |  8 -----
 tests/entrypoints/openai/test_models.py       |  8 -----
 .../openai/test_return_tokens_as_ids.py       |  2 --
 tests/entrypoints/openai/test_tokenization.py |  2 --
 8 files changed, 30 insertions(+), 74 deletions(-)

diff --git a/tests/entrypoints/conftest.py b/tests/entrypoints/conftest.py
index a7c533ec2419..48fd848e8820 100644
--- a/tests/entrypoints/conftest.py
+++ b/tests/entrypoints/conftest.py
@@ -201,3 +201,32 @@ table: "table_1" | "table_2"
 condition: column "=" number
 number: "1" | "2"
 """)
+
+
+@pytest.fixture(scope="session")
+def zephyr_lora_files():
+    """Download zephyr LoRA files once per test session."""
+    from huggingface_hub import snapshot_download
+    return snapshot_download(repo_id="typeof/zephyr-7b-beta-lora")
+
+
+@pytest.fixture(scope="session")
+def zephyr_lora_added_tokens_files(zephyr_lora_files):
+    """Create zephyr LoRA files with added tokens once per test session."""
+    import shutil
+    from tempfile import TemporaryDirectory
+
+    from transformers import AutoTokenizer
+
+    tmp_dir = TemporaryDirectory()
+    tmp_model_dir = f"{tmp_dir.name}/zephyr"
+    shutil.copytree(zephyr_lora_files, tmp_model_dir)
+    tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
+    # Copy tokenizer to adapter and add some unique tokens
+    # 32000, 32001, 32002
+    added = tokenizer.add_tokens(["vllm1", "vllm2", "vllm3"],
+                                 special_tokens=True)
+    assert added == 3
+    tokenizer.save_pretrained(tmp_model_dir)
+    yield tmp_model_dir
+    tmp_dir.cleanup()
diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py
index 5ad29d70f10d..c9947c54a918 100644
--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -15,8 +15,6 @@ import torch
 from openai import BadRequestError, OpenAI
 
 from ...utils import RemoteOpenAIServer
-from .test_completion import zephyr_lora_added_tokens_files  # noqa: F401
-from .test_completion import zephyr_lora_files  # noqa: F401
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
diff --git a/tests/entrypoints/openai/test_completion.py b/tests/entrypoints/openai/test_completion.py
index 74ef6deeea16..d55f8d9d65d9 100644
--- a/tests/entrypoints/openai/test_completion.py
+++ b/tests/entrypoints/openai/test_completion.py
@@ -3,8 +3,6 @@
 # imports for guided decoding tests
 import json
 import os
-import shutil
-from tempfile import TemporaryDirectory
 from typing import Optional
 
 import jsonschema
@@ -14,9 +12,7 @@
 import pytest_asyncio
 import regex as re
 import requests
 # downloading lora to test lora requests
-from huggingface_hub import snapshot_download
 from openai import BadRequestError
-from transformers import AutoTokenizer
 
 from vllm.transformers_utils.tokenizer import get_tokenizer
@@ -26,32 +22,10 @@ from ...utils import RemoteOpenAIServer
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 # technically these adapters use a different base model,
 # but we're not testing generation quality here
-LORA_NAME = "typeof/zephyr-7b-beta-lora"
 
 GUIDED_DECODING_BACKENDS = ["outlines", "xgrammar", "guidance"]
 
 
-@pytest.fixture(scope="module")
-def zephyr_lora_files():
-    return snapshot_download(repo_id=LORA_NAME)
-
-
-@pytest.fixture(scope="module")
-def zephyr_lora_added_tokens_files(zephyr_lora_files):
-    tmp_dir = TemporaryDirectory()
-    tmp_model_dir = f"{tmp_dir.name}/zephyr"
-    shutil.copytree(zephyr_lora_files, tmp_model_dir)
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    # Copy tokenizer to adapter and add some unique tokens
-    # 32000, 32001, 32002
-    added = tokenizer.add_tokens(["vllm1", "vllm2", "vllm3"],
-                                 special_tokens=True)
-    assert added == 3
-    tokenizer.save_pretrained(tmp_model_dir)
-    yield tmp_model_dir
-    tmp_dir.cleanup()
-
-
 @pytest.fixture(scope="module")
 def default_server_args(zephyr_lora_files, zephyr_lora_added_tokens_files):
     return [
diff --git a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py
index 00d3ffb61ee9..a0ef31762ea1 100644
--- a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py
+++ b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py
@@ -3,48 +3,23 @@
 
 import base64
 import io
-import shutil
-from tempfile import TemporaryDirectory
 
 import openai  # use the official client for correctness check
 import pytest
 import pytest_asyncio
 import torch
 # downloading lora to test lora requests
-from huggingface_hub import snapshot_download
 from openai import BadRequestError
-from transformers import AutoConfig, AutoTokenizer
+from transformers import AutoConfig
 
 from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
-LORA_NAME = "typeof/zephyr-7b-beta-lora"
 
 CONFIG = AutoConfig.from_pretrained(MODEL_NAME)
 
 
-@pytest.fixture(scope="module")
-def zephyr_lora_files():
-    return snapshot_download(repo_id=LORA_NAME)
-
-
-@pytest.fixture(scope="module")
-def zephyr_lora_added_tokens_files(zephyr_lora_files):
-    tmp_dir = TemporaryDirectory()
-    tmp_model_dir = f"{tmp_dir.name}/zephyr"
-    shutil.copytree(zephyr_lora_files, tmp_model_dir)
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    # Copy tokenizer to adapter and add some unique tokens
-    # 32000, 32001, 32002
-    added = tokenizer.add_tokens(["vllm1", "vllm2", "vllm3"],
-                                 special_tokens=True)
-    assert added == 3
-    tokenizer.save_pretrained(tmp_model_dir)
-    yield tmp_model_dir
-    tmp_dir.cleanup()
-
-
 @pytest.fixture(scope="module")
 def default_server_args(
     zephyr_lora_files,
diff --git a/tests/entrypoints/openai/test_lora_adapters.py b/tests/entrypoints/openai/test_lora_adapters.py
index bcdeaaacedea..f91dcf194b83 100644
--- a/tests/entrypoints/openai/test_lora_adapters.py
+++ b/tests/entrypoints/openai/test_lora_adapters.py
@@ -9,8 +9,6 @@ from contextlib import suppress
 import openai  # use the official client for correctness check
 import pytest
 import pytest_asyncio
-# downloading lora to test lora requests
-from huggingface_hub import snapshot_download
 
 from ...utils import RemoteOpenAIServer
 
@@ -18,41 +16,35 @@ from ...utils import RemoteOpenAIServer
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 # technically this needs Mistral-7B-v0.1 as base, but we're not testing
 # generation quality here
-LORA_NAME = "typeof/zephyr-7b-beta-lora"
 
 BADREQUEST_CASES = [
     (
         "test_rank",
         {
             "r": 1024
         },
         "is greater than max_lora_rank",
     ),
     (
         "test_bias",
         {
             "bias": "all"
         },
         "Adapter bias cannot be used without bias_enabled",
     ),
     ("test_dora", {
         "use_dora": True
     }, "does not yet support DoRA"),
     (
         "test_modules_to_save",
         {
             "modules_to_save": ["lm_head"]
         },
         "only supports modules_to_save being None",
     ),
 ]
 
 
-@pytest.fixture(scope="module")
-def zephyr_lora_files():
-    return snapshot_download(repo_id=LORA_NAME)
-
-
 @pytest.fixture(scope="module")
 def monkeypatch_module():
     from _pytest.monkeypatch import MonkeyPatch
diff --git a/tests/entrypoints/openai/test_models.py b/tests/entrypoints/openai/test_models.py
index 1980daa80db9..7cd3ca196a43 100644
--- a/tests/entrypoints/openai/test_models.py
+++ b/tests/entrypoints/openai/test_models.py
@@ -4,8 +4,6 @@
 import openai  # use the official client for correctness check
 import pytest
 import pytest_asyncio
-# downloading lora to test lora requests
-from huggingface_hub import snapshot_download
 
 from ...utils import RemoteOpenAIServer
 
@@ -13,12 +11,6 @@ from ...utils import RemoteOpenAIServer
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 # technically this needs Mistral-7B-v0.1 as base, but we're not testing
 # generation quality here
-LORA_NAME = "typeof/zephyr-7b-beta-lora"
-
-
-@pytest.fixture(scope="module")
-def zephyr_lora_files():
-    return snapshot_download(repo_id=LORA_NAME)
 
 
 @pytest.fixture(scope="module")
diff --git a/tests/entrypoints/openai/test_return_tokens_as_ids.py b/tests/entrypoints/openai/test_return_tokens_as_ids.py
index af58fbd4b364..5f43fdc9588f 100644
--- a/tests/entrypoints/openai/test_return_tokens_as_ids.py
+++ b/tests/entrypoints/openai/test_return_tokens_as_ids.py
@@ -11,8 +11,6 @@
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
 from ...utils import RemoteOpenAIServer
 from .test_completion import default_server_args  # noqa: F401
-from .test_completion import zephyr_lora_added_tokens_files  # noqa: F401
-from .test_completion import zephyr_lora_files  # noqa: F401
 from .test_completion import MODEL_NAME
 
diff --git a/tests/entrypoints/openai/test_tokenization.py b/tests/entrypoints/openai/test_tokenization.py
index 0dbbdfbfd24a..72c8a3510c9b 100644
--- a/tests/entrypoints/openai/test_tokenization.py
+++ b/tests/entrypoints/openai/test_tokenization.py
@@ -8,8 +8,6 @@ import requests
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
 from ...utils import RemoteOpenAIServer
-from .test_completion import zephyr_lora_added_tokens_files  # noqa: F401
-from .test_completion import zephyr_lora_files  # noqa: F401
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"