Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-10 03:44:56 +08:00)
[CI]: reduce HTTP calls inside entrypoints openai tests (#23646)
Signed-off-by: AzizCode92 <azizbenothman76@gmail.com>
Signed-off-by: Aziz <azizbenothman76@gmail.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

parent 2f0bab3f26
commit ce30dca5c4
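What changed, in brief: several OpenAI entrypoint test modules each carried their own module-scoped `zephyr_lora_files` / `zephyr_lora_added_tokens_files` fixtures, so every module re-ran `snapshot_download` and its Hugging Face Hub HTTP round-trips. The first hunk below adds a single session-scoped pair of fixtures (the destination file name is not preserved in this extract, but the docstrings say "once per test session"), and the remaining hunks delete the per-module copies and their `# noqa: F401` cross-imports. A minimal sketch of why widening the fixture scope removes the repeated calls — the counter fixture is hypothetical, not vllm code:

import pytest

CALLS = {"n": 0}  # stands in for snapshot_download's HTTP round-trips

@pytest.fixture(scope="session")
def lora_files():
    CALLS["n"] += 1  # with scope="module", this body re-runs in every test file
    return "/tmp/fake-lora"

def test_first(lora_files):
    assert CALLS["n"] == 1

def test_second(lora_files):
    # cached for the whole session: no second "download"
    assert CALLS["n"] == 1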
@@ -201,3 +201,32 @@ table: "table_1" | "table_2"
 condition: column "=" number
 number: "1" | "2"
 """)
+
+
+@pytest.fixture(scope="session")
+def zephyr_lora_files():
+    """Download zephyr LoRA files once per test session."""
+    from huggingface_hub import snapshot_download
+    return snapshot_download(repo_id="typeof/zephyr-7b-beta-lora")
+
+
+@pytest.fixture(scope="session")
+def zephyr_lora_added_tokens_files(zephyr_lora_files):
+    """Create zephyr LoRA files with added tokens once per test session."""
+    import shutil
+    from tempfile import TemporaryDirectory
+
+    from transformers import AutoTokenizer
+
+    tmp_dir = TemporaryDirectory()
+    tmp_model_dir = f"{tmp_dir.name}/zephyr"
+    shutil.copytree(zephyr_lora_files, tmp_model_dir)
+    tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
+    # Copy tokenizer to adapter and add some unique tokens
+    # 32000, 32001, 32002
+    added = tokenizer.add_tokens(["vllm1", "vllm2", "vllm3"],
+                                 special_tokens=True)
+    assert added == 3
+    tokenizer.save_pretrained(tmp_model_dir)
+    yield tmp_model_dir
+    tmp_dir.cleanup()
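pytest resolves fixture arguments by name against any conftest.py above the test file, so once the definitions above live in a shared conftest the consuming modules need no import at all — which is exactly why the `from .test_completion import ...  # noqa: F401` lines disappear in the hunks that follow. A hedged sketch of the consuming side (hypothetical test module, assumed to sit in the same directory tree as the conftest):

# test_lora_smoke.py (hypothetical) -- note: no fixture imports
import os

def test_adapter_dir_exists(zephyr_lora_added_tokens_files):
    # pytest matches the parameter name to the session-scoped conftest fixture
    assert os.path.isdir(zephyr_lora_added_tokens_files)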
@@ -15,8 +15,6 @@ import torch
 from openai import BadRequestError, OpenAI
 
 from ...utils import RemoteOpenAIServer
-from .test_completion import zephyr_lora_added_tokens_files  # noqa: F401
-from .test_completion import zephyr_lora_files  # noqa: F401
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -3,8 +3,6 @@
 # imports for guided decoding tests
 import json
 import os
-import shutil
-from tempfile import TemporaryDirectory
 from typing import Optional
 
 import jsonschema
@@ -14,9 +12,7 @@ import pytest_asyncio
 import regex as re
 import requests
 # downloading lora to test lora requests
-from huggingface_hub import snapshot_download
 from openai import BadRequestError
-from transformers import AutoTokenizer
 
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
@@ -26,32 +22,10 @@ from ...utils import RemoteOpenAIServer
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 # technically these adapters use a different base model,
 # but we're not testing generation quality here
-LORA_NAME = "typeof/zephyr-7b-beta-lora"
-
 GUIDED_DECODING_BACKENDS = ["outlines", "xgrammar", "guidance"]
 
 
-@pytest.fixture(scope="module")
-def zephyr_lora_files():
-    return snapshot_download(repo_id=LORA_NAME)
-
-
-@pytest.fixture(scope="module")
-def zephyr_lora_added_tokens_files(zephyr_lora_files):
-    tmp_dir = TemporaryDirectory()
-    tmp_model_dir = f"{tmp_dir.name}/zephyr"
-    shutil.copytree(zephyr_lora_files, tmp_model_dir)
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    # Copy tokenizer to adapter and add some unique tokens
-    # 32000, 32001, 32002
-    added = tokenizer.add_tokens(["vllm1", "vllm2", "vllm3"],
-                                 special_tokens=True)
-    assert added == 3
-    tokenizer.save_pretrained(tmp_model_dir)
-    yield tmp_model_dir
-    tmp_dir.cleanup()
-
-
 @pytest.fixture(scope="module")
 def default_server_args(zephyr_lora_files, zephyr_lora_added_tokens_files):
     return [
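The deleted module-scoped fixture (and its session-scoped replacement in the first hunk) pins the three added tokens to IDs 32000, 32001, 32002: zephyr-7b-beta's tokenizer has a base vocabulary of 32000 entries, and `add_tokens` appends new entries after it. A quick hedged check of that claim (assumes Hub access and the current tokenizer revision):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
tok.add_tokens(["vllm1", "vllm2", "vllm3"], special_tokens=True)
# appended tokens take the next free IDs after the 32000-entry base vocab
print(tok.convert_tokens_to_ids(["vllm1", "vllm2", "vllm3"]))  # [32000, 32001, 32002]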
@@ -3,48 +3,23 @@
 
 import base64
 import io
-import shutil
-from tempfile import TemporaryDirectory
 
 import openai  # use the official client for correctness check
 import pytest
 import pytest_asyncio
 import torch
 # downloading lora to test lora requests
-from huggingface_hub import snapshot_download
 from openai import BadRequestError
-from transformers import AutoConfig, AutoTokenizer
+from transformers import AutoConfig
 
 from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
-LORA_NAME = "typeof/zephyr-7b-beta-lora"
-
 CONFIG = AutoConfig.from_pretrained(MODEL_NAME)
 
 
-@pytest.fixture(scope="module")
-def zephyr_lora_files():
-    return snapshot_download(repo_id=LORA_NAME)
-
-
-@pytest.fixture(scope="module")
-def zephyr_lora_added_tokens_files(zephyr_lora_files):
-    tmp_dir = TemporaryDirectory()
-    tmp_model_dir = f"{tmp_dir.name}/zephyr"
-    shutil.copytree(zephyr_lora_files, tmp_model_dir)
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-    # Copy tokenizer to adapter and add some unique tokens
-    # 32000, 32001, 32002
-    added = tokenizer.add_tokens(["vllm1", "vllm2", "vllm3"],
-                                 special_tokens=True)
-    assert added == 3
-    tokenizer.save_pretrained(tmp_model_dir)
-    yield tmp_model_dir
-    tmp_dir.cleanup()
-
-
 @pytest.fixture(scope="module")
 def default_server_args(
     zephyr_lora_files,
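Both the removed copies and the new session-scoped fixture rely on pytest's generator-fixture teardown: code after `yield` runs when the fixture's scope ends, so the `TemporaryDirectory` lives exactly as long as the tests that use it and is cleaned up once rather than per module. The bare pattern, as a standard-library-only sketch (hypothetical fixture name):

import pytest
from tempfile import TemporaryDirectory

@pytest.fixture(scope="session")
def scratch_dir():
    tmp = TemporaryDirectory()
    yield tmp.name   # tests receive the path
    tmp.cleanup()    # runs once, when the session ends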
@@ -9,8 +9,6 @@ from contextlib import suppress
 import openai  # use the official client for correctness check
 import pytest
 import pytest_asyncio
-# downloading lora to test lora requests
-from huggingface_hub import snapshot_download
 
 from ...utils import RemoteOpenAIServer
 
@@ -18,7 +16,6 @@ from ...utils import RemoteOpenAIServer
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 # technically this needs Mistral-7B-v0.1 as base, but we're not testing
 # generation quality here
-LORA_NAME = "typeof/zephyr-7b-beta-lora"
 
 BADREQUEST_CASES = [
     (
@@ -48,11 +45,6 @@ BADREQUEST_CASES = [
 ]
 
 
-@pytest.fixture(scope="module")
-def zephyr_lora_files():
-    return snapshot_download(repo_id=LORA_NAME)
-
-
 @pytest.fixture(scope="module")
 def monkeypatch_module():
     from _pytest.monkeypatch import MonkeyPatch
@@ -4,8 +4,6 @@
 import openai  # use the official client for correctness check
 import pytest
 import pytest_asyncio
-# downloading lora to test lora requests
-from huggingface_hub import snapshot_download
 
 from ...utils import RemoteOpenAIServer
 
@@ -13,12 +11,6 @@ from ...utils import RemoteOpenAIServer
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 # technically this needs Mistral-7B-v0.1 as base, but we're not testing
 # generation quality here
-LORA_NAME = "typeof/zephyr-7b-beta-lora"
-
-
-@pytest.fixture(scope="module")
-def zephyr_lora_files():
-    return snapshot_download(repo_id=LORA_NAME)
 
 
 @pytest.fixture(scope="module")
@@ -11,8 +11,6 @@ from vllm.transformers_utils.tokenizer import get_tokenizer
 
 from ...utils import RemoteOpenAIServer
 from .test_completion import default_server_args  # noqa: F401
-from .test_completion import zephyr_lora_added_tokens_files  # noqa: F401
-from .test_completion import zephyr_lora_files  # noqa: F401
 from .test_completion import MODEL_NAME
 
 
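Note the asymmetry in the hunk above: `default_server_args` keeps its `# noqa: F401` re-import because it still lives in test_completion.py (hunk three shows it staying there as context); only the LoRA fixtures moved to the shared location. The re-import trick works because pytest offers any fixture object found in a test module's namespace to that module's tests:

# hypothetical consumer: importing the fixture function re-exports it here,
# hence the "unused import" suppression
from .test_completion import default_server_args  # noqa: F401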
@@ -8,8 +8,6 @@ import requests
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
 from ...utils import RemoteOpenAIServer
-from .test_completion import zephyr_lora_added_tokens_files  # noqa: F401
-from .test_completion import zephyr_lora_files  # noqa: F401
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"