Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2026-05-08 02:49:09 +08:00)
[CI/Build] Fix flaky entrypoints test (#25663)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 70fbdb26e9
commit 0bcc3a160d
@@ -15,7 +15,7 @@ from transformers import AutoConfig
 from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
-MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
+MODEL_NAME = "facebook/opt-125m"
 
 CONFIG = AutoConfig.from_pretrained(MODEL_NAME)
 
@@ -27,7 +27,7 @@ def default_server_args() -> list[str]:
         "--dtype",
         "bfloat16",
         "--max-model-len",
-        "8192",
+        "2048",
         "--max-num-seqs",
         "128",
         "--enforce-eager",
@@ -36,6 +36,27 @@ def default_server_args() -> list[str]:
     ]
 
 
+EXAMPLE_PROMPTS = [
+    "Hello, my name is",
+    "What is an LLM?",
+]
+
+
+def _encode_embeds(embeds: torch.Tensor):
+    buffer = io.BytesIO()
+    torch.save(embeds, buffer)
+    return base64.b64encode(buffer.getvalue()).decode('utf-8')
+
+
+@pytest.fixture(scope="module")
+def example_prompt_embeds(hf_runner):
+    """Create example embeddings and return them as base64 encoded string."""
+    with hf_runner(MODEL_NAME) as hf_model:
+        example_embeddings = hf_model.get_prompt_embeddings(EXAMPLE_PROMPTS)
+
+    return [_encode_embeds(item) for item in example_embeddings]
+
+
 @pytest.fixture(scope="module",
                 params=["", "--disable-frontend-multiprocessing"])
 def server_with_prompt_embeds(default_server_args, request):
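The hunk above is the core of the fix: rather than each test generating fresh random embeddings on the fly (see the removed create_dummy_embeds in the next hunk), real prompt embeddings are computed once per module from the model under test and shared, which is what allows the flakiness skip marker to be dropped. A minimal sketch of the base64 round trip the helper performs, using a random tensor purely for illustration; the decode half shows how a receiver would recover the tensor and is not code from this commit:

import base64
import io

import torch

# Encode: serialize the tensor with torch.save, then base64-encode the raw
# bytes (this mirrors the _encode_embeds helper added above).
embeds = torch.randn(5, 768)  # illustrative (num_tokens, hidden_size) tensor
buffer = io.BytesIO()
torch.save(embeds, buffer)
encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')

# Decode: reverse both steps to recover the original tensor.
decoded = torch.load(io.BytesIO(base64.b64decode(encoded)))
assert torch.equal(embeds, decoded)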
@@ -52,21 +73,16 @@ async def client_with_prompt_embeds(server_with_prompt_embeds):
     yield async_client
 
 
-def create_dummy_embeds(num_tokens: int = 5) -> str:
-    """Create dummy embeddings and return them as base64 encoded string."""
-    dummy_embeds = torch.randn(num_tokens, CONFIG.hidden_size)
-    buffer = io.BytesIO()
-    torch.save(dummy_embeds, buffer)
-    return base64.b64encode(buffer.getvalue()).decode('utf-8')
-
-
-@pytest.mark.skip("This test is skipped because it is flaky.")
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_completions_with_prompt_embeds(
-        client_with_prompt_embeds: openai.AsyncOpenAI, model_name: str):
+    example_prompt_embeds,
+    client_with_prompt_embeds: openai.AsyncOpenAI,
+    model_name: str,
+):
+    encoded_embeds, encoded_embeds2 = example_prompt_embeds
+
     # Test case: Single prompt embeds input
-    encoded_embeds = create_dummy_embeds()
     completion = await client_with_prompt_embeds.completions.create(
         model=model_name,
         prompt="",  # Add empty prompt as required parameter
@@ -77,7 +93,6 @@ async def test_completions_with_prompt_embeds(
     assert completion.choices[0].prompt_logprobs is None
 
     # Test case: batch completion with prompt_embeds
-    encoded_embeds2 = create_dummy_embeds()
     completion = await client_with_prompt_embeds.completions.create(
         model=model_name,
         prompt="",  # Add empty prompt as required parameter
@@ -89,7 +104,6 @@ async def test_completions_with_prompt_embeds(
     assert len(completion.choices[1].text) >= 1
 
     # Test case: streaming with prompt_embeds
-    encoded_embeds = create_dummy_embeds()
     single_completion = await client_with_prompt_embeds.completions.create(
         model=model_name,
         prompt="",  # Add empty prompt as required parameter
@@ -117,7 +131,6 @@ async def test_completions_with_prompt_embeds(
     assert "".join(chunks) == single_output
 
     # Test case: batch streaming with prompt_embeds
-    encoded_embeds2 = create_dummy_embeds()
     stream = await client_with_prompt_embeds.completions.create(
         model=model_name,
         prompt="",  # Add empty prompt as required parameter
@@ -139,7 +152,6 @@ async def test_completions_with_prompt_embeds(
     assert len(chunks_stream_embeds[1]) > 0
 
     # Test case: mixed text and prompt_embeds
-    encoded_embeds = create_dummy_embeds()
     completion_mixed = await client_with_prompt_embeds.completions.create(
         model=model_name,
         prompt="This is a prompt",
@@ -184,10 +196,14 @@ async def test_completions_errors_with_prompt_embeds(
 @pytest.mark.parametrize("logprobs_arg", [1, 0])
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_completions_with_logprobs_and_prompt_embeds(
-        client_with_prompt_embeds: openai.AsyncOpenAI, logprobs_arg: int,
-        model_name: str):
+    example_prompt_embeds,
+    client_with_prompt_embeds: openai.AsyncOpenAI,
+    logprobs_arg: int,
+    model_name: str,
+):
+    encoded_embeds, encoded_embeds2 = example_prompt_embeds
+
     # Test case: Logprobs using prompt_embeds
-    encoded_embeds = create_dummy_embeds()
     completion = await client_with_prompt_embeds.completions.create(
         model=model_name,
         prompt="",  # Add empty prompt as required parameter
@@ -207,7 +223,6 @@ async def test_completions_with_logprobs_and_prompt_embeds(
     assert len(logprobs.tokens) == 5
 
     # Test case: Log probs with batch completion and prompt_embeds
-    encoded_embeds2 = create_dummy_embeds()
     completion = await client_with_prompt_embeds.completions.create(
         model=model_name,
         prompt="",  # Add empty prompt as required parameter
@@ -232,9 +247,12 @@ async def test_completions_with_logprobs_and_prompt_embeds(
 
 @pytest.mark.asyncio
 async def test_prompt_logprobs_raises_error(
-        client_with_prompt_embeds: openai.AsyncOpenAI):
+    example_prompt_embeds,
+    client_with_prompt_embeds: openai.AsyncOpenAI,
+):
+    encoded_embeds, _ = example_prompt_embeds
+
     with pytest.raises(BadRequestError, match="not compatible"):
-        encoded_embeds = create_dummy_embeds()
         await client_with_prompt_embeds.completions.create(
             model=MODEL_NAME,
             prompt="",
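For reference, the assertions in the hunks above operate on completions requested with an empty prompt plus the encoded embeddings. A minimal client-side sketch of that request flow, assuming a running vLLM OpenAI-compatible server with prompt-embeds support; the extra_body field name, the server flag, and the random stand-in tensor are assumptions for illustration, not lines from this diff:

import base64
import io

import openai
import torch

# Assumes a vLLM server is already serving facebook/opt-125m locally with
# prompt-embeds support enabled (e.g. --enable-prompt-embeds; assumed flag).
client = openai.OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Hypothetical stand-in for real prompt embeddings with shape
# (num_tokens, hidden_size); opt-125m uses hidden_size = 768.
buffer = io.BytesIO()
torch.save(torch.randn(5, 768), buffer)
encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')

completion = client.completions.create(
    model="facebook/opt-125m",
    prompt="",  # the schema still requires a prompt field
    max_tokens=5,
    extra_body={"prompt_embeds": encoded},  # assumed vLLM extension field
)
print(completion.choices[0].text)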