[Bugfix] Fix the first streamed event still containing prompt_token_ids when return_token_ids=False (#27561)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
commit a4fc21895e
parent a3e8611da5
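The change is easiest to see from the client side. Below is a minimal sketch (not part of this commit) of the scenario the title describes: a streaming completions request against a vLLM OpenAI-compatible server with return_token_ids set to False, where no event, including the first one, should carry prompt_token_ids. The base_url, api_key, model name, and prompt are placeholders.

import asyncio

import openai


async def main() -> None:
    # Placeholder connection details; assumes a running vLLM OpenAI-compatible server.
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    stream = await client.completions.create(
        model="my-model",  # placeholder model name
        prompt="Hello",
        max_tokens=10,
        temperature=0,
        stream=True,
        # return_token_ids disabled: no chunk should echo prompt_token_ids.
        # Before this fix, the first streamed event still included them.
        extra_body={"return_token_ids": False},
    )
    async for chunk in stream:
        if chunk.choices:
            choice = chunk.choices[0].model_dump()
            assert choice.get("prompt_token_ids") is None


asyncio.run(main())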
@@ -27,8 +27,12 @@ def server():
 
 
 @pytest.mark.asyncio
-async def test_basic_completion_with_emoji(server):
+@pytest.mark.parametrize("return_token_ids", [True, False, None])
+async def test_basic_completion_with_emoji(server, return_token_ids: bool | None):
     """Test basic completion with emoji to verify token_ids field."""
+    extra_body = None
+    if return_token_ids is not None:
+        extra_body = {"return_token_ids": return_token_ids}
     async with server.get_async_client() as client:
         # Test with return_token_ids enabled
         completion = await client.completions.create(
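For reference, the three parametrized values above translate into three distinct request shapes. A small restatement of the test's setup (a sketch, not part of the patch): None means the field is omitted so the server default applies, which the test expects to behave like False.

def build_extra_body(return_token_ids: bool | None) -> dict | None:
    # Only send the field when a value was given; None omits it entirely so the
    # server default is exercised.
    if return_token_ids is None:
        return None
    return {"return_token_ids": return_token_ids}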
@@ -37,7 +41,7 @@ async def test_basic_completion_with_emoji(server):
             max_tokens=10,
             temperature=0,
             logprobs=1,
-            extra_body={"return_token_ids": True},
+            extra_body=extra_body,
         )
 
         # Check the raw response to see the structure
@@ -45,6 +49,12 @@ async def test_basic_completion_with_emoji(server):
 
         # Verify prompt_token_ids field is present in the completion response
         assert "prompt_token_ids" in completion_dict["choices"][0]
+        if not return_token_ids:
+            # If return_token_ids is False, token_ids should not be present
+            assert completion_dict["choices"][0].get("token_ids") is None
+            assert completion_dict["choices"][0].get("prompt_token_ids") is None
+            # Skip further checks
+            return
         assert isinstance(completion.choices[0].prompt_token_ids, list)
 
         # Check against the expected prompt token IDs
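Taken together, the new assertions reduce to one invariant on the first choice of the response. A hypothetical helper (not part of the patch) restating the test's checks on a serialized choice dict:

def assert_token_id_fields(choice: dict, return_token_ids: bool | None) -> None:
    """Restate the test's checks on a single serialized completion choice."""
    if not return_token_ids:
        # False, or None (server default): no token id echo at all.
        assert choice.get("token_ids") is None
        assert choice.get("prompt_token_ids") is None
    else:
        # True: the prompt token IDs must be echoed back as a list.
        assert isinstance(choice.get("prompt_token_ids"), list)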
@@ -399,7 +399,7 @@ class OpenAIServingCompletion(OpenAIServing):
 
                 # has_echoed[i] is reused here to indicate whether
                 # we have already returned the prompt token IDs.
-                if not has_echoed[i]:
+                if not has_echoed[i] and request.return_token_ids:
                     prompt_token_ids_to_return = prompt_token_ids
                     has_echoed[i] = True
 
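On the serving side, the one-line change adds request.return_token_ids to the echo condition, so prompt token IDs are attached to the first streamed chunk only when the client opted in. A standalone sketch of that guard (simplified; the real logic lives inside the streaming generator of OpenAIServingCompletion):

def prompt_ids_to_echo(
    has_echoed: list[bool],
    i: int,
    prompt_token_ids: list[int],
    return_token_ids: bool | None,
) -> list[int] | None:
    # Echo the prompt token IDs at most once per choice, and only when the
    # request asked for them; the return_token_ids check is what this fix adds.
    if not has_echoed[i] and return_token_ids:
        has_echoed[i] = True
        return prompt_token_ids
    return None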