From a4fc21895ed279930e5998eff4b9480da9d8442a Mon Sep 17 00:00:00 2001
From: Chauncey
Date: Mon, 27 Oct 2025 19:06:43 +0800
Subject: [PATCH] [Bugfix] Fix the first streamed event still containing
 prompt_token_ids when return_token_ids=False (#27561)

Signed-off-by: chaunceyjiang
---
 tests/entrypoints/openai/test_return_token_ids.py | 14 ++++++++++++--
 vllm/entrypoints/openai/serving_completion.py     |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/tests/entrypoints/openai/test_return_token_ids.py b/tests/entrypoints/openai/test_return_token_ids.py
index 60a80210fb768..feef48a36dfa1 100644
--- a/tests/entrypoints/openai/test_return_token_ids.py
+++ b/tests/entrypoints/openai/test_return_token_ids.py
@@ -27,8 +27,12 @@ def server():
 
 
 @pytest.mark.asyncio
-async def test_basic_completion_with_emoji(server):
+@pytest.mark.parametrize("return_token_ids", [True, False, None])
+async def test_basic_completion_with_emoji(server, return_token_ids: bool | None):
     """Test basic completion with emoji to verify token_ids field."""
+    extra_body = None
+    if return_token_ids is not None:
+        extra_body = {"return_token_ids": return_token_ids}
     async with server.get_async_client() as client:
         # Test with return_token_ids enabled
         completion = await client.completions.create(
@@ -37,7 +41,7 @@
             max_tokens=10,
             temperature=0,
             logprobs=1,
-            extra_body={"return_token_ids": True},
+            extra_body=extra_body,
         )
 
         # Check the raw response to see the structure
@@ -45,6 +49,12 @@
 
         # Verify prompt_token_ids field is present in the completion response
         assert "prompt_token_ids" in completion_dict["choices"][0]
+        if not return_token_ids:
+            # If return_token_ids is False, token_ids should not be present
+            assert completion_dict["choices"][0].get("token_ids") is None
+            assert completion_dict["choices"][0].get("prompt_token_ids") is None
+            # Skip further checks
+            return
         assert isinstance(completion.choices[0].prompt_token_ids, list)
 
         # Check against the expected prompt token IDs
diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index 44211201d49a6..62bc932f8b844 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -399,7 +399,7 @@ class OpenAIServingCompletion(OpenAIServing):
 
                 # has_echoed[i] is reused here to indicate whether
                 # we have already returned the prompt token IDs.
-                if not has_echoed[i]:
+                if not has_echoed[i] and request.return_token_ids:
                     prompt_token_ids_to_return = prompt_token_ids
                     has_echoed[i] = True
 
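
---
Reviewer note (editor's addition, not part of the patch): a minimal
reproduction sketch of the behavior this fix changes, assuming a vLLM
OpenAI-compatible server already running at http://localhost:8000/v1.
The base URL, API key, and model name below are placeholders, not
values taken from this PR; prompt_token_ids and token_ids are the vLLM
extension fields exercised by the test above, read here through the
extra fields that the openai client preserves in model_dump(). Before
the fix, the first streamed event carried prompt_token_ids even though
return_token_ids was False; after it, both fields should stay absent.

    import asyncio

    from openai import AsyncOpenAI


    async def main() -> None:
        # Placeholder endpoint and credentials for a local vLLM server.
        client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
        stream = await client.completions.create(
            model="placeholder-model",  # substitute the served model name
            prompt="Hello",
            max_tokens=10,
            temperature=0,
            stream=True,
            extra_body={"return_token_ids": False},
        )
        async for chunk in stream:
            choice = chunk.model_dump()["choices"][0]
            # With return_token_ids=False, no event (including the first)
            # should carry either token-ID field.
            assert choice.get("prompt_token_ids") is None
            assert choice.get("token_ids") is None


    asyncio.run(main())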