[Bugfix] Fix the first streamed event still containing prompt_token_ids when return_token_ids=False (#27561)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
commit a4fc21895e
parent a3e8611da5
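The change is easiest to see from the client side. Below is a minimal sketch (not part of this commit) of the scenario the title describes: a streaming completions request against a vLLM OpenAI-compatible server with return_token_ids set to False, where no event, including the first one, should carry prompt_token_ids. The base_url, api_key, model name, and prompt are placeholders.

import asyncio

import openai


async def main() -> None:
    # Placeholder connection details; assumes a running vLLM OpenAI-compatible server.
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    stream = await client.completions.create(
        model="my-model",  # placeholder model name
        prompt="Hello",
        max_tokens=10,
        temperature=0,
        stream=True,
        # return_token_ids disabled: no chunk should echo prompt_token_ids.
        # Before this fix, the first streamed event still included them.
        extra_body={"return_token_ids": False},
    )
    async for chunk in stream:
        if chunk.choices:
            choice = chunk.choices[0].model_dump()
            assert choice.get("prompt_token_ids") is None


asyncio.run(main())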
@@ -27,8 +27,12 @@ def server():
 
 
 @pytest.mark.asyncio
-async def test_basic_completion_with_emoji(server):
+@pytest.mark.parametrize("return_token_ids", [True, False, None])
+async def test_basic_completion_with_emoji(server, return_token_ids: bool | None):
     """Test basic completion with emoji to verify token_ids field."""
+    extra_body = None
+    if return_token_ids is not None:
+        extra_body = {"return_token_ids": return_token_ids}
     async with server.get_async_client() as client:
         # Test with return_token_ids enabled
         completion = await client.completions.create(
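For reference, the three parametrized values above translate into three distinct request shapes. A small restatement of the test's setup (a sketch, not part of the patch): None means the field is omitted so the server default applies, which the test expects to behave like False.

def build_extra_body(return_token_ids: bool | None) -> dict | None:
    # Only send the field when a value was given; None omits it entirely so the
    # server default is exercised.
    if return_token_ids is None:
        return None
    return {"return_token_ids": return_token_ids}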
@@ -37,7 +41,7 @@ async def test_basic_completion_with_emoji(server):
             max_tokens=10,
             temperature=0,
             logprobs=1,
-            extra_body={"return_token_ids": True},
+            extra_body=extra_body,
         )
 
         # Check the raw response to see the structure
@@ -45,6 +49,12 @@ async def test_basic_completion_with_emoji(server):
 
         # Verify prompt_token_ids field is present in the completion response
         assert "prompt_token_ids" in completion_dict["choices"][0]
+        if not return_token_ids:
+            # If return_token_ids is False, token_ids should not be present
+            assert completion_dict["choices"][0].get("token_ids") is None
+            assert completion_dict["choices"][0].get("prompt_token_ids") is None
+            # Skip further checks
+            return
         assert isinstance(completion.choices[0].prompt_token_ids, list)
 
         # Check against the expected prompt token IDs
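Taken together, the new assertions reduce to one invariant on the first choice of the response. A hypothetical helper (not part of the patch) restating the test's checks on a serialized choice dict:

def assert_token_id_fields(choice: dict, return_token_ids: bool | None) -> None:
    """Restate the test's checks on a single serialized completion choice."""
    if not return_token_ids:
        # False, or None (server default): no token id echo at all.
        assert choice.get("token_ids") is None
        assert choice.get("prompt_token_ids") is None
    else:
        # True: the prompt token IDs must be echoed back as a list.
        assert isinstance(choice.get("prompt_token_ids"), list)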
@@ -399,7 +399,7 @@ class OpenAIServingCompletion(OpenAIServing):
 
                 # has_echoed[i] is reused here to indicate whether
                 # we have already returned the prompt token IDs.
-                if not has_echoed[i]:
+                if not has_echoed[i] and request.return_token_ids:
                     prompt_token_ids_to_return = prompt_token_ids
                     has_echoed[i] = True
 
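On the serving side, the one-line change adds request.return_token_ids to the echo condition, so prompt token IDs are attached to the first streamed chunk only when the client opted in. A standalone sketch of that guard (simplified; the real logic lives inside the streaming generator of OpenAIServingCompletion):

def prompt_ids_to_echo(
    has_echoed: list[bool],
    i: int,
    prompt_token_ids: list[int],
    return_token_ids: bool | None,
) -> list[int] | None:
    # Echo the prompt token IDs at most once per choice, and only when the
    # request asked for them; the return_token_ids check is what this fix adds.
    if not has_echoed[i] and return_token_ids:
        has_echoed[i] = True
        return prompt_token_ids
    return None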