From a964e5e6c35e8f22bd7663dcf93d1c801421a029 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Sun, 5 Oct 2025 13:38:53 +0800
Subject: [PATCH] [Bugfix] Allow `--skip-tokenizer-init` with `echo and
 return_token_ids` (#26238)

Signed-off-by: DarkLight1337
---
 tests/entrypoints/openai/test_token_in_token_out.py | 2 +-
 vllm/entrypoints/openai/serving_completion.py       | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/entrypoints/openai/test_token_in_token_out.py b/tests/entrypoints/openai/test_token_in_token_out.py
index f84605690c539..ed003939c44be 100644
--- a/tests/entrypoints/openai/test_token_in_token_out.py
+++ b/tests/entrypoints/openai/test_token_in_token_out.py
@@ -54,7 +54,7 @@ async def test_token_in_token_out_and_logprobs(server):
         prompt=token_ids,
         max_tokens=20,
         temperature=0,
-        echo=False,
+        echo=True,
         extra_body={
             "return_token_ids": True,
         },
diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index 6e4113e6cf1ed..d0756e42b7963 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -691,5 +691,6 @@ class OpenAIServingCompletion(OpenAIServing):
             truncate_prompt_tokens=request.truncate_prompt_tokens,
             add_special_tokens=request.add_special_tokens,
             cache_salt=request.cache_salt,
-            needs_detokenization=bool(request.echo),
+            needs_detokenization=bool(request.echo
+                                      and not request.return_token_ids),
         )
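
---
Reviewer note (not part of the patch): a minimal client-side sketch of the
request shape this fix targets, namely a vLLM server launched with
`--skip-tokenizer-init` serving token-in/token-out completions. The model
name, port, and token IDs below are placeholders, and the snippet assumes
the stock `openai` Python SDK.

    # Assumed server launch command:
    #   vllm serve <model> --skip-tokenizer-init
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

    completion = client.completions.create(
        model="placeholder-model",      # placeholder model name
        prompt=[1, 2, 3, 4],            # prompt passed directly as token IDs
        max_tokens=20,
        temperature=0,
        echo=True,                      # echo the prompt back in the output
        extra_body={"return_token_ids": True},  # return token IDs, not text
    )

Before this patch, `echo=True` alone forced `needs_detokenization=True`, so
the request failed on a server started with `--skip-tokenizer-init`. With
`return_token_ids=True`, the echoed prompt comes back as token IDs, so no
detokenization (and hence no tokenizer) is needed.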