[Misc] add return token strs for tokenize (#18941)

Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
Reid, 2025-06-01 02:00:11 +08:00, committed by GitHub
parent 9a1b9b99d7
commit 20079c6e36
3 changed files with 66 additions and 15 deletions

tests/entrypoints/openai/test_tokenization.py

@@ -76,11 +76,11 @@ async def test_tokenize_completions(
                                  })
         response.raise_for_status()
 
-        assert response.json() == {
-            "tokens": tokens,
-            "count": len(tokens),
-            "max_model_len": 8192
-        }
+        result = response.json()
+        assert result["tokens"] == tokens
+        assert result["count"] == len(tokens)
+        assert result["max_model_len"] == 8192
+        assert result["token_strs"] is None
 
 
 @pytest.mark.asyncio
@@ -138,11 +138,11 @@ async def test_tokenize_chat(
                                  })
         response.raise_for_status()
 
-        assert response.json() == {
-            "tokens": tokens,
-            "count": len(tokens),
-            "max_model_len": 8192
-        }
+        result = response.json()
+        assert result["tokens"] == tokens
+        assert result["count"] == len(tokens)
+        assert result["max_model_len"] == 8192
+        assert result["token_strs"] is None
 
 
 @pytest.mark.asyncio
@@ -215,11 +215,46 @@ async def test_tokenize_chat_with_tools(
     )
     response.raise_for_status()
 
-    assert response.json() == {
-        "tokens": tokens,
-        "count": len(tokens),
-        "max_model_len": 8192,
-    }
+    result = response.json()
+    assert result["tokens"] == tokens
+    assert result["count"] == len(tokens)
+    assert result["max_model_len"] == 8192
+    assert result["token_strs"] is None
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name, tokenizer_name",
+    [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
+    indirect=["tokenizer_name"],
+)
+async def test_tokenize_with_return_token_strs(
+    server: RemoteOpenAIServer,
+    model_name: str,
+    tokenizer_name: str,
+):
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
+
+    prompt = "This is a token_strs test prompt! vllm1"
+    response = requests.post(
+        server.url_for("tokenize"),
+        json={
+            "prompt": prompt,
+            "model": model_name,
+            "return_token_strs": True
+        },
+    )
+    response.raise_for_status()
+
+    tokens = tokenizer.encode(prompt, add_special_tokens=True)
+    tokens_str = tokenizer.convert_ids_to_tokens(tokens)
+
+    result = response.json()
+    assert result["tokens"] == tokens
+    assert result["count"] == len(tokens)
+    assert result["max_model_len"] == 8192
+    assert result["token_strs"] == tokens_str
 
 
 @pytest.mark.asyncio

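For context, a minimal client-side sketch of the new flag against a running vLLM OpenAI-compatible server; the base URL and model name below are placeholders, not part of this change:

import requests

# Query the /tokenize endpoint and ask for token strings as well as ids.
# Assumes a server on localhost:8000; adjust the URL and model name.
response = requests.post(
    "http://localhost:8000/tokenize",
    json={
        "model": "my-model",  # placeholder model name
        "prompt": "This is a token_strs test prompt! vllm1",
        "return_token_strs": True,
    },
)
response.raise_for_status()
result = response.json()
print(result["tokens"])      # token ids
print(result["token_strs"])  # matching token strings, same length as "tokens"
print(result["count"], result["max_model_len"])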
vllm/entrypoints/openai/protocol.py

@@ -1563,6 +1563,11 @@ class TokenizeCompletionRequest(OpenAIBaseModel):
             "If true (the default), special tokens (e.g. BOS) will be added to "
             "the prompt."),
     )
+    return_token_strs: Optional[bool] = Field(
+        default=False,
+        description=("If true, also return the token strings "
+                     "corresponding to the token ids."),
+    )
 
 
 class TokenizeChatRequest(OpenAIBaseModel):
@@ -1576,6 +1581,11 @@ class TokenizeChatRequest(OpenAIBaseModel):
             "This is a parameter used by chat template in tokenizer config of the "
             "model."),
     )
+    return_token_strs: Optional[bool] = Field(
+        default=False,
+        description=("If true, also return the token strings "
+                     "corresponding to the token ids."),
+    )
     continue_final_message: bool = Field(
         default=False,
         description=
@@ -1633,6 +1643,7 @@ class TokenizeResponse(OpenAIBaseModel):
     count: int
     max_model_len: int
     tokens: list[int]
+    token_strs: Optional[list[str]] = None
 
 
 class DetokenizeRequest(OpenAIBaseModel):

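As a hedged illustration of the updated schema, a TokenizeResponse can now carry token strings alongside the ids; the field values below are made up:

from vllm.entrypoints.openai.protocol import TokenizeResponse

# Illustrative values only; token_strs stays None unless the request
# set return_token_strs=True.
resp = TokenizeResponse(
    tokens=[101, 2023, 2003],
    token_strs=["[CLS]", "this", "is"],
    count=3,
    max_model_len=8192,
)
print(resp.model_dump_json())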
vllm/entrypoints/openai/serving_tokenization.py

@@ -110,7 +110,12 @@ class OpenAIServingTokenization(OpenAIServing):
                           dict) and "prompt_token_ids" in engine_prompt:
                 input_ids.extend(engine_prompt["prompt_token_ids"])
 
+        token_strs = None
+        if request.return_token_strs:
+            token_strs = tokenizer.convert_ids_to_tokens(input_ids)
+
         return TokenizeResponse(tokens=input_ids,
+                                token_strs=token_strs,
                                 count=len(input_ids),
                                 max_model_len=self.max_model_len)
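Note that convert_ids_to_tokens returns tokenizer-level pieces (e.g. with BPE "Ġ" or SentencePiece "▁" markers), not detokenized text. A short sketch with a HuggingFace fast tokenizer; the gpt2 checkpoint is only an example:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")
ids = tok.encode("This is a token_strs test prompt! vllm1")
print(ids)
print(tok.convert_ids_to_tokens(ids))  # e.g. ['This', 'Ġis', 'Ġa', ...]
print(tok.decode(ids))                 # round-trips back to the prompt text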