Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[Misc] add return token strs for tokenize (#18941)
Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
parent 9a1b9b99d7
commit 20079c6e36
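For context, a minimal client sketch of the new flag (not part of this commit): it assumes a vLLM OpenAI-compatible server already running locally and uses a placeholder model name, and shows that the /tokenize response gains an optional "token_strs" field which stays null unless "return_token_strs" is set in the request.

import requests

BASE_URL = "http://localhost:8000"  # assumed local vLLM server address (placeholder)

response = requests.post(
    f"{BASE_URL}/tokenize",
    json={
        "model": "HuggingFaceH4/zephyr-7b-beta",  # placeholder model name
        "prompt": "This is a token_strs test prompt! vllm1",
        "return_token_strs": True,
    },
)
response.raise_for_status()
body = response.json()

# "tokens" holds the token ids and "token_strs" the matching token strings;
# without "return_token_strs": true the server returns "token_strs": null.
print(body["tokens"])      # list of token ids
print(body["token_strs"])  # list of token strings, same length as "tokens"
print(body["count"], body["max_model_len"])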
@@ -76,11 +76,11 @@ async def test_tokenize_completions(
         })
     response.raise_for_status()
 
-    assert response.json() == {
-        "tokens": tokens,
-        "count": len(tokens),
-        "max_model_len": 8192
-    }
+    result = response.json()
+    assert result["tokens"] == tokens
+    assert result["count"] == len(tokens)
+    assert result["max_model_len"] == 8192
+    assert result["token_strs"] is None
 
 
 @pytest.mark.asyncio
@@ -138,11 +138,11 @@ async def test_tokenize_chat(
         })
     response.raise_for_status()
 
-    assert response.json() == {
-        "tokens": tokens,
-        "count": len(tokens),
-        "max_model_len": 8192
-    }
+    result = response.json()
+    assert result["tokens"] == tokens
+    assert result["count"] == len(tokens)
+    assert result["max_model_len"] == 8192
+    assert result["token_strs"] is None
 
 
 @pytest.mark.asyncio
@@ -215,11 +215,46 @@ async def test_tokenize_chat_with_tools(
     )
     response.raise_for_status()
 
-    assert response.json() == {
-        "tokens": tokens,
-        "count": len(tokens),
-        "max_model_len": 8192,
-    }
+    result = response.json()
+    assert result["tokens"] == tokens
+    assert result["count"] == len(tokens)
+    assert result["max_model_len"] == 8192
+    assert result["token_strs"] is None
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name, tokenizer_name",
+    [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
+    indirect=["tokenizer_name"],
+)
+async def test_tokenize_with_return_token_strs(
+    server: RemoteOpenAIServer,
+    model_name: str,
+    tokenizer_name: str,
+):
+    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
+                              tokenizer_mode="fast")
+
+    prompt = "This is a token_strs test prompt! vllm1"
+    response = requests.post(
+        server.url_for("tokenize"),
+        json={
+            "prompt": prompt,
+            "model": model_name,
+            "return_token_strs": True
+        },
+    )
+    response.raise_for_status()
+
+    tokens = tokenizer.encode(prompt, add_special_tokens=True)
+    tokens_str = tokenizer.convert_ids_to_tokens(tokens)
+
+    result = response.json()
+    assert result["tokens"] == tokens
+    assert result["count"] == len(tokens)
+    assert result["max_model_len"] == 8192
+    assert result["token_strs"] == tokens_str
 
 
 @pytest.mark.asyncio
@@ -1563,6 +1563,11 @@ class TokenizeCompletionRequest(OpenAIBaseModel):
             "If true (the default), special tokens (e.g. BOS) will be added to "
             "the prompt."),
     )
+    return_token_strs: Optional[bool] = Field(
+        default=False,
+        description=("If true, also return the token strings "
+                     "corresponding to the token ids."),
+    )
 
 
 class TokenizeChatRequest(OpenAIBaseModel):
@@ -1576,6 +1581,11 @@ class TokenizeChatRequest(OpenAIBaseModel):
             "This is a parameter used by chat template in tokenizer config of the "
             "model."),
     )
+    return_token_strs: Optional[bool] = Field(
+        default=False,
+        description=("If true, also return the token strings "
+                     "corresponding to the token ids."),
+    )
     continue_final_message: bool = Field(
         default=False,
         description=
@@ -1633,6 +1643,7 @@ class TokenizeResponse(OpenAIBaseModel):
     count: int
     max_model_len: int
     tokens: list[int]
+    token_strs: Optional[list[str]] = None
 
 
 class DetokenizeRequest(OpenAIBaseModel):
@@ -110,7 +110,12 @@ class OpenAIServingTokenization(OpenAIServing):
                     dict) and "prompt_token_ids" in engine_prompt:
                 input_ids.extend(engine_prompt["prompt_token_ids"])
 
+        token_strs = None
+        if request.return_token_strs:
+            token_strs = tokenizer.convert_ids_to_tokens(input_ids)
+
         return TokenizeResponse(tokens=input_ids,
+                                token_strs=token_strs,
                                 count=len(input_ids),
                                 max_model_len=self.max_model_len)
 
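Worth noting: convert_ids_to_tokens returns the tokenizer's raw token pieces rather than detokenized text, so "token_strs" may contain subword markers. A small illustrative sketch, independent of this change and assuming a Hugging Face tokenizer ("gpt2" here is just a placeholder):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")  # placeholder tokenizer for illustration
ids = tok.encode("Hello world")

# Raw token pieces, as exposed via token_strs (BPE markers such as "Ġ" are kept).
print(tok.convert_ids_to_tokens(ids))  # ['Hello', 'Ġworld']
print(tok.decode(ids))                 # 'Hello world' (plain detokenized text)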