mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 21:55:50 +08:00
[Frontend] Enforce tokenize=False when applying chat template (#27205)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
parent
0eb8f2b880
commit
3ada34f9cb
@ -1810,6 +1810,7 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
|
||||
"unsed_kwargs_2": "abc",
|
||||
# should not appear
|
||||
"chat_template": "{% Hello world! %}",
|
||||
"tokenize": True,
|
||||
# used by tokenizer
|
||||
"continue_final_message": True,
|
||||
"tools": tools,
|
||||
@ -1846,10 +1847,21 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
|
||||
tools=tools,
|
||||
model_config=model_config,
|
||||
)
|
||||
with pytest.raises(
|
||||
ValueError, match="Found unexpected chat template kwargs from request"
|
||||
):
|
||||
# should raise error if `chat_template_kwargs` contains
|
||||
# `chat_template` or `tokenize`
|
||||
resolve_chat_template_kwargs(
|
||||
tokenizer,
|
||||
chat_template=chat_template,
|
||||
chat_template_kwargs=chat_template_kwargs,
|
||||
)
|
||||
resolved_chat_template_kwargs = resolve_chat_template_kwargs(
|
||||
tokenizer,
|
||||
chat_template=chat_template,
|
||||
chat_template_kwargs=chat_template_kwargs,
|
||||
raise_on_unexpected=False,
|
||||
)
|
||||
assert set(resolved_chat_template_kwargs.keys()) == expected_kwargs
|
||||
|
||||
|
||||
@ -1499,18 +1499,25 @@ def resolve_chat_template_kwargs(
|
||||
tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast,
|
||||
chat_template: str,
|
||||
chat_template_kwargs: dict[str, Any],
|
||||
raise_on_unexpected: bool = True,
|
||||
) -> dict[str, Any]:
|
||||
# We exclude chat_template from kwargs here, because
|
||||
# chat template has been already resolved at this stage
|
||||
unexpected_vars = {"chat_template", "tokenize"}
|
||||
if raise_on_unexpected and (
|
||||
unexpected_in_kwargs := unexpected_vars & chat_template_kwargs.keys()
|
||||
):
|
||||
raise ValueError(
|
||||
"Found unexpected chat template kwargs from request: "
|
||||
f"{unexpected_in_kwargs}"
|
||||
)
|
||||
|
||||
fn_kw = {
|
||||
k
|
||||
for k in chat_template_kwargs
|
||||
if supports_kw(tokenizer.apply_chat_template, k, allow_var_kwargs=False)
|
||||
}
|
||||
|
||||
template_vars = _cached_resolve_chat_template_kwargs(chat_template)
|
||||
|
||||
# We exclude chat_template from kwargs here, because
|
||||
# chat template has been already resolved at this stage
|
||||
unexpected_vars = {"chat_template"}
|
||||
accept_vars = (fn_kw | template_vars) - unexpected_vars
|
||||
return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}
|
||||
|
||||
@ -1522,7 +1529,6 @@ def apply_hf_chat_template(
|
||||
tools: list[dict[str, Any]] | None,
|
||||
*,
|
||||
model_config: ModelConfig,
|
||||
tokenize: bool = False, # Different from HF's default
|
||||
**kwargs: Any,
|
||||
) -> str:
|
||||
hf_chat_template = resolve_hf_chat_template(
|
||||
@ -1539,17 +1545,18 @@ def apply_hf_chat_template(
|
||||
"does not define one."
|
||||
)
|
||||
|
||||
resolved_kwargs = resolve_chat_template_kwargs(
|
||||
tokenizer=tokenizer,
|
||||
chat_template=hf_chat_template,
|
||||
chat_template_kwargs=kwargs,
|
||||
)
|
||||
|
||||
try:
|
||||
resolved_kwargs = resolve_chat_template_kwargs(
|
||||
tokenizer=tokenizer,
|
||||
chat_template=hf_chat_template,
|
||||
chat_template_kwargs=kwargs,
|
||||
)
|
||||
return tokenizer.apply_chat_template(
|
||||
conversation=conversation, # type: ignore[arg-type]
|
||||
tools=tools, # type: ignore[arg-type]
|
||||
chat_template=hf_chat_template,
|
||||
tokenize=tokenize,
|
||||
tokenize=False,
|
||||
**resolved_kwargs,
|
||||
)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user