mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 16:35:40 +08:00
[Frontend] Enforce tokenize=False when applying chat template (#27205)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
parent
0eb8f2b880
commit
3ada34f9cb
@ -1810,6 +1810,7 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
|
|||||||
"unsed_kwargs_2": "abc",
|
"unsed_kwargs_2": "abc",
|
||||||
# should not appear
|
# should not appear
|
||||||
"chat_template": "{% Hello world! %}",
|
"chat_template": "{% Hello world! %}",
|
||||||
|
"tokenize": True,
|
||||||
# used by tokenizer
|
# used by tokenizer
|
||||||
"continue_final_message": True,
|
"continue_final_message": True,
|
||||||
"tools": tools,
|
"tools": tools,
|
||||||
@ -1846,10 +1847,21 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
|
|||||||
tools=tools,
|
tools=tools,
|
||||||
model_config=model_config,
|
model_config=model_config,
|
||||||
)
|
)
|
||||||
|
with pytest.raises(
|
||||||
|
ValueError, match="Found unexpected chat template kwargs from request"
|
||||||
|
):
|
||||||
|
# should raise error if `chat_template_kwargs` contains
|
||||||
|
# `chat_template` or `tokenize`
|
||||||
|
resolve_chat_template_kwargs(
|
||||||
|
tokenizer,
|
||||||
|
chat_template=chat_template,
|
||||||
|
chat_template_kwargs=chat_template_kwargs,
|
||||||
|
)
|
||||||
resolved_chat_template_kwargs = resolve_chat_template_kwargs(
|
resolved_chat_template_kwargs = resolve_chat_template_kwargs(
|
||||||
tokenizer,
|
tokenizer,
|
||||||
chat_template=chat_template,
|
chat_template=chat_template,
|
||||||
chat_template_kwargs=chat_template_kwargs,
|
chat_template_kwargs=chat_template_kwargs,
|
||||||
|
raise_on_unexpected=False,
|
||||||
)
|
)
|
||||||
assert set(resolved_chat_template_kwargs.keys()) == expected_kwargs
|
assert set(resolved_chat_template_kwargs.keys()) == expected_kwargs
|
||||||
|
|
||||||
|
|||||||
@ -1499,18 +1499,25 @@ def resolve_chat_template_kwargs(
|
|||||||
tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast,
|
tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast,
|
||||||
chat_template: str,
|
chat_template: str,
|
||||||
chat_template_kwargs: dict[str, Any],
|
chat_template_kwargs: dict[str, Any],
|
||||||
|
raise_on_unexpected: bool = True,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
|
# We exclude chat_template from kwargs here, because
|
||||||
|
# chat template has been already resolved at this stage
|
||||||
|
unexpected_vars = {"chat_template", "tokenize"}
|
||||||
|
if raise_on_unexpected and (
|
||||||
|
unexpected_in_kwargs := unexpected_vars & chat_template_kwargs.keys()
|
||||||
|
):
|
||||||
|
raise ValueError(
|
||||||
|
"Found unexpected chat template kwargs from request: "
|
||||||
|
f"{unexpected_in_kwargs}"
|
||||||
|
)
|
||||||
|
|
||||||
fn_kw = {
|
fn_kw = {
|
||||||
k
|
k
|
||||||
for k in chat_template_kwargs
|
for k in chat_template_kwargs
|
||||||
if supports_kw(tokenizer.apply_chat_template, k, allow_var_kwargs=False)
|
if supports_kw(tokenizer.apply_chat_template, k, allow_var_kwargs=False)
|
||||||
}
|
}
|
||||||
|
|
||||||
template_vars = _cached_resolve_chat_template_kwargs(chat_template)
|
template_vars = _cached_resolve_chat_template_kwargs(chat_template)
|
||||||
|
|
||||||
# We exclude chat_template from kwargs here, because
|
|
||||||
# chat template has been already resolved at this stage
|
|
||||||
unexpected_vars = {"chat_template"}
|
|
||||||
accept_vars = (fn_kw | template_vars) - unexpected_vars
|
accept_vars = (fn_kw | template_vars) - unexpected_vars
|
||||||
return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}
|
return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}
|
||||||
|
|
||||||
@ -1522,7 +1529,6 @@ def apply_hf_chat_template(
|
|||||||
tools: list[dict[str, Any]] | None,
|
tools: list[dict[str, Any]] | None,
|
||||||
*,
|
*,
|
||||||
model_config: ModelConfig,
|
model_config: ModelConfig,
|
||||||
tokenize: bool = False, # Different from HF's default
|
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> str:
|
) -> str:
|
||||||
hf_chat_template = resolve_hf_chat_template(
|
hf_chat_template = resolve_hf_chat_template(
|
||||||
@ -1539,17 +1545,18 @@ def apply_hf_chat_template(
|
|||||||
"does not define one."
|
"does not define one."
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
|
||||||
resolved_kwargs = resolve_chat_template_kwargs(
|
resolved_kwargs = resolve_chat_template_kwargs(
|
||||||
tokenizer=tokenizer,
|
tokenizer=tokenizer,
|
||||||
chat_template=hf_chat_template,
|
chat_template=hf_chat_template,
|
||||||
chat_template_kwargs=kwargs,
|
chat_template_kwargs=kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
return tokenizer.apply_chat_template(
|
return tokenizer.apply_chat_template(
|
||||||
conversation=conversation, # type: ignore[arg-type]
|
conversation=conversation, # type: ignore[arg-type]
|
||||||
tools=tools, # type: ignore[arg-type]
|
tools=tools, # type: ignore[arg-type]
|
||||||
chat_template=hf_chat_template,
|
chat_template=hf_chat_template,
|
||||||
tokenize=tokenize,
|
tokenize=False,
|
||||||
**resolved_kwargs,
|
**resolved_kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user