[Frontend] Enforce tokenize=False when applying chat template (#27205)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
2026-03-16 16:27:15 +08:00 · 2025-10-20 22:57:34 -04:00 · 2025-10-20 22:57:34 -04:00 · 3ada34f9cb
commit 3ada34f9cb
parent 0eb8f2b880
2 changed files with 31 additions and 12 deletions
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@ -1810,6 +1810,7 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
        "unsed_kwargs_2": "abc",
        # should not appear
        "chat_template": "{% Hello world! %}",
+        "tokenize": True,
        # used by tokenizer
        "continue_final_message": True,
        "tools": tools,
@ -1846,10 +1847,21 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
        tools=tools,
        model_config=model_config,
    )
+    with pytest.raises(
+        ValueError, match="Found unexpected chat template kwargs from request"
+    ):
+        # should raise error if `chat_template_kwargs` contains
+        # `chat_template` or `tokenize`
+        resolve_chat_template_kwargs(
+            tokenizer,
+            chat_template=chat_template,
+            chat_template_kwargs=chat_template_kwargs,
+        )
    resolved_chat_template_kwargs = resolve_chat_template_kwargs(
        tokenizer,
        chat_template=chat_template,
        chat_template_kwargs=chat_template_kwargs,
+        raise_on_unexpected=False,
    )
    assert set(resolved_chat_template_kwargs.keys()) == expected_kwargs

--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@ -1499,18 +1499,25 @@ def resolve_chat_template_kwargs(
    tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast,
    chat_template: str,
    chat_template_kwargs: dict[str, Any],
+    raise_on_unexpected: bool = True,
 ) -> dict[str, Any]:
+    # Exclude chat_template (already resolved at this stage) and tokenize
+    # (always forced to False when the template is applied) from kwargs
+    unexpected_vars = {"chat_template", "tokenize"}
+    if raise_on_unexpected and (
+        unexpected_in_kwargs := unexpected_vars & chat_template_kwargs.keys()
+    ):
+        raise ValueError(
+            "Found unexpected chat template kwargs from request: "
+            f"{unexpected_in_kwargs}"
+        )
+
    fn_kw = {
        k
        for k in chat_template_kwargs
        if supports_kw(tokenizer.apply_chat_template, k, allow_var_kwargs=False)
    }
-
    template_vars = _cached_resolve_chat_template_kwargs(chat_template)
-
-    # We exclude chat_template from kwargs here, because
-    # chat template has been already resolved at this stage
-    unexpected_vars = {"chat_template"}
    accept_vars = (fn_kw | template_vars) - unexpected_vars
    return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}

@ -1522,7 +1529,6 @@ def apply_hf_chat_template(
    tools: list[dict[str, Any]] | None,
    *,
    model_config: ModelConfig,
-    tokenize: bool = False,  # Different from HF's default
    **kwargs: Any,
 ) -> str:
    hf_chat_template = resolve_hf_chat_template(
@ -1539,17 +1545,18 @@ def apply_hf_chat_template(
            "does not define one."
        )

+    resolved_kwargs = resolve_chat_template_kwargs(
+        tokenizer=tokenizer,
+        chat_template=hf_chat_template,
+        chat_template_kwargs=kwargs,
+    )
+
    try:
-        resolved_kwargs = resolve_chat_template_kwargs(
-            tokenizer=tokenizer,
-            chat_template=hf_chat_template,
-            chat_template_kwargs=kwargs,
-        )
        return tokenizer.apply_chat_template(
            conversation=conversation,  # type: ignore[arg-type]
            tools=tools,  # type: ignore[arg-type]
            chat_template=hf_chat_template,
-            tokenize=tokenize,
+            tokenize=False,
            **resolved_kwargs,
        )