fix: allow HuggingFace standard chat template params via **kwargs (#27622)

Signed-off-by: wangln19 <wanglinian@dev.wanglinian.msh-dev.svc.cluster.local>
Signed-off-by: wangln19 <96399074+wangln19@users.noreply.github.com>
Co-authored-by: wangln19 <wanglinian@dev.wanglinian.msh-dev.svc.cluster.local>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
wangln19 2025-10-28 21:12:34 +08:00 committed by GitHub
parent a00d6254e9
commit 446912d1cb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 57 additions and 1 deletions

View File

@ -1882,6 +1882,39 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
) )
assert set(resolved_chat_template_kwargs.keys()) == expected_kwargs assert set(resolved_chat_template_kwargs.keys()) == expected_kwargs
# Additional test: Verify HF base parameters work with **kwargs tokenizers
# This validates the fix for tokenizers like Kimi K2 that use **kwargs
# to receive standard HuggingFace parameters instead of declaring them explicitly
from vllm.entrypoints.chat_utils import _get_hf_base_chat_template_params
hf_base_params = _get_hf_base_chat_template_params()
# Verify common HF parameters are in the base class
assert {"add_generation_prompt", "tools", "continue_final_message"}.issubset(
hf_base_params
), f"Expected HF base params not found in {hf_base_params}"
# Test with a mock tokenizer that uses **kwargs (like Kimi K2)
class MockTokenizerWithKwargs:
def apply_chat_template(self, conversation, **kwargs):
return "mocked_output"
mock_tokenizer = MockTokenizerWithKwargs()
mock_kwargs = {
"add_generation_prompt": True,
"tools": tools,
"continue_final_message": False,
"unknown_param": "should_be_filtered",
}
resolved_mock = resolve_chat_template_kwargs(
mock_tokenizer, chat_template, mock_kwargs, raise_on_unexpected=False
)
# HF base params should pass through even with **kwargs tokenizer
assert "add_generation_prompt" in resolved_mock
assert "tools" in resolved_mock
assert "continue_final_message" in resolved_mock
# Unknown params should be filtered out
assert "unknown_param" not in resolved_mock
# NOTE: Qwen2-Audio default chat template is specially defined inside # NOTE: Qwen2-Audio default chat template is specially defined inside
# processor class instead of using `tokenizer_config.json` # processor class instead of using `tokenizer_config.json`

View File

@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio import asyncio
import inspect
import json import json
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from collections import Counter, defaultdict, deque from collections import Counter, defaultdict, deque
@ -1515,6 +1516,24 @@ def _resolve_chat_template_kwargs(
_cached_resolve_chat_template_kwargs = lru_cache(_resolve_chat_template_kwargs) _cached_resolve_chat_template_kwargs = lru_cache(_resolve_chat_template_kwargs)
@lru_cache
def _get_hf_base_chat_template_params() -> frozenset[str]:
# Get standard parameters from HuggingFace's base tokenizer class.
# This dynamically extracts parameters from PreTrainedTokenizer's
# apply_chat_template method, ensuring compatibility with tokenizers
# that use **kwargs to receive standard parameters.
# Read signature from HF's base class - the single source of truth
base_sig = inspect.signature(PreTrainedTokenizer.apply_chat_template)
# Exclude VAR_KEYWORD (**kwargs) and VAR_POSITIONAL (*args) placeholders
return frozenset(
p.name
for p in base_sig.parameters.values()
if p.kind
not in (inspect.Parameter.VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)
)
def resolve_chat_template_kwargs( def resolve_chat_template_kwargs(
tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast, tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast,
chat_template: str, chat_template: str,
@ -1538,7 +1557,11 @@ def resolve_chat_template_kwargs(
if supports_kw(tokenizer.apply_chat_template, k, allow_var_kwargs=False) if supports_kw(tokenizer.apply_chat_template, k, allow_var_kwargs=False)
} }
template_vars = _cached_resolve_chat_template_kwargs(chat_template) template_vars = _cached_resolve_chat_template_kwargs(chat_template)
accept_vars = (fn_kw | template_vars) - unexpected_vars
# Allow standard HF parameters even if tokenizer uses **kwargs to receive them
hf_base_params = _get_hf_base_chat_template_params()
accept_vars = (fn_kw | template_vars | hf_base_params) - unexpected_vars
return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars} return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}