fix: allow HuggingFace standard chat template params via **kwargs (#27622)

Signed-off-by: wangln19 <wanglinian@dev.wanglinian.msh-dev.svc.cluster.local>
Signed-off-by: wangln19 <96399074+wangln19@users.noreply.github.com>
Co-authored-by: wangln19 <wanglinian@dev.wanglinian.msh-dev.svc.cluster.local>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
wangln19 2025-10-28 21:12:34 +08:00 committed by GitHub
parent a00d6254e9
commit 446912d1cb
2 changed files with 57 additions and 1 deletion
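
For context, the root cause is that vLLM filters chat_template_kwargs against the parameter names declared explicitly in the tokenizer's apply_chat_template signature (supports_kw with allow_var_kwargs=False), so tokenizers such as Kimi K2 that take the standard HuggingFace parameters only through **kwargs had add_generation_prompt, tools, and continue_final_message silently dropped. The sketch below illustrates the mechanism with hypothetical stand-in classes and a simplified explicit_params helper; it is not the actual vLLM or transformers code.

import inspect


class BaseTokenizer:
    # Hypothetical stand-in for transformers' PreTrainedTokenizer: the standard
    # chat-template parameters are declared explicitly in the signature.
    def apply_chat_template(
        self,
        conversation,
        tools=None,
        add_generation_prompt=False,
        continue_final_message=False,
        **kwargs,
    ):
        return "rendered"


class KwargsOnlyTokenizer(BaseTokenizer):
    # Hypothetical stand-in for tokenizers such as Kimi K2 that accept the
    # standard parameters only through **kwargs.
    def apply_chat_template(self, conversation, **kwargs):
        return super().apply_chat_template(conversation, **kwargs)


def explicit_params(fn):
    # Parameter names declared in the signature, excluding *args/**kwargs.
    return frozenset(
        p.name
        for p in inspect.signature(fn).parameters.values()
        if p.kind
        not in (inspect.Parameter.VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)
    )


# Before the fix: filtering against the subclass signature alone drops the
# standard parameters for a **kwargs-only tokenizer.
print(explicit_params(KwargsOnlyTokenizer.apply_chat_template))
# -> only 'self' and 'conversation'

# After the fix: union in the base-class signature so the standard
# HuggingFace parameters are still accepted.
print(
    explicit_params(KwargsOnlyTokenizer.apply_chat_template)
    | explicit_params(BaseTokenizer.apply_chat_template)
)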


@@ -1882,6 +1882,39 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
    )
    assert set(resolved_chat_template_kwargs.keys()) == expected_kwargs

    # Additional test: Verify HF base parameters work with **kwargs tokenizers
    # This validates the fix for tokenizers like Kimi K2 that use **kwargs
    # to receive standard HuggingFace parameters instead of declaring them explicitly
    from vllm.entrypoints.chat_utils import _get_hf_base_chat_template_params

    hf_base_params = _get_hf_base_chat_template_params()
    # Verify common HF parameters are in the base class
    assert {"add_generation_prompt", "tools", "continue_final_message"}.issubset(
        hf_base_params
    ), f"Expected HF base params not found in {hf_base_params}"

    # Test with a mock tokenizer that uses **kwargs (like Kimi K2)
    class MockTokenizerWithKwargs:
        def apply_chat_template(self, conversation, **kwargs):
            return "mocked_output"

    mock_tokenizer = MockTokenizerWithKwargs()
    mock_kwargs = {
        "add_generation_prompt": True,
        "tools": tools,
        "continue_final_message": False,
        "unknown_param": "should_be_filtered",
    }
    resolved_mock = resolve_chat_template_kwargs(
        mock_tokenizer, chat_template, mock_kwargs, raise_on_unexpected=False
    )
    # HF base params should pass through even with **kwargs tokenizer
    assert "add_generation_prompt" in resolved_mock
    assert "tools" in resolved_mock
    assert "continue_final_message" in resolved_mock
    # Unknown params should be filtered out
    assert "unknown_param" not in resolved_mock

    # NOTE: Qwen2-Audio default chat template is specially defined inside
    # processor class instead of using `tokenizer_config.json`
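
For API users, the practical effect is that request-level chat template parameters now reach tokenizers whose apply_chat_template only declares **kwargs. Below is a minimal usage sketch, assuming a running vLLM OpenAI-compatible server that forwards the chat_template_kwargs request field through resolve_chat_template_kwargs; the model name is only a placeholder.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
resp = client.chat.completions.create(
    model="moonshotai/Kimi-K2-Instruct",  # placeholder model id
    messages=[{"role": "user", "content": "Hello"}],
    extra_body={
        "chat_template_kwargs": {
            # Standard HF params now reach tokenizers whose
            # apply_chat_template only declares **kwargs.
            "add_generation_prompt": True,
            "continue_final_message": False,
        }
    },
)
print(resp.choices[0].message.content)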


@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio
import inspect
import json
from abc import ABC, abstractmethod
from collections import Counter, defaultdict, deque
@@ -1515,6 +1516,24 @@ def _resolve_chat_template_kwargs(
_cached_resolve_chat_template_kwargs = lru_cache(_resolve_chat_template_kwargs)


@lru_cache
def _get_hf_base_chat_template_params() -> frozenset[str]:
    # Get standard parameters from HuggingFace's base tokenizer class.
    # This dynamically extracts parameters from PreTrainedTokenizer's
    # apply_chat_template method, ensuring compatibility with tokenizers
    # that use **kwargs to receive standard parameters.
    # Read signature from HF's base class - the single source of truth
    base_sig = inspect.signature(PreTrainedTokenizer.apply_chat_template)
    # Exclude VAR_KEYWORD (**kwargs) and VAR_POSITIONAL (*args) placeholders
    return frozenset(
        p.name
        for p in base_sig.parameters.values()
        if p.kind
        not in (inspect.Parameter.VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)
    )


def resolve_chat_template_kwargs(
    tokenizer: PreTrainedTokenizer | PreTrainedTokenizerFast,
    chat_template: str,
@@ -1538,7 +1557,11 @@ def resolve_chat_template_kwargs(
        if supports_kw(tokenizer.apply_chat_template, k, allow_var_kwargs=False)
    }
    template_vars = _cached_resolve_chat_template_kwargs(chat_template)
    accept_vars = (fn_kw | template_vars) - unexpected_vars
    # Allow standard HF parameters even if tokenizer uses **kwargs to receive them
    hf_base_params = _get_hf_base_chat_template_params()
    accept_vars = (fn_kw | template_vars | hf_base_params) - unexpected_vars
    return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}
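
To make the set arithmetic in the last hunk concrete, here is a small worked example with made-up values (not taken from any real tokenizer or template):

# Illustrative walk-through of the accept_vars computation above.
fn_kw = {"conversation", "chat_template"}      # declared by the tokenizer's signature
template_vars = {"enable_thinking"}            # variables referenced by the Jinja template
hf_base_params = {
    "add_generation_prompt", "tools", "continue_final_message",
}                                              # from PreTrainedTokenizer's base signature
unexpected_vars = set()                        # nothing flagged as unexpected here

accept_vars = (fn_kw | template_vars | hf_base_params) - unexpected_vars
chat_template_kwargs = {
    "add_generation_prompt": True,
    "enable_thinking": True,
    "unknown_param": "dropped",
}
print({k: v for k, v in chat_template_kwargs.items() if k in accept_vars})
# {'add_generation_prompt': True, 'enable_thinking': True}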