[Bugfix] Preserve original tokenizer class name in CachedTokenizer

HuggingFace transformers processor validates tokenizer type by checking
the class name. When vLLM creates a CachedTokenizer with a modified
class name (e.g., 'CachedQwen2TokenizerFast'), the processor type check
fails with TypeError.

This fix preserves the original tokenizer class name and qualname in
CachedTokenizer, ensuring compatibility with HuggingFace transformers
processor type checking.

Fixes #31080

Signed-off-by: Claude <noreply@anthropic.com>
Signed-off-by: majiayu000 <1835304752@qq.com>
This commit is contained in:
majiayu000 2025-12-24 16:02:48 +08:00
parent ddfac7034e
commit abd1dbc548
2 changed files with 22 additions and 1 deletions

View File

@@ -41,3 +41,20 @@ def _check_consistency(target: TokenizerLike, expected: TokenizerLike):
)
assert target.encode("prompt") == expected.encode("prompt")
def test_cached_tokenizer_preserves_class_name():
    """Verify that get_cached_tokenizer keeps the wrapped class's identity.

    HuggingFace transformers processors validate a tokenizer by inspecting
    its class name, so the caching wrapper must report the same ``__name__``
    and ``__qualname__`` as the tokenizer it wraps.

    See: https://github.com/vllm-project/vllm/issues/31080
    """
    base = AutoTokenizer.from_pretrained("gpt2")
    wrapped = get_cached_tokenizer(base)

    base_cls = base.__class__
    wrapped_cls = wrapped.__class__
    # The wrapper class must be indistinguishable from the original by name.
    assert wrapped_cls.__name__ == base_cls.__name__
    assert wrapped_cls.__qualname__ == base_cls.__qualname__

View File

@@ -58,7 +58,11 @@ def get_cached_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:
def __reduce__(self):
    # Pickle support: the CachedTokenizer class is created dynamically inside
    # get_cached_tokenizer and therefore cannot be pickled directly. Instead,
    # serialize as "call get_cached_tokenizer on the wrapped tokenizer", which
    # rebuilds an equivalent cached wrapper on unpickling.
    # NOTE(review): `tokenizer` is the closure variable from the enclosing
    # get_cached_tokenizer call (not visible in this chunk).
    return get_cached_tokenizer, (tokenizer,)
CachedTokenizer.__name__ = f"Cached{tokenizer.__class__.__name__}"
# Keep the original class name to maintain compatibility with
# HuggingFace transformers processor type checking.
# The processor checks tokenizer class name against expected types.
CachedTokenizer.__name__ = tokenizer.__class__.__name__
CachedTokenizer.__qualname__ = tokenizer.__class__.__qualname__
cached_tokenizer.__class__ = CachedTokenizer
return cached_tokenizer