From ced693e845508a318e8f8c1d7395d8f2c8d4620b Mon Sep 17 00:00:00 2001
From: Xu Wenqing <121550081+Xu-Wenqing@users.noreply.github.com>
Date: Fri, 26 Sep 2025 17:25:39 +0800
Subject: [PATCH] Support LongCat-Flash-Chat tool call (#24083)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 许文卿
Signed-off-by: yewentao256
---
 docs/features/tool_calling.md                 |  9 +++++
 .../openai/tool_parsers/__init__.py           |  2 +
 .../tool_parsers/longcat_tool_parser.py       | 39 +++++++++++++++++++
 3 files changed, 50 insertions(+)
 create mode 100644 vllm/entrypoints/openai/tool_parsers/longcat_tool_parser.py

diff --git a/docs/features/tool_calling.md b/docs/features/tool_calling.md
index 291c313cd57af..16693db7255ca 100644
--- a/docs/features/tool_calling.md
+++ b/docs/features/tool_calling.md
@@ -310,6 +310,15 @@ Flags:
 * For non-reasoning: `--tool-call-parser hunyuan_a13b`
 * For reasoning: `--tool-call-parser hunyuan_a13b --reasoning-parser hunyuan_a13b --enable_reasoning`
 
+### LongCat-Flash-Chat Models (`longcat`)
+
+Supported models:
+
+* `meituan-longcat/LongCat-Flash-Chat`
+* `meituan-longcat/LongCat-Flash-Chat-FP8`
+
+Flags: `--tool-call-parser longcat`
+
 ### GLM-4.5 Models (`glm45`)
 
 Supported models:
diff --git a/vllm/entrypoints/openai/tool_parsers/__init__.py b/vllm/entrypoints/openai/tool_parsers/__init__.py
index 5e77c406b8d92..2c5a0a6af23f0 100644
--- a/vllm/entrypoints/openai/tool_parsers/__init__.py
+++ b/vllm/entrypoints/openai/tool_parsers/__init__.py
@@ -14,6 +14,7 @@ from .jamba_tool_parser import JambaToolParser
 from .kimi_k2_tool_parser import KimiK2ToolParser
 from .llama4_pythonic_tool_parser import Llama4PythonicToolParser
 from .llama_tool_parser import Llama3JsonToolParser
+from .longcat_tool_parser import LongcatFlashToolParser
 from .minimax_tool_parser import MinimaxToolParser
 from .mistral_tool_parser import MistralToolParser
 from .openai_tool_parser import OpenAIToolParser
@@ -36,6 +37,7 @@ __all__ = [
     "Llama3JsonToolParser",
     "JambaToolParser",
     "Llama4PythonicToolParser",
+    "LongcatFlashToolParser",
     "PythonicToolParser",
     "Phi4MiniJsonToolParser",
     "DeepSeekV3ToolParser",
diff --git a/vllm/entrypoints/openai/tool_parsers/longcat_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/longcat_tool_parser.py
new file mode 100644
index 0000000000000..87a3fdc44397e
--- /dev/null
+++ b/vllm/entrypoints/openai/tool_parsers/longcat_tool_parser.py
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import regex as re
+
+from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
+    ToolParserManager)
+from vllm.entrypoints.openai.tool_parsers.hermes_tool_parser import (
+    Hermes2ProToolParser)
+from vllm.transformers_utils.tokenizer import AnyTokenizer
+
+
+@ToolParserManager.register_module("longcat")
+class LongcatFlashToolParser(Hermes2ProToolParser):
+
+    def __init__(self, tokenizer: AnyTokenizer):
+        super().__init__(tokenizer)
+
+        self.tool_call_start_token: str = "<longcat_tool_call>"
+        self.tool_call_end_token: str = "</longcat_tool_call>"
+
+        self.tool_call_regex = re.compile(
+            r"<longcat_tool_call>(.*?)</longcat_tool_call>|<longcat_tool_call>(.*)",
+            re.DOTALL)
+
+        self.tool_call_start_token_ids = self.model_tokenizer.encode(
+            self.tool_call_start_token, add_special_tokens=False)
+        self.tool_call_end_token_ids = self.model_tokenizer.encode(
+            self.tool_call_end_token, add_special_tokens=False)
+
+        self.tool_call_start_token_array = [
+            self.model_tokenizer.decode([token_id])
+            for token_id in self.tool_call_start_token_ids
+        ]
+
+        self.tool_call_end_token_array = [
+            self.model_tokenizer.decode([token_id])
+            for token_id in self.tool_call_end_token_ids
+        ]
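
For reference, a minimal sketch of the extraction this parser performs once the server is started with something like `vllm serve meituan-longcat/LongCat-Flash-Chat --enable-auto-tool-choice --tool-call-parser longcat`: the regex pulls the JSON payload out of the tool-call blocks in the model output. The tag names and the sample completion below are illustrative assumptions taken from the parser code above, and the stdlib re module stands in for the third-party regex package that vLLM uses.

# Illustrative only: mirrors LongcatFlashToolParser's regex to show how a
# LongCat-Flash-Chat completion is split into tool-call payloads.
import json
import re  # the parser itself uses the third-party `regex` package

TOOL_CALL_REGEX = re.compile(
    r"<longcat_tool_call>(.*?)</longcat_tool_call>|<longcat_tool_call>(.*)",
    re.DOTALL)

# Hypothetical model output; the real tag format comes from the model's chat template.
completion = (
    'Let me look that up. <longcat_tool_call>'
    '{"name": "get_weather", "arguments": {"city": "Beijing"}}'
    '</longcat_tool_call>')

for closed, unfinished in TOOL_CALL_REGEX.findall(completion):
    call = json.loads(closed or unfinished)
    print(call["name"], call["arguments"])  # get_weather {'city': 'Beijing'}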