[Model] Add optional parameter to reasoning parser constructor (#25554)

Signed-off-by: taohui <taohui3@gmail.com>
Signed-off-by: Tao Hui <taohui3@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Tao Hui 2025-09-26 01:12:50 +08:00 committed by GitHub
parent 13cc7f5370
commit b8d9e4a326
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 15 additions and 15 deletions

View File

@@ -34,7 +34,7 @@ class ReasoningParser:
It is used to extract reasoning content from the model output. It is used to extract reasoning content from the model output.
""" """
def __init__(self, tokenizer: AnyTokenizer): def __init__(self, tokenizer: AnyTokenizer, *args, **kwargs):
self.model_tokenizer = tokenizer self.model_tokenizer = tokenizer
@cached_property @cached_property

View File

@@ -35,8 +35,8 @@ class BaseThinkingReasoningParser(ReasoningParser):
"""The token that ends reasoning content.""" """The token that ends reasoning content."""
raise NotImplementedError raise NotImplementedError
def __init__(self, tokenizer: AnyTokenizer): def __init__(self, tokenizer: AnyTokenizer, *args, **kwargs):
super().__init__(tokenizer) super().__init__(tokenizer, *args, **kwargs)
if not self.model_tokenizer: if not self.model_tokenizer:
raise ValueError( raise ValueError(

View File

@@ -26,8 +26,8 @@ class Glm4MoeModelReasoningParser(ReasoningParser):
from the model's output. from the model's output.
""" """
def __init__(self, tokenizer: PreTrainedTokenizerBase): def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
super().__init__(tokenizer) super().__init__(tokenizer, *args, **kwargs)
self.think_start_token = "<think>" self.think_start_token = "<think>"
self.think_end_token = "</think>" self.think_end_token = "</think>"

View File

@@ -24,8 +24,8 @@ class GptOssReasoningParser(ReasoningParser):
is only used for detecting the end of the reasoning content. is only used for detecting the end of the reasoning content.
""" """
def __init__(self, tokenizer: PreTrainedTokenizerBase): def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
super().__init__(tokenizer) super().__init__(tokenizer, *args, **kwargs)
self.reasoning_end_token_ids = self.model_tokenizer.encode( self.reasoning_end_token_ids = self.model_tokenizer.encode(
"<|start|>assistant<|channel|>final<|message|>") "<|start|>assistant<|channel|>final<|message|>")

View File

@@ -24,8 +24,8 @@ class GraniteReasoningParser(ReasoningParser):
and "Here is my response:" to separate its thinking / response outputs. and "Here is my response:" to separate its thinking / response outputs.
""" """
def __init__(self, tokenizer: PreTrainedTokenizerBase): def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
super().__init__(tokenizer) super().__init__(tokenizer, *args, **kwargs)
# NOTE: There have been some observed occurrences of quantized # NOTE: There have been some observed occurrences of quantized
# instances of the current models using "Here's" instead of "Here is", # instances of the current models using "Here's" instead of "Here is",

View File

@@ -40,8 +40,8 @@ class HunyuanA13BReasoningParser(ReasoningParser):
response ends: "\n</answer>": [524, 9399, 29] response ends: "\n</answer>": [524, 9399, 29]
""" """
def __init__(self, tokenizer: PreTrainedTokenizerBase): def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
super().__init__(tokenizer) super().__init__(tokenizer, *args, **kwargs)
self.think_start_expr = r"<think>\n" self.think_start_expr = r"<think>\n"
self.think_end_expr = r"\n</think>\n" self.think_end_expr = r"\n</think>\n"

View File

@@ -21,12 +21,12 @@ class MistralReasoningParser(DeepSeekR1ReasoningParser):
text. This parser extracts the reasoning content from the model output. text. This parser extracts the reasoning content from the model output.
""" """
def __init__(self, tokenizer: MistralTokenizer): def __init__(self, tokenizer: MistralTokenizer, *args, **kwargs):
if not isinstance(tokenizer, MistralTokenizer): if not isinstance(tokenizer, MistralTokenizer):
raise ValueError( raise ValueError(
"The tokenizer must be an instance of MistralTokenizer.") "The tokenizer must be an instance of MistralTokenizer.")
ReasoningParser.__init__(self, tokenizer) ReasoningParser.__init__(self, tokenizer, *args, **kwargs)
if not self.model_tokenizer: if not self.model_tokenizer:
raise ValueError( raise ValueError(

View File

@@ -24,8 +24,8 @@ class Step3ReasoningParser(ReasoningParser):
text. This parser extracts all content before </think> as reasoning content. text. This parser extracts all content before </think> as reasoning content.
""" """
def __init__(self, tokenizer: PreTrainedTokenizerBase): def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
super().__init__(tokenizer) super().__init__(tokenizer, *args, **kwargs)
self.think_end_token = "</think>" self.think_end_token = "</think>"
self.reasoning_regex = re.compile(rf"(.*?){self.think_end_token}", self.reasoning_regex = re.compile(rf"(.*?){self.think_end_token}",