diff --git a/vllm/reasoning/abs_reasoning_parsers.py b/vllm/reasoning/abs_reasoning_parsers.py
index 39b08ec11107..320009c2611e 100644
--- a/vllm/reasoning/abs_reasoning_parsers.py
+++ b/vllm/reasoning/abs_reasoning_parsers.py
@@ -34,7 +34,7 @@ class ReasoningParser:
It is used to extract reasoning content from the model output.
"""
- def __init__(self, tokenizer: AnyTokenizer):
+ def __init__(self, tokenizer: AnyTokenizer, *args, **kwargs):
self.model_tokenizer = tokenizer
@cached_property
diff --git a/vllm/reasoning/basic_parsers.py b/vllm/reasoning/basic_parsers.py
index 03cb882c2693..cea4b8601ae7 100644
--- a/vllm/reasoning/basic_parsers.py
+++ b/vllm/reasoning/basic_parsers.py
@@ -35,8 +35,8 @@ class BaseThinkingReasoningParser(ReasoningParser):
"""The token that ends reasoning content."""
raise NotImplementedError
- def __init__(self, tokenizer: AnyTokenizer):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: AnyTokenizer, *args, **kwargs):
+ super().__init__(tokenizer, *args, **kwargs)
if not self.model_tokenizer:
raise ValueError(
diff --git a/vllm/reasoning/glm4_moe_reasoning_parser.py b/vllm/reasoning/glm4_moe_reasoning_parser.py
index 460e38d2d396..11e828a7039f 100644
--- a/vllm/reasoning/glm4_moe_reasoning_parser.py
+++ b/vllm/reasoning/glm4_moe_reasoning_parser.py
@@ -26,8 +26,8 @@ class Glm4MoeModelReasoningParser(ReasoningParser):
from the model's output.
"""
- def __init__(self, tokenizer: PreTrainedTokenizerBase):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
+ super().__init__(tokenizer, *args, **kwargs)
        self.think_start_token = "<think>"
        self.think_end_token = "</think>"
diff --git a/vllm/reasoning/gptoss_reasoning_parser.py b/vllm/reasoning/gptoss_reasoning_parser.py
index 3bd4d872ce22..b0988d5d2618 100644
--- a/vllm/reasoning/gptoss_reasoning_parser.py
+++ b/vllm/reasoning/gptoss_reasoning_parser.py
@@ -24,8 +24,8 @@ class GptOssReasoningParser(ReasoningParser):
is only used for detecting the end of the reasoning content.
"""
- def __init__(self, tokenizer: PreTrainedTokenizerBase):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
+ super().__init__(tokenizer, *args, **kwargs)
self.reasoning_end_token_ids = self.model_tokenizer.encode(
"<|start|>assistant<|channel|>final<|message|>")
diff --git a/vllm/reasoning/granite_reasoning_parser.py b/vllm/reasoning/granite_reasoning_parser.py
index 212e14b09286..b76170f39f10 100644
--- a/vllm/reasoning/granite_reasoning_parser.py
+++ b/vllm/reasoning/granite_reasoning_parser.py
@@ -24,8 +24,8 @@ class GraniteReasoningParser(ReasoningParser):
and "Here is my response:" to separate its thinking / response outputs.
"""
- def __init__(self, tokenizer: PreTrainedTokenizerBase):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
+ super().__init__(tokenizer, *args, **kwargs)
# NOTE: There have been some observed occurrences of quantized
# instances of the current models using "Here's" instead of "Here is",
diff --git a/vllm/reasoning/hunyuan_a13b_reasoning_parser.py b/vllm/reasoning/hunyuan_a13b_reasoning_parser.py
index 9deec8a1e8fb..6e3b056d6b62 100644
--- a/vllm/reasoning/hunyuan_a13b_reasoning_parser.py
+++ b/vllm/reasoning/hunyuan_a13b_reasoning_parser.py
@@ -40,8 +40,8 @@ class HunyuanA13BReasoningParser(ReasoningParser):
          response ends: "\n</answer>": [524, 9399, 29]
"""
- def __init__(self, tokenizer: PreTrainedTokenizerBase):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
+ super().__init__(tokenizer, *args, **kwargs)
        self.think_start_expr = r"<think>\n"
        self.think_end_expr = r"</think>\n\n"
diff --git a/vllm/reasoning/mistral_reasoning_parser.py b/vllm/reasoning/mistral_reasoning_parser.py
index 5cb54e6acbb3..ceda96ca6a6d 100644
--- a/vllm/reasoning/mistral_reasoning_parser.py
+++ b/vllm/reasoning/mistral_reasoning_parser.py
@@ -21,12 +21,12 @@ class MistralReasoningParser(DeepSeekR1ReasoningParser):
text. This parser extracts the reasoning content from the model output.
"""
- def __init__(self, tokenizer: MistralTokenizer):
+ def __init__(self, tokenizer: MistralTokenizer, *args, **kwargs):
if not isinstance(tokenizer, MistralTokenizer):
raise ValueError(
"The tokenizer must be an instance of MistralTokenizer.")
- ReasoningParser.__init__(self, tokenizer)
+ ReasoningParser.__init__(self, tokenizer, *args, **kwargs)
if not self.model_tokenizer:
raise ValueError(
diff --git a/vllm/reasoning/step3_reasoning_parser.py b/vllm/reasoning/step3_reasoning_parser.py
index f642ea977c58..6e5deb52d345 100644
--- a/vllm/reasoning/step3_reasoning_parser.py
+++ b/vllm/reasoning/step3_reasoning_parser.py
@@ -24,8 +24,8 @@ class Step3ReasoningParser(ReasoningParser):
    text. This parser extracts all content before </think> as reasoning content.
"""
- def __init__(self, tokenizer: PreTrainedTokenizerBase):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
+ super().__init__(tokenizer, *args, **kwargs)
        self.think_end_token = "</think>"
self.reasoning_regex = re.compile(rf"(.*?){self.think_end_token}",