[Core] add support for reasoning parser plugins (#28075)

Signed-off-by: walter beller-morales <walter.beller.morales@gmail.com>
2026-01-20 11:44:33 +08:00 · 2025-11-05 12:15:06 -05:00 · 2025-11-05 12:15:06 -05:00 · 752ddeacaa
commit 752ddeacaa
parent c18f88c6ca
6 changed files with 62 additions and 15 deletions
--- a/vllm/config/structured_outputs.py
+++ b/vllm/config/structured_outputs.py
@@ -37,6 +37,9 @@ class StructuredOutputsConfig:
    reasoning_parser: str = ""
    """Select the reasoning parser depending on the model that you're using.
    This is used to parse the reasoning content into OpenAI API format."""
+    reasoning_parser_plugin: str = ""
+    """Path to a reasoning parser plugin that can be dynamically
+    loaded and registered."""
    enable_in_reasoning: bool = False
    """Whether to use structured input for reasoning."""

@@ -60,6 +63,22 @@ class StructuredOutputsConfig:

    @model_validator(mode="after")
    def _validate_structured_output_config(self) -> Self:
+        # Import here to avoid circular import
+        from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager
+
+        if self.reasoning_parser_plugin and len(self.reasoning_parser_plugin) > 3:
+            ReasoningParserManager.import_reasoning_parser(self.reasoning_parser_plugin)
+
+        valid_reasoning_parsers = ReasoningParserManager.list_registered()
+        if (
+            self.reasoning_parser != ""
+            and self.reasoning_parser not in valid_reasoning_parsers
+        ):
+            raise ValueError(
+                f"invalid reasoning parser: {self.reasoning_parser} "
+                f"(chose from {{ {','.join(valid_reasoning_parsers)} }})"
+            )
+
        if self.disable_any_whitespace and self.backend not in ("xgrammar", "guidance"):
            raise ValueError(
                "disable_any_whitespace is only supported for "
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -80,7 +80,6 @@ from vllm.logger import init_logger
 from vllm.platforms import CpuArchEnum, current_platform
 from vllm.plugins import load_general_plugins
 from vllm.ray.lazy_utils import is_in_ray_actor, is_ray_initialized
-from vllm.reasoning import ReasoningParserManager
 from vllm.transformers_utils.config import (
    get_model_path,
    is_interleaved,
@@ -495,7 +494,7 @@ class EngineArgs:
        VllmConfig, "structured_outputs_config"
    )
    reasoning_parser: str = StructuredOutputsConfig.reasoning_parser
-
+    reasoning_parser_plugin: str | None = None
    # Deprecated guided decoding fields
    guided_decoding_backend: str | None = None
    guided_decoding_disable_fallback: bool | None = None
@@ -707,10 +706,13 @@ class EngineArgs:
        )
        structured_outputs_group.add_argument(
            "--reasoning-parser",
-            # This choice is a special case because it's not static
-            choices=list(ReasoningParserManager.list_registered()),
+            # Choices need to be validated after parsing to include plugins
            **structured_outputs_kwargs["reasoning_parser"],
        )
+        structured_outputs_group.add_argument(
+            "--reasoning-parser-plugin",
+            **structured_outputs_kwargs["reasoning_parser_plugin"],
+        )
        # Deprecated guided decoding arguments
        for arg, type in [
            ("--guided-decoding-backend", str),
@@ -1629,6 +1631,11 @@ class EngineArgs:
        if self.reasoning_parser:
            self.structured_outputs_config.reasoning_parser = self.reasoning_parser

+        if self.reasoning_parser_plugin:
+            self.structured_outputs_config.reasoning_parser_plugin = (
+                self.reasoning_parser_plugin
+            )
+
        # Forward the deprecated CLI args to the StructuredOutputsConfig
        so_config = self.structured_outputs_config
        if self.guided_decoding_backend is not None:
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -1944,13 +1944,13 @@ def validate_api_server_args(args):
            f"(chose from {{ {','.join(valid_tool_parses)} }})"
        )

-    valid_reasoning_parses = ReasoningParserManager.list_registered()
+    valid_reasoning_parsers = ReasoningParserManager.list_registered()
    if (
        reasoning_parser := args.structured_outputs_config.reasoning_parser
-    ) and reasoning_parser not in valid_reasoning_parses:
+    ) and reasoning_parser not in valid_reasoning_parsers:
        raise KeyError(
            f"invalid reasoning parser: {reasoning_parser} "
-            f"(chose from {{ {','.join(valid_reasoning_parses)} }})"
+            f"(chose from {{ {','.join(valid_reasoning_parsers)} }})"
        )


@@ -1964,6 +1964,9 @@ def setup_server(args):
    if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3:
        ToolParserManager.import_tool_parser(args.tool_parser_plugin)

+    if args.reasoning_parser_plugin and len(args.reasoning_parser_plugin) > 3:
+        ReasoningParserManager.import_reasoning_parser(args.reasoning_parser_plugin)
+
    validate_api_server_args(args)

    # workaround to make sure that we bind the port before the engine is set up.
@@ -2013,6 +2016,9 @@ async def run_server_worker(
    if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3:
        ToolParserManager.import_tool_parser(args.tool_parser_plugin)

+    if args.reasoning_parser_plugin and len(args.reasoning_parser_plugin) > 3:
+        ReasoningParserManager.import_reasoning_parser(args.reasoning_parser_plugin)
+
    # Load logging config for uvicorn if specified
    log_config = load_log_config(args.log_config_file)
    if log_config is not None:
--- a/vllm/entrypoints/openai/run_batch.py
+++ b/vllm/entrypoints/openai/run_batch.py
@@ -334,13 +334,13 @@ async def run_request(


 def validate_run_batch_args(args):
-    valid_reasoning_parses = ReasoningParserManager.list_registered()
+    valid_reasoning_parsers = ReasoningParserManager.list_registered()
    if (
        reasoning_parser := args.structured_outputs_config.reasoning_parser
-    ) and reasoning_parser not in valid_reasoning_parses:
+    ) and reasoning_parser not in valid_reasoning_parsers:
        raise KeyError(
            f"invalid reasoning parser: {reasoning_parser} "
-            f"(chose from {{ {','.join(valid_reasoning_parses)} }})"
+            f"(chose from {{ {','.join(valid_reasoning_parsers)} }})"
        )


--- a/vllm/reasoning/basic_parsers.py
+++ b/vllm/reasoning/basic_parsers.py
@@ -3,15 +3,21 @@

 from abc import abstractmethod
 from collections.abc import Sequence
+from typing import TYPE_CHECKING, Any

-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest,
-    DeltaMessage,
-    ResponsesRequest,
-)
+from vllm.entrypoints.openai.protocol import DeltaMessage
 from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
 from vllm.transformers_utils.tokenizer import AnyTokenizer

+if TYPE_CHECKING:
+    from vllm.entrypoints.openai.protocol import (
+        ChatCompletionRequest,
+        ResponsesRequest,
+    )
+else:
+    ChatCompletionRequest = Any
+    ResponsesRequest = Any
+

 class BaseThinkingReasoningParser(ReasoningParser):
    """
--- a/vllm/v1/structured_output/__init__.py
+++ b/vllm/v1/structured_output/__init__.py
@@ -64,6 +64,15 @@ class StructuredOutputManager:
            self.tokenizer = init_tokenizer_from_configs(
                model_config=self.vllm_config.model_config
            )
+            reasoning_parser = (
+                self.vllm_config.structured_outputs_config.reasoning_parser
+            )
+            reasoning_parser_plugin = (
+                self.vllm_config.structured_outputs_config.reasoning_parser_plugin
+            )
+            if reasoning_parser_plugin and len(reasoning_parser_plugin) > 3:
+                ReasoningParserManager.import_reasoning_parser(reasoning_parser_plugin)
+
            reasoning_parser = (
                self.vllm_config.structured_outputs_config.reasoning_parser
            )