From 752ddeacaa7d759f5a9c105532e53762ff601721 Mon Sep 17 00:00:00 2001 From: Walter Beller-Morales Date: Wed, 5 Nov 2025 12:15:06 -0500 Subject: [PATCH] [Core] add support for reasoning parser plugins (#28075) Signed-off-by: walter beller-morales --- vllm/config/structured_outputs.py | 19 +++++++++++++++++++ vllm/engine/arg_utils.py | 15 +++++++++++---- vllm/entrypoints/openai/api_server.py | 12 +++++++++--- vllm/entrypoints/openai/run_batch.py | 6 +++--- vllm/reasoning/basic_parsers.py | 16 +++++++++++----- vllm/v1/structured_output/__init__.py | 9 +++++++++ 6 files changed, 62 insertions(+), 15 deletions(-) diff --git a/vllm/config/structured_outputs.py b/vllm/config/structured_outputs.py index eb1cc7220b8fe..9530d3d81e15d 100644 --- a/vllm/config/structured_outputs.py +++ b/vllm/config/structured_outputs.py @@ -37,6 +37,9 @@ class StructuredOutputsConfig: reasoning_parser: str = "" """Select the reasoning parser depending on the model that you're using. This is used to parse the reasoning content into OpenAI API format.""" + reasoning_parser_plugin: str = "" + """Path to a reasoning parser plugin that can be dynamically + loaded and registered.""" enable_in_reasoning: bool = False """Whether to use structured input for reasoning.""" @@ -60,6 +63,22 @@ class StructuredOutputsConfig: @model_validator(mode="after") def _validate_structured_output_config(self) -> Self: + # Import here to avoid circular import + from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager + + if self.reasoning_parser_plugin and len(self.reasoning_parser_plugin) > 3: + ReasoningParserManager.import_reasoning_parser(self.reasoning_parser_plugin) + + valid_reasoning_parsers = ReasoningParserManager.list_registered() + if ( + self.reasoning_parser != "" + and self.reasoning_parser not in valid_reasoning_parsers + ): + raise ValueError( + f"invalid reasoning parser: {self.reasoning_parser} " + f"(chose from {{ {','.join(valid_reasoning_parsers)} }})" + ) + if 
self.disable_any_whitespace and self.backend not in ("xgrammar", "guidance"): raise ValueError( "disable_any_whitespace is only supported for " diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index a697f451d483a..e91482e73c795 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -80,7 +80,6 @@ from vllm.logger import init_logger from vllm.platforms import CpuArchEnum, current_platform from vllm.plugins import load_general_plugins from vllm.ray.lazy_utils import is_in_ray_actor, is_ray_initialized -from vllm.reasoning import ReasoningParserManager from vllm.transformers_utils.config import ( get_model_path, is_interleaved, @@ -495,7 +494,7 @@ class EngineArgs: VllmConfig, "structured_outputs_config" ) reasoning_parser: str = StructuredOutputsConfig.reasoning_parser - + reasoning_parser_plugin: str | None = None # Deprecated guided decoding fields guided_decoding_backend: str | None = None guided_decoding_disable_fallback: bool | None = None @@ -707,10 +706,13 @@ class EngineArgs: ) structured_outputs_group.add_argument( "--reasoning-parser", - # This choice is a special case because it's not static - choices=list(ReasoningParserManager.list_registered()), + # Choices need to be validated after parsing to include plugins **structured_outputs_kwargs["reasoning_parser"], ) + structured_outputs_group.add_argument( + "--reasoning-parser-plugin", + **structured_outputs_kwargs["reasoning_parser_plugin"], + ) # Deprecated guided decoding arguments for arg, type in [ ("--guided-decoding-backend", str), @@ -1629,6 +1631,11 @@ class EngineArgs: if self.reasoning_parser: self.structured_outputs_config.reasoning_parser = self.reasoning_parser + if self.reasoning_parser_plugin: + self.structured_outputs_config.reasoning_parser_plugin = ( + self.reasoning_parser_plugin + ) + # Forward the deprecated CLI args to the StructuredOutputsConfig so_config = self.structured_outputs_config if self.guided_decoding_backend is not None: diff --git 
a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 5a05e0bf3a506..11a0c8a07c3d4 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -1944,13 +1944,13 @@ def validate_api_server_args(args): f"(chose from {{ {','.join(valid_tool_parses)} }})" ) - valid_reasoning_parses = ReasoningParserManager.list_registered() + valid_reasoning_parsers = ReasoningParserManager.list_registered() if ( reasoning_parser := args.structured_outputs_config.reasoning_parser - ) and reasoning_parser not in valid_reasoning_parses: + ) and reasoning_parser not in valid_reasoning_parsers: raise KeyError( f"invalid reasoning parser: {reasoning_parser} " - f"(chose from {{ {','.join(valid_reasoning_parses)} }})" + f"(chose from {{ {','.join(valid_reasoning_parsers)} }})" ) @@ -1964,6 +1964,9 @@ def setup_server(args): if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3: ToolParserManager.import_tool_parser(args.tool_parser_plugin) + if args.reasoning_parser_plugin and len(args.reasoning_parser_plugin) > 3: + ReasoningParserManager.import_reasoning_parser(args.reasoning_parser_plugin) + validate_api_server_args(args) # workaround to make sure that we bind the port before the engine is set up. 
@@ -2013,6 +2016,9 @@ async def run_server_worker( if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3: ToolParserManager.import_tool_parser(args.tool_parser_plugin) + if args.reasoning_parser_plugin and len(args.reasoning_parser_plugin) > 3: + ReasoningParserManager.import_reasoning_parser(args.reasoning_parser_plugin) + # Load logging config for uvicorn if specified log_config = load_log_config(args.log_config_file) if log_config is not None: diff --git a/vllm/entrypoints/openai/run_batch.py b/vllm/entrypoints/openai/run_batch.py index 7a39113961e1d..4b9dba085a8e9 100644 --- a/vllm/entrypoints/openai/run_batch.py +++ b/vllm/entrypoints/openai/run_batch.py @@ -334,13 +334,13 @@ async def run_request( def validate_run_batch_args(args): - valid_reasoning_parses = ReasoningParserManager.list_registered() + valid_reasoning_parsers = ReasoningParserManager.list_registered() if ( reasoning_parser := args.structured_outputs_config.reasoning_parser - ) and reasoning_parser not in valid_reasoning_parses: + ) and reasoning_parser not in valid_reasoning_parsers: raise KeyError( f"invalid reasoning parser: {reasoning_parser} " - f"(chose from {{ {','.join(valid_reasoning_parses)} }})" + f"(chose from {{ {','.join(valid_reasoning_parsers)} }})" ) diff --git a/vllm/reasoning/basic_parsers.py b/vllm/reasoning/basic_parsers.py index 621a73b2a59f0..5fb3c8d368a85 100644 --- a/vllm/reasoning/basic_parsers.py +++ b/vllm/reasoning/basic_parsers.py @@ -3,15 +3,21 @@ from abc import abstractmethod from collections.abc import Sequence +from typing import TYPE_CHECKING, Any -from vllm.entrypoints.openai.protocol import ( - ChatCompletionRequest, - DeltaMessage, - ResponsesRequest, -) +from vllm.entrypoints.openai.protocol import DeltaMessage from vllm.reasoning.abs_reasoning_parsers import ReasoningParser from vllm.transformers_utils.tokenizer import AnyTokenizer +if TYPE_CHECKING: + from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, + ResponsesRequest, + 
) +else: + ChatCompletionRequest = Any + ResponsesRequest = Any + class BaseThinkingReasoningParser(ReasoningParser): """ diff --git a/vllm/v1/structured_output/__init__.py b/vllm/v1/structured_output/__init__.py index 6f9dbeabd8ca6..acc00526ee89c 100644 --- a/vllm/v1/structured_output/__init__.py +++ b/vllm/v1/structured_output/__init__.py @@ -64,6 +64,15 @@ class StructuredOutputManager: self.tokenizer = init_tokenizer_from_configs( model_config=self.vllm_config.model_config ) + reasoning_parser = ( + self.vllm_config.structured_outputs_config.reasoning_parser + ) + reasoning_parser_plugin = ( + self.vllm_config.structured_outputs_config.reasoning_parser_plugin + ) + if reasoning_parser_plugin and len(reasoning_parser_plugin) > 3: + ReasoningParserManager.import_reasoning_parser(reasoning_parser_plugin) + reasoning_parser = ( self.vllm_config.structured_outputs_config.reasoning_parser )