[Core] add support for reasoning parser plugins (#28075)

Signed-off-by: walter beller-morales <walter.beller.morales@gmail.com>
This commit is contained in:
Walter Beller-Morales 2025-11-05 12:15:06 -05:00 committed by GitHub
parent c18f88c6ca
commit 752ddeacaa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 62 additions and 15 deletions

View File

@ -37,6 +37,9 @@ class StructuredOutputsConfig:
reasoning_parser: str = ""
"""Select the reasoning parser depending on the model that you're using.
This is used to parse the reasoning content into OpenAI API format."""
reasoning_parser_plugin: str = ""
"""Path to a reasoning parser plugin that can be dynamically loaded and
registered."""
enable_in_reasoning: bool = False
"""Whether to use structured input for reasoning."""
@ -60,6 +63,22 @@ class StructuredOutputsConfig:
@model_validator(mode="after")
def _validate_structured_output_config(self) -> Self:
# Import here to avoid circular import
from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager
if self.reasoning_parser_plugin and len(self.reasoning_parser_plugin) > 3:
ReasoningParserManager.import_reasoning_parser(self.reasoning_parser_plugin)
valid_reasoning_parsers = ReasoningParserManager.list_registered()
if (
self.reasoning_parser != ""
and self.reasoning_parser not in valid_reasoning_parsers
):
raise ValueError(
f"invalid reasoning parser: {self.reasoning_parser} "
f"(chose from {{ {','.join(valid_reasoning_parsers)} }})"
)
if self.disable_any_whitespace and self.backend not in ("xgrammar", "guidance"):
raise ValueError(
"disable_any_whitespace is only supported for "

View File

@ -80,7 +80,6 @@ from vllm.logger import init_logger
from vllm.platforms import CpuArchEnum, current_platform
from vllm.plugins import load_general_plugins
from vllm.ray.lazy_utils import is_in_ray_actor, is_ray_initialized
from vllm.reasoning import ReasoningParserManager
from vllm.transformers_utils.config import (
get_model_path,
is_interleaved,
@ -495,7 +494,7 @@ class EngineArgs:
VllmConfig, "structured_outputs_config"
)
reasoning_parser: str = StructuredOutputsConfig.reasoning_parser
reasoning_parser_plugin: str | None = None
# Deprecated guided decoding fields
guided_decoding_backend: str | None = None
guided_decoding_disable_fallback: bool | None = None
@ -707,10 +706,13 @@ class EngineArgs:
)
structured_outputs_group.add_argument(
"--reasoning-parser",
# This choice is a special case because it's not static
choices=list(ReasoningParserManager.list_registered()),
# Choices need to be validated after parsing to include plugins
**structured_outputs_kwargs["reasoning_parser"],
)
structured_outputs_group.add_argument(
"--reasoning-parser-plugin",
**structured_outputs_kwargs["reasoning_parser_plugin"],
)
# Deprecated guided decoding arguments
for arg, type in [
("--guided-decoding-backend", str),
@ -1629,6 +1631,11 @@ class EngineArgs:
if self.reasoning_parser:
self.structured_outputs_config.reasoning_parser = self.reasoning_parser
if self.reasoning_parser_plugin:
self.structured_outputs_config.reasoning_parser_plugin = (
self.reasoning_parser_plugin
)
# Forward the deprecated CLI args to the StructuredOutputsConfig
so_config = self.structured_outputs_config
if self.guided_decoding_backend is not None:

View File

@ -1944,13 +1944,13 @@ def validate_api_server_args(args):
f"(chose from {{ {','.join(valid_tool_parses)} }})"
)
valid_reasoning_parses = ReasoningParserManager.list_registered()
valid_reasoning_parsers = ReasoningParserManager.list_registered()
if (
reasoning_parser := args.structured_outputs_config.reasoning_parser
) and reasoning_parser not in valid_reasoning_parses:
) and reasoning_parser not in valid_reasoning_parsers:
raise KeyError(
f"invalid reasoning parser: {reasoning_parser} "
f"(chose from {{ {','.join(valid_reasoning_parses)} }})"
f"(chose from {{ {','.join(valid_reasoning_parsers)} }})"
)
@ -1964,6 +1964,9 @@ def setup_server(args):
if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3:
ToolParserManager.import_tool_parser(args.tool_parser_plugin)
if args.reasoning_parser_plugin and len(args.reasoning_parser_plugin) > 3:
ReasoningParserManager.import_reasoning_parser(args.reasoning_parser_plugin)
validate_api_server_args(args)
# workaround to make sure that we bind the port before the engine is set up.
@ -2013,6 +2016,9 @@ async def run_server_worker(
if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3:
ToolParserManager.import_tool_parser(args.tool_parser_plugin)
if args.reasoning_parser_plugin and len(args.reasoning_parser_plugin) > 3:
ReasoningParserManager.import_reasoning_parser(args.reasoning_parser_plugin)
# Load logging config for uvicorn if specified
log_config = load_log_config(args.log_config_file)
if log_config is not None:

View File

@ -334,13 +334,13 @@ async def run_request(
def validate_run_batch_args(args):
valid_reasoning_parses = ReasoningParserManager.list_registered()
valid_reasoning_parsers = ReasoningParserManager.list_registered()
if (
reasoning_parser := args.structured_outputs_config.reasoning_parser
) and reasoning_parser not in valid_reasoning_parses:
) and reasoning_parser not in valid_reasoning_parsers:
raise KeyError(
f"invalid reasoning parser: {reasoning_parser} "
f"(chose from {{ {','.join(valid_reasoning_parses)} }})"
f"(chose from {{ {','.join(valid_reasoning_parsers)} }})"
)

View File

@ -3,15 +3,21 @@
from abc import abstractmethod
from collections.abc import Sequence
from typing import TYPE_CHECKING, Any
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
DeltaMessage,
ResponsesRequest,
)
from vllm.entrypoints.openai.protocol import DeltaMessage
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
from vllm.transformers_utils.tokenizer import AnyTokenizer
if TYPE_CHECKING:
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
ResponsesRequest,
)
else:
ChatCompletionRequest = Any
ResponsesRequest = Any
class BaseThinkingReasoningParser(ReasoningParser):
"""

View File

@ -64,6 +64,15 @@ class StructuredOutputManager:
self.tokenizer = init_tokenizer_from_configs(
model_config=self.vllm_config.model_config
)
reasoning_parser = (
self.vllm_config.structured_outputs_config.reasoning_parser
)
reasoning_parser_plugin = (
self.vllm_config.structured_outputs_config.reasoning_parser_plugin
)
if reasoning_parser_plugin and len(reasoning_parser_plugin) > 3:
ReasoningParserManager.import_reasoning_parser(reasoning_parser_plugin)
reasoning_parser = (
self.vllm_config.structured_outputs_config.reasoning_parser
)