mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-05 06:09:09 +08:00
[Core] add support for reasoning parser plugins (#28075)
Signed-off-by: walter beller-morales <walter.beller.morales@gmail.com>
This commit is contained in:
parent
c18f88c6ca
commit
752ddeacaa
@ -37,6 +37,9 @@ class StructuredOutputsConfig:
|
|||||||
reasoning_parser: str = ""
|
reasoning_parser: str = ""
|
||||||
"""Select the reasoning parser depending on the model that you're using.
|
"""Select the reasoning parser depending on the model that you're using.
|
||||||
This is used to parse the reasoning content into OpenAI API format."""
|
This is used to parse the reasoning content into OpenAI API format."""
|
||||||
|
reasoning_parser_plugin: str = ""
|
||||||
|
"""Path to a reasoning parser plugin that can be dynamically
|
||||||
|
loaded and registered."""
|
||||||
enable_in_reasoning: bool = False
|
enable_in_reasoning: bool = False
|
||||||
"""Whether to use structured input for reasoning."""
|
"""Whether to use structured input for reasoning."""
|
||||||
|
|
||||||
@ -60,6 +63,22 @@ class StructuredOutputsConfig:
|
|||||||
|
|
||||||
@model_validator(mode="after")
|
@model_validator(mode="after")
|
||||||
def _validate_structured_output_config(self) -> Self:
|
def _validate_structured_output_config(self) -> Self:
|
||||||
|
# Import here to avoid circular import
|
||||||
|
from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager
|
||||||
|
|
||||||
|
if self.reasoning_parser_plugin and len(self.reasoning_parser_plugin) > 3:
|
||||||
|
ReasoningParserManager.import_reasoning_parser(self.reasoning_parser_plugin)
|
||||||
|
|
||||||
|
valid_reasoning_parsers = ReasoningParserManager.list_registered()
|
||||||
|
if (
|
||||||
|
self.reasoning_parser != ""
|
||||||
|
and self.reasoning_parser not in valid_reasoning_parsers
|
||||||
|
):
|
||||||
|
raise ValueError(
|
||||||
|
f"invalid reasoning parser: {self.reasoning_parser} "
|
||||||
|
f"(chose from {{ {','.join(valid_reasoning_parsers)} }})"
|
||||||
|
)
|
||||||
|
|
||||||
if self.disable_any_whitespace and self.backend not in ("xgrammar", "guidance"):
|
if self.disable_any_whitespace and self.backend not in ("xgrammar", "guidance"):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"disable_any_whitespace is only supported for "
|
"disable_any_whitespace is only supported for "
|
||||||
|
|||||||
@ -80,7 +80,6 @@ from vllm.logger import init_logger
|
|||||||
from vllm.platforms import CpuArchEnum, current_platform
|
from vllm.platforms import CpuArchEnum, current_platform
|
||||||
from vllm.plugins import load_general_plugins
|
from vllm.plugins import load_general_plugins
|
||||||
from vllm.ray.lazy_utils import is_in_ray_actor, is_ray_initialized
|
from vllm.ray.lazy_utils import is_in_ray_actor, is_ray_initialized
|
||||||
from vllm.reasoning import ReasoningParserManager
|
|
||||||
from vllm.transformers_utils.config import (
|
from vllm.transformers_utils.config import (
|
||||||
get_model_path,
|
get_model_path,
|
||||||
is_interleaved,
|
is_interleaved,
|
||||||
@ -495,7 +494,7 @@ class EngineArgs:
|
|||||||
VllmConfig, "structured_outputs_config"
|
VllmConfig, "structured_outputs_config"
|
||||||
)
|
)
|
||||||
reasoning_parser: str = StructuredOutputsConfig.reasoning_parser
|
reasoning_parser: str = StructuredOutputsConfig.reasoning_parser
|
||||||
|
reasoning_parser_plugin: str | None = None
|
||||||
# Deprecated guided decoding fields
|
# Deprecated guided decoding fields
|
||||||
guided_decoding_backend: str | None = None
|
guided_decoding_backend: str | None = None
|
||||||
guided_decoding_disable_fallback: bool | None = None
|
guided_decoding_disable_fallback: bool | None = None
|
||||||
@ -707,10 +706,13 @@ class EngineArgs:
|
|||||||
)
|
)
|
||||||
structured_outputs_group.add_argument(
|
structured_outputs_group.add_argument(
|
||||||
"--reasoning-parser",
|
"--reasoning-parser",
|
||||||
# This choice is a special case because it's not static
|
# Choices need to be validated after parsing to include plugins
|
||||||
choices=list(ReasoningParserManager.list_registered()),
|
|
||||||
**structured_outputs_kwargs["reasoning_parser"],
|
**structured_outputs_kwargs["reasoning_parser"],
|
||||||
)
|
)
|
||||||
|
structured_outputs_group.add_argument(
|
||||||
|
"--reasoning-parser-plugin",
|
||||||
|
**structured_outputs_kwargs["reasoning_parser_plugin"],
|
||||||
|
)
|
||||||
# Deprecated guided decoding arguments
|
# Deprecated guided decoding arguments
|
||||||
for arg, type in [
|
for arg, type in [
|
||||||
("--guided-decoding-backend", str),
|
("--guided-decoding-backend", str),
|
||||||
@ -1629,6 +1631,11 @@ class EngineArgs:
|
|||||||
if self.reasoning_parser:
|
if self.reasoning_parser:
|
||||||
self.structured_outputs_config.reasoning_parser = self.reasoning_parser
|
self.structured_outputs_config.reasoning_parser = self.reasoning_parser
|
||||||
|
|
||||||
|
if self.reasoning_parser_plugin:
|
||||||
|
self.structured_outputs_config.reasoning_parser_plugin = (
|
||||||
|
self.reasoning_parser_plugin
|
||||||
|
)
|
||||||
|
|
||||||
# Forward the deprecated CLI args to the StructuredOutputsConfig
|
# Forward the deprecated CLI args to the StructuredOutputsConfig
|
||||||
so_config = self.structured_outputs_config
|
so_config = self.structured_outputs_config
|
||||||
if self.guided_decoding_backend is not None:
|
if self.guided_decoding_backend is not None:
|
||||||
|
|||||||
@ -1944,13 +1944,13 @@ def validate_api_server_args(args):
|
|||||||
f"(chose from {{ {','.join(valid_tool_parses)} }})"
|
f"(chose from {{ {','.join(valid_tool_parses)} }})"
|
||||||
)
|
)
|
||||||
|
|
||||||
valid_reasoning_parses = ReasoningParserManager.list_registered()
|
valid_reasoning_parsers = ReasoningParserManager.list_registered()
|
||||||
if (
|
if (
|
||||||
reasoning_parser := args.structured_outputs_config.reasoning_parser
|
reasoning_parser := args.structured_outputs_config.reasoning_parser
|
||||||
) and reasoning_parser not in valid_reasoning_parses:
|
) and reasoning_parser not in valid_reasoning_parsers:
|
||||||
raise KeyError(
|
raise KeyError(
|
||||||
f"invalid reasoning parser: {reasoning_parser} "
|
f"invalid reasoning parser: {reasoning_parser} "
|
||||||
f"(chose from {{ {','.join(valid_reasoning_parses)} }})"
|
f"(chose from {{ {','.join(valid_reasoning_parsers)} }})"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -1964,6 +1964,9 @@ def setup_server(args):
|
|||||||
if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3:
|
if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3:
|
||||||
ToolParserManager.import_tool_parser(args.tool_parser_plugin)
|
ToolParserManager.import_tool_parser(args.tool_parser_plugin)
|
||||||
|
|
||||||
|
if args.reasoning_parser_plugin and len(args.reasoning_parser_plugin) > 3:
|
||||||
|
ReasoningParserManager.import_reasoning_parser(args.reasoning_parser_plugin)
|
||||||
|
|
||||||
validate_api_server_args(args)
|
validate_api_server_args(args)
|
||||||
|
|
||||||
# workaround to make sure that we bind the port before the engine is set up.
|
# workaround to make sure that we bind the port before the engine is set up.
|
||||||
@ -2013,6 +2016,9 @@ async def run_server_worker(
|
|||||||
if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3:
|
if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3:
|
||||||
ToolParserManager.import_tool_parser(args.tool_parser_plugin)
|
ToolParserManager.import_tool_parser(args.tool_parser_plugin)
|
||||||
|
|
||||||
|
if args.reasoning_parser_plugin and len(args.reasoning_parser_plugin) > 3:
|
||||||
|
ReasoningParserManager.import_reasoning_parser(args.reasoning_parser_plugin)
|
||||||
|
|
||||||
# Load logging config for uvicorn if specified
|
# Load logging config for uvicorn if specified
|
||||||
log_config = load_log_config(args.log_config_file)
|
log_config = load_log_config(args.log_config_file)
|
||||||
if log_config is not None:
|
if log_config is not None:
|
||||||
|
|||||||
@ -334,13 +334,13 @@ async def run_request(
|
|||||||
|
|
||||||
|
|
||||||
def validate_run_batch_args(args):
|
def validate_run_batch_args(args):
|
||||||
valid_reasoning_parses = ReasoningParserManager.list_registered()
|
valid_reasoning_parsers = ReasoningParserManager.list_registered()
|
||||||
if (
|
if (
|
||||||
reasoning_parser := args.structured_outputs_config.reasoning_parser
|
reasoning_parser := args.structured_outputs_config.reasoning_parser
|
||||||
) and reasoning_parser not in valid_reasoning_parses:
|
) and reasoning_parser not in valid_reasoning_parsers:
|
||||||
raise KeyError(
|
raise KeyError(
|
||||||
f"invalid reasoning parser: {reasoning_parser} "
|
f"invalid reasoning parser: {reasoning_parser} "
|
||||||
f"(chose from {{ {','.join(valid_reasoning_parses)} }})"
|
f"(chose from {{ {','.join(valid_reasoning_parsers)} }})"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -3,15 +3,21 @@
|
|||||||
|
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
from vllm.entrypoints.openai.protocol import (
|
from vllm.entrypoints.openai.protocol import DeltaMessage
|
||||||
ChatCompletionRequest,
|
|
||||||
DeltaMessage,
|
|
||||||
ResponsesRequest,
|
|
||||||
)
|
|
||||||
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
|
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
|
||||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from vllm.entrypoints.openai.protocol import (
|
||||||
|
ChatCompletionRequest,
|
||||||
|
ResponsesRequest,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
ChatCompletionRequest = Any
|
||||||
|
ResponsesRequest = Any
|
||||||
|
|
||||||
|
|
||||||
class BaseThinkingReasoningParser(ReasoningParser):
|
class BaseThinkingReasoningParser(ReasoningParser):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@ -64,6 +64,15 @@ class StructuredOutputManager:
|
|||||||
self.tokenizer = init_tokenizer_from_configs(
|
self.tokenizer = init_tokenizer_from_configs(
|
||||||
model_config=self.vllm_config.model_config
|
model_config=self.vllm_config.model_config
|
||||||
)
|
)
|
||||||
|
reasoning_parser = (
|
||||||
|
self.vllm_config.structured_outputs_config.reasoning_parser
|
||||||
|
)
|
||||||
|
reasoning_parser_plugin = (
|
||||||
|
self.vllm_config.structured_outputs_config.reasoning_parser_plugin
|
||||||
|
)
|
||||||
|
if reasoning_parser_plugin and len(reasoning_parser_plugin) > 3:
|
||||||
|
ReasoningParserManager.import_reasoning_parser(reasoning_parser_plugin)
|
||||||
|
|
||||||
reasoning_parser = (
|
reasoning_parser = (
|
||||||
self.vllm_config.structured_outputs_config.reasoning_parser
|
self.vllm_config.structured_outputs_config.reasoning_parser
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user