[Refactor] Lazy-loaded reasoning_parser (#28092)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
Chauncey 2025-11-05 15:37:02 +08:00 committed by GitHub
parent b7cbc25416
commit e261d37c9a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 206 additions and 99 deletions

View File

@ -219,7 +219,6 @@ You can add a new `ReasoningParser` similar to [vllm/reasoning/deepseek_r1_reaso
# define a reasoning parser and register it to vllm
# the name list in register_module can be used
# in --reasoning-parser.
@ReasoningParserManager.register_module(["example"])
class ExampleParser(ReasoningParser):
def __init__(self, tokenizer: AnyTokenizer):
super().__init__(tokenizer)
@ -263,6 +262,12 @@ You can add a new `ReasoningParser` similar to [vllm/reasoning/deepseek_r1_reaso
tuple[Optional[str], Optional[str]]
A tuple containing the reasoning content and the content.
"""
# Register the reasoning parser
ReasoningParserManager.register_lazy_module(
name="example",
module_path="vllm.reasoning.example_reasoning_parser",
class_name="ExampleParser",
)
```
Additionally, to enable structured output, you'll need to create a new `Reasoner` similar to the one in [vllm/reasoning/deepseek_r1_reasoning_parser.py](../../vllm/reasoning/deepseek_r1_reasoning_parser.py).

View File

@ -5,11 +5,9 @@ import pytest
from transformers import AutoTokenizer
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.reasoning import (
DeepSeekR1ReasoningParser,
DeepSeekV3ReasoningParser,
IdentityReasoningParser,
)
from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
from vllm.reasoning.deepseek_v3_reasoning_parser import DeepSeekV3ReasoningParser
from vllm.reasoning.identity_reasoning_parser import IdentityReasoningParser
REASONING_MODEL_NAME = "deepseek-ai/DeepSeek-V3.1"

View File

@ -708,7 +708,7 @@ class EngineArgs:
structured_outputs_group.add_argument(
"--reasoning-parser",
# This choice is a special case because it's not static
choices=list(ReasoningParserManager.reasoning_parsers),
choices=list(ReasoningParserManager.list_registered()),
**structured_outputs_kwargs["reasoning_parser"],
)
# Deprecated guided decoding arguments

View File

@ -1944,7 +1944,7 @@ def validate_api_server_args(args):
f"(chose from {{ {','.join(valid_tool_parses)} }})"
)
valid_reasoning_parses = ReasoningParserManager.reasoning_parsers.keys()
valid_reasoning_parses = ReasoningParserManager.list_registered()
if (
reasoning_parser := args.structured_outputs_config.reasoning_parser
) and reasoning_parser not in valid_reasoning_parses:

View File

@ -334,7 +334,7 @@ async def run_request(
def validate_run_batch_args(args):
valid_reasoning_parses = ReasoningParserManager.reasoning_parsers.keys()
valid_reasoning_parses = ReasoningParserManager.list_registered()
if (
reasoning_parser := args.structured_outputs_config.reasoning_parser
) and reasoning_parser not in valid_reasoning_parses:

View File

@ -72,7 +72,7 @@ _TOOL_PARSERS_TO_REGISTER = {
),
"llama4_json": (
"llama_tool_parser",
"Llama4JsonToolParser",
"Llama3JsonToolParser",
),
"llama4_pythonic": (
"llama4_pythonic_tool_parser",
@ -116,11 +116,11 @@ _TOOL_PARSERS_TO_REGISTER = {
),
"qwen3_xml": (
"qwen3xml_tool_parser",
"Qwen3XmlToolParser",
"Qwen3XMLToolParser",
),
"seed_oss": (
"seed_oss_tool_parser",
"SeedOsSToolParser",
"SeedOssToolParser",
),
"step3": (
"step3_tool_parser",

View File

@ -1,39 +1,88 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from .abs_reasoning_parsers import ReasoningParser, ReasoningParserManager
from .basic_parsers import BaseThinkingReasoningParser
from .deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
from .deepseek_v3_reasoning_parser import DeepSeekV3ReasoningParser
from .ernie45_reasoning_parser import Ernie45ReasoningParser
from .glm4_moe_reasoning_parser import Glm4MoeModelReasoningParser
from .gptoss_reasoning_parser import GptOssReasoningParser
from .granite_reasoning_parser import GraniteReasoningParser
from .hunyuan_a13b_reasoning_parser import HunyuanA13BReasoningParser
from .identity_reasoning_parser import IdentityReasoningParser
from .minimax_m2_reasoning_parser import MiniMaxM2ReasoningParser
from .mistral_reasoning_parser import MistralReasoningParser
from .olmo3_reasoning_parser import Olmo3ReasoningParser
from .qwen3_reasoning_parser import Qwen3ReasoningParser
from .seedoss_reasoning_parser import SeedOSSReasoningParser
from .step3_reasoning_parser import Step3ReasoningParser
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser, ReasoningParserManager
__all__ = [
"ReasoningParser",
"BaseThinkingReasoningParser",
"ReasoningParserManager",
"DeepSeekR1ReasoningParser",
"IdentityReasoningParser",
"DeepSeekV3ReasoningParser",
"Ernie45ReasoningParser",
"GraniteReasoningParser",
"HunyuanA13BReasoningParser",
"Qwen3ReasoningParser",
"Glm4MoeModelReasoningParser",
"MistralReasoningParser",
"Olmo3ReasoningParser",
"Step3ReasoningParser",
"GptOssReasoningParser",
"SeedOSSReasoningParser",
"MiniMaxM2ReasoningParser",
]
"""
Register a lazy module mapping.
Example:
ReasoningParserManager.register_lazy_module(
name="qwen3",
module_path="vllm.reasoning.qwen3_reasoning_parser",
class_name="Qwen3ReasoningParser",
)
"""
# Built-in reasoning parsers that are registered by import location instead
# of being imported up front.
# Layout: parser name -> (module file name, class name). The file name is
# appended to the "vllm.reasoning." package prefix when the entry is
# registered.
_REASONING_PARSERS_TO_REGISTER = {
    "deepseek_r1": ("deepseek_r1_reasoning_parser", "DeepSeekR1ReasoningParser"),
    "deepseek_v3": ("deepseek_v3_reasoning_parser", "DeepSeekV3ReasoningParser"),
    "ernie45": ("ernie45_reasoning_parser", "Ernie45ReasoningParser"),
    "glm45": ("glm4_moe_reasoning_parser", "Glm4MoeModelReasoningParser"),
    "openai_gptoss": ("gptoss_reasoning_parser", "GptOssReasoningParser"),
    "granite": ("granite_reasoning_parser", "GraniteReasoningParser"),
    "hunyuan_a13b": ("hunyuan_a13b_reasoning_parser", "HunyuanA13BReasoningParser"),
    "minimax_m2": ("minimax_m2_reasoning_parser", "MiniMaxM2ReasoningParser"),
    # Shares its module with "minimax_m2"; only the class differs.
    "minimax_m2_append_think": (
        "minimax_m2_reasoning_parser",
        "MiniMaxM2AppendThinkReasoningParser",
    ),
    "mistral": ("mistral_reasoning_parser", "MistralReasoningParser"),
    "olmo3": ("olmo3_reasoning_parser", "Olmo3ReasoningParser"),
    "qwen3": ("qwen3_reasoning_parser", "Qwen3ReasoningParser"),
    "seed_oss": ("seedoss_reasoning_parser", "SeedOSSReasoningParser"),
    "step3": ("step3_reasoning_parser", "Step3ReasoningParser"),
}
def register_lazy_reasoning_parsers():
    """Register every entry of `_REASONING_PARSERS_TO_REGISTER` lazily.

    Each file name in the table is expanded to a dotted module path under
    the `vllm.reasoning` package before being handed to
    `ReasoningParserManager.register_lazy_module`.
    """
    for parser_name, spec in _REASONING_PARSERS_TO_REGISTER.items():
        file_name, class_name = spec
        ReasoningParserManager.register_lazy_module(
            parser_name, f"vllm.reasoning.{file_name}", class_name
        )


# Runs at import time so the built-in names are known as soon as the
# package is imported.
register_lazy_reasoning_parsers()

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import importlib
import os
from abc import abstractmethod
from collections.abc import Callable, Sequence
@ -129,50 +130,117 @@ class ReasoningParser:
class ReasoningParserManager:
reasoning_parsers: dict[str, type] = {}
"""
Central registry for ReasoningParser implementations.
Supports two registration modes:
- Eager registration via `register_module`
- Lazy registration via `register_lazy_module`
Each reasoning parser must inherit from `ReasoningParser`.
"""
reasoning_parsers: dict[str, type[ReasoningParser]] = {}
lazy_parsers: dict[str, tuple[str, str]] = {} # name -> (module_path, class_name)
@classmethod
def get_reasoning_parser(cls, name: str | None) -> type[ReasoningParser]:
def get_reasoning_parser(cls, name: str) -> type[ReasoningParser]:
"""
Get reasoning parser by name which is registered by `register_module`.
Retrieve a registered or lazily registered ReasoningParser class.
Raise a KeyError exception if the name is not registered.
If the parser is lazily registered, it will be imported and cached
on first access.
Raises:
KeyError: if no parser is found under the given name.
"""
if name in cls.reasoning_parsers:
return cls.reasoning_parsers[name]
raise KeyError(f"reasoning helper: '{name}' not found in reasoning_parsers")
if name in cls.lazy_parsers:
return cls._load_lazy_parser(name)
raise KeyError(f"Reasoning parser '{name}' not found.")
@classmethod
def list_registered(cls) -> list[str]:
"""Return names of all eagerly and lazily registered reasoning parsers."""
return sorted(set(cls.reasoning_parsers.keys()) | set(cls.lazy_parsers.keys()))
@classmethod
def _load_lazy_parser(cls, name: str) -> type[ReasoningParser]:
    """Resolve a lazily registered parser to its class and cache it.

    The module recorded for `name` is imported, the recorded attribute is
    looked up on it and checked to be a `ReasoningParser` subclass, and the
    class is then stored in `reasoning_parsers` under `name`.

    Raises:
        KeyError: if `name` has no entry in `lazy_parsers`.
        TypeError: if the resolved attribute is not a `ReasoningParser`
            subclass.
        Exception: any other error raised by the import or attribute
            lookup; it is logged and re-raised unchanged.
    """
    module_path, class_name = cls.lazy_parsers[name]
    try:
        resolved = getattr(importlib.import_module(module_path), class_name)
        if not issubclass(resolved, ReasoningParser):
            raise TypeError(
                f"{class_name} in {module_path} is not a ReasoningParser subclass."
            )
    except Exception as e:
        # Log with context before propagating; the caller still sees the
        # original exception.
        logger.exception(
            "Failed to import lazy reasoning parser '%s' from %s: %s",
            name,
            module_path,
            e,
        )
        raise
    else:
        cls.reasoning_parsers[name] = resolved  # cache
        return resolved
@classmethod
def _register_module(
cls,
module: type,
module: type[ReasoningParser],
module_name: str | list[str] | None = None,
force: bool = True,
) -> None:
"""Register a ReasoningParser class immediately."""
if not issubclass(module, ReasoningParser):
raise TypeError(
f"module must be subclass of ReasoningParser, but got {type(module)}"
)
if module_name is None:
module_name = module.__name__
if isinstance(module_name, str):
module_name = [module_name]
for name in module_name:
module_names = [module.__name__]
elif isinstance(module_name, str):
module_names = [module_name]
elif is_list_of(module_name, str):
module_names = module_name
else:
raise TypeError("module_name must be str, list[str], or None.")
for name in module_names:
if not force and name in cls.reasoning_parsers:
existed_module = cls.reasoning_parsers[name]
raise KeyError(
f"{name} is already registered at {existed_module.__module__}"
)
existed = cls.reasoning_parsers[name]
raise KeyError(f"{name} is already registered at {existed.__module__}")
cls.reasoning_parsers[name] = module
@classmethod
def register_lazy_module(cls, name: str, module_path: str, class_name: str) -> None:
"""
Register a lazy module mapping for delayed import.
Example:
ReasoningParserManager.register_lazy_module(
name="qwen3",
module_path="vllm.reasoning.parsers.qwen3_reasoning_parser",
class_name="Qwen3ReasoningParser",
)
"""
cls.lazy_parsers[name] = (module_path, class_name)
@classmethod
def register_module(
cls,
name: str | list[str] | None = None,
force: bool = True,
module: type | None = None,
) -> type | Callable:
module: type[ReasoningParser] | None = None,
) -> (
type[ReasoningParser] | Callable[[type[ReasoningParser]], type[ReasoningParser]]
):
"""
Register module with the given name or name list. It can be used as a
decorator (with module as None) or normal function (with module as not
@ -181,24 +249,29 @@ class ReasoningParserManager:
if not isinstance(force, bool):
raise TypeError(f"force must be a boolean, but got {type(force)}")
# raise the error ahead of time
if not (name is None or isinstance(name, str) or is_list_of(name, str)):
raise TypeError(
"name must be None, an instance of str, or a sequence of str, "
f"but got {type(name)}"
)
# use it as a normal method: x.register_module(module=SomeClass)
# Immediate registration (explicit call)
if module is not None:
cls._register_module(module=module, module_name=name, force=force)
return module
# use it as a decorator: @x.register_module()
def _register(module):
cls._register_module(module=module, module_name=name, force=force)
return module
# Decorator usage
def _decorator(obj: type[ReasoningParser]) -> type[ReasoningParser]:
module_path = obj.__module__
class_name = obj.__name__
return _register
if isinstance(name, str):
names = [name]
elif is_list_of(name, str):
names = name
else:
names = [class_name]
for n in names:
cls.lazy_parsers[n] = (module_path, class_name)
return obj
return _decorator
@classmethod
def import_reasoning_parser(cls, plugin_path: str) -> None:

View File

@ -4,11 +4,9 @@
from collections.abc import Sequence
from vllm.entrypoints.openai.protocol import DeltaMessage
from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
@ReasoningParserManager.register_module("deepseek_r1")
class DeepSeekR1ReasoningParser(BaseThinkingReasoningParser):
"""
Reasoning parser for DeepSeek R1 model.

View File

@ -10,7 +10,6 @@ from vllm.logger import init_logger
from vllm.reasoning import (
DeepSeekR1ReasoningParser,
ReasoningParser,
ReasoningParserManager,
)
from .identity_reasoning_parser import IdentityReasoningParser
@ -18,7 +17,6 @@ from .identity_reasoning_parser import IdentityReasoningParser
logger = init_logger(__name__)
@ReasoningParserManager.register_module("deepseek_v3")
class DeepSeekV3ReasoningParser(ReasoningParser):
"""
V3 parser that delegates to either DeepSeekR1ReasoningParser or

View File

@ -7,13 +7,11 @@ from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParserManager
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
logger = init_logger(__name__)
@ReasoningParserManager.register_module("ernie45")
class Ernie45ReasoningParser(BaseThinkingReasoningParser):
"""
Reasoning parser for Ernie45 thinking model.

View File

@ -7,12 +7,11 @@ from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.reasoning import ReasoningParser
logger = init_logger(__name__)
@ReasoningParserManager.register_module("glm45")
class Glm4MoeModelReasoningParser(ReasoningParser):
"""
Reasoning parser for the Glm4MoeModel model.

View File

@ -9,7 +9,7 @@ from vllm.entrypoints.harmony_utils import parse_chat_output
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.entrypoints.tool_server import ToolServer
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.reasoning import ReasoningParser
logger = init_logger(__name__)
@ -57,7 +57,6 @@ def tag_with_builtin_funcs(no_func_reaonsing_tag, builtin_tool_list: list[str])
return new_tag
@ReasoningParserManager.register_module("openai_gptoss")
class GptOssReasoningParser(ReasoningParser):
"""
Reasoning parser for GptOss model.

View File

@ -8,12 +8,11 @@ from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.reasoning import ReasoningParser
logger = init_logger(__name__)
@ReasoningParserManager.register_module("granite")
class GraniteReasoningParser(ReasoningParser):
"""
Reasoning parser for IBM Granite.

View File

@ -8,12 +8,11 @@ from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.reasoning import ReasoningParser
logger = init_logger(__name__)
@ReasoningParserManager.register_module("hunyuan_a13b")
class HunyuanA13BReasoningParser(ReasoningParser):
"""
Reasoning parser for Hunyuan A13B Model

View File

@ -9,14 +9,13 @@ from vllm.entrypoints.openai.protocol import (
ResponsesRequest,
)
from vllm.logger import init_logger
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser, ReasoningParserManager
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
from vllm.transformers_utils.tokenizer import AnyTokenizer
logger = init_logger(__name__)
@ReasoningParserManager.register_module("minimax_m2")
class MiniMaxM2ReasoningParser(BaseThinkingReasoningParser):
"""
Reasoning parser for MiniMax M2 model.
@ -33,7 +32,6 @@ class MiniMaxM2ReasoningParser(BaseThinkingReasoningParser):
return "</think>"
@ReasoningParserManager.register_module("minimax_m2_append_think")
class MiniMaxM2AppendThinkReasoningParser(ReasoningParser):
"""
Reasoning parser for MiniMax M2 model.

View File

@ -4,14 +4,13 @@
from functools import cached_property
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.reasoning import ReasoningParser
from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer
logger = init_logger(__name__)
@ReasoningParserManager.register_module("mistral")
class MistralReasoningParser(DeepSeekR1ReasoningParser):
"""
Reasoning parser for Mistral models.

View File

@ -17,7 +17,7 @@ from vllm.entrypoints.openai.protocol import (
ResponsesRequest,
)
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.reasoning import ReasoningParser
logger = init_logger(__name__)
@ -192,7 +192,6 @@ class Olmo3ReasoningBuffer:
return delta_message
@ReasoningParserManager.register_module("olmo3")
class Olmo3ReasoningParser(ReasoningParser):
"""
Reasoning parser for Olmo 3 model

View File

@ -3,11 +3,9 @@
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, ResponsesRequest
from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
@ReasoningParserManager.register_module("qwen3")
class Qwen3ReasoningParser(BaseThinkingReasoningParser):
"""
Reasoning parser for the Qwen3 model.

View File

@ -1,11 +1,10 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
@ReasoningParserManager.register_module("seed_oss")
class SeedOSSReasoningParser(BaseThinkingReasoningParser):
"""
Reasoning parser for SeedOSS model.

View File

@ -8,12 +8,11 @@ from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.reasoning import ReasoningParser
logger = init_logger(__name__)
@ReasoningParserManager.register_module("step3")
class Step3ReasoningParser(ReasoningParser):
"""
Reasoning parser for Step3 model.