[gpt-oss][1/N] EZ: refactor serving_responses for modularity (#26948)
Signed-off-by: Andrew Xia <axia@meta.com>
parent aa255ff55a
commit e6ba2000ae
vllm/entrypoints/openai/serving_responses.py
@@ -227,6 +227,29 @@ class OpenAIServingResponses(OpenAIServing):
             )
         return None
 
+    def _validate_create_responses_input(
+        self, request: ResponsesRequest
+    ) -> ErrorResponse | None:
+        if self.use_harmony and request.is_include_output_logprobs():
+            return self.create_error_response(
+                err_type="invalid_request_error",
+                message="logprobs are not supported with gpt-oss models",
+                status_code=HTTPStatus.BAD_REQUEST,
+            )
+        if request.store and not self.enable_store and request.background:
+            return self.create_error_response(
+                err_type="invalid_request_error",
+                message=(
+                    "This vLLM engine does not support `store=True` and "
+                    "therefore does not support the background mode. To "
+                    "enable these features, set the environment variable "
+                    "`VLLM_ENABLE_RESPONSES_API_STORE=1` when launching "
+                    "the vLLM server."
+                ),
+                status_code=HTTPStatus.BAD_REQUEST,
+            )
+        return None
+
     async def create_responses(
         self,
         request: ResponsesRequest,
@@ -240,6 +263,9 @@ class OpenAIServingResponses(OpenAIServing):
         if error_check_ret is not None:
             logger.error("Error with model %s", error_check_ret)
             return error_check_ret
+        maybe_validation_error = self._validate_create_responses_input(request)
+        if maybe_validation_error is not None:
+            return maybe_validation_error
 
         # If the engine is dead, raise the engine's DEAD_ERROR.
         # This is required for the streaming case, where we return a
@@ -248,18 +274,6 @@ class OpenAIServingResponses(OpenAIServing):
             raise self.engine_client.dead_error
 
         if request.store and not self.enable_store:
-            if request.background:
-                return self.create_error_response(
-                    err_type="invalid_request_error",
-                    message=(
-                        "This vLLM engine does not support `store=True` and "
-                        "therefore does not support the background mode. To "
-                        "enable these features, set the environment variable "
-                        "`VLLM_ENABLE_RESPONSES_API_STORE=1` when launching "
-                        "the vLLM server."
-                    ),
-                    status_code=HTTPStatus.BAD_REQUEST,
-                )
             # Disable the store option.
             # NOTE(woosuk): Although returning an error is possible, we opted
             # to implicitly disable store and process the request anyway, as
@@ -267,12 +281,6 @@ class OpenAIServingResponses(OpenAIServing):
             # (i.e., their request's `store=True` just because it's the default
             # value).
             request.store = False
-        if self.use_harmony and request.is_include_output_logprobs():
-            return self.create_error_response(
-                err_type="invalid_request_error",
-                message="logprobs are not supported with gpt-oss models",
-                status_code=HTTPStatus.BAD_REQUEST,
-            )
 
         # Handle the previous response ID.
         prev_response_id = request.previous_response_id
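
The four hunks above move two early-exit checks out of `create_responses` into a dedicated validator that returns an error or `None`. A minimal, self-contained sketch of that guard-clause pattern follows; the `Request`/`ErrorResponse` dataclasses and the `validate_input`/`handle` names are hypothetical stand-ins for illustration, not vLLM's actual classes:

from dataclasses import dataclass
from http import HTTPStatus


@dataclass
class ErrorResponse:  # hypothetical stand-in for vLLM's ErrorResponse
    err_type: str
    message: str
    status_code: HTTPStatus


@dataclass
class Request:  # hypothetical stand-in for ResponsesRequest
    store: bool = True
    background: bool = False
    include_output_logprobs: bool = False


def validate_input(
    req: Request, use_harmony: bool, enable_store: bool
) -> ErrorResponse | None:
    # Each unsupported option combination gets its own guard clause.
    if use_harmony and req.include_output_logprobs:
        return ErrorResponse(
            "invalid_request_error",
            "logprobs are not supported with gpt-oss models",
            HTTPStatus.BAD_REQUEST,
        )
    if req.store and not enable_store and req.background:
        return ErrorResponse(
            "invalid_request_error",
            "background mode requires store support",
            HTTPStatus.BAD_REQUEST,
        )
    return None


def handle(req: Request) -> ErrorResponse | str:
    # The handler short-circuits on the first validation error.
    err = validate_input(req, use_harmony=True, enable_store=False)
    if err is not None:
        return err
    return "ok"


assert isinstance(handle(Request(background=True)), ErrorResponse)
assert handle(Request(store=False)) == "ok"

Returning the error object rather than raising keeps the handler's control flow explicit and makes the validator trivially unit-testable in isolation.
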
@@ -849,6 +857,47 @@ class OpenAIServingResponses(OpenAIServing):
             messages.extend(request.input)  # type: ignore
         return messages
 
+    def _construct_harmony_system_input_message(
+        self, request: ResponsesRequest, with_custom_tools: bool, tool_types: list[str]
+    ) -> OpenAIHarmonyMessage:
+        reasoning_effort = request.reasoning.effort if request.reasoning else None
+        enable_browser = (
+            "web_search_preview" in tool_types
+            and self.tool_server is not None
+            and self.tool_server.has_tool("browser")
+        )
+        enable_code_interpreter = (
+            "code_interpreter" in tool_types
+            and self.tool_server is not None
+            and self.tool_server.has_tool("python")
+        )
+        enable_container = (
+            "container" in tool_types
+            and self.tool_server is not None
+            and self.tool_server.has_tool("container")
+        )
+        sys_msg = get_system_message(
+            reasoning_effort=reasoning_effort,
+            browser_description=(
+                self.tool_server.get_tool_description("browser")
+                if enable_browser and self.tool_server is not None
+                else None
+            ),
+            python_description=(
+                self.tool_server.get_tool_description("python")
+                if enable_code_interpreter and self.tool_server is not None
+                else None
+            ),
+            container_description=(
+                self.tool_server.get_tool_description("container")
+                if enable_container and self.tool_server is not None
+                else None
+            ),
+            instructions=request.instructions,
+            with_custom_tools=with_custom_tools,
+        )
+        return sys_msg
+
     def _construct_input_messages_with_harmony(
         self,
         request: ResponsesRequest,
@@ -857,9 +906,7 @@ class OpenAIServingResponses(OpenAIServing):
         messages: list[OpenAIHarmonyMessage] = []
         if prev_response is None:
             # New conversation.
-            reasoning_effort = request.reasoning.effort if request.reasoning else None
             tool_types = [tool.type for tool in request.tools]
-
             # Allow the MCP Tool type to enable built in tools if the
             # server_label is allowlisted in
             # envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS
@@ -870,41 +917,10 @@ class OpenAIServingResponses(OpenAIServing):
                 and tool.server_label in envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS
             ):
                 tool_types.append(tool.server_label)
-            enable_browser = (
-                "web_search_preview" in tool_types
-                and self.tool_server is not None
-                and self.tool_server.has_tool("browser")
-            )
-            enable_code_interpreter = (
-                "code_interpreter" in tool_types
-                and self.tool_server is not None
-                and self.tool_server.has_tool("python")
-            )
-            enable_container = (
-                "container" in tool_types
-                and self.tool_server is not None
-                and self.tool_server.has_tool("container")
-            )
             with_custom_tools = has_custom_tools(tool_types)
-            sys_msg = get_system_message(
-                reasoning_effort=reasoning_effort,
-                browser_description=(
-                    self.tool_server.get_tool_description("browser")
-                    if enable_browser and self.tool_server is not None
-                    else None
-                ),
-                python_description=(
-                    self.tool_server.get_tool_description("python")
-                    if enable_code_interpreter and self.tool_server is not None
-                    else None
-                ),
-                container_description=(
-                    self.tool_server.get_tool_description("container")
-                    if enable_container and self.tool_server is not None
-                    else None
-                ),
-                instructions=request.instructions,
-                with_custom_tools=with_custom_tools,
+
+            sys_msg = self._construct_harmony_system_input_message(
+                request, with_custom_tools, tool_types
             )
             messages.append(sys_msg)
             if with_custom_tools:
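
The remaining hunks fold the three repeated `enable_*` checks and the `get_system_message(...)` call into the new `_construct_harmony_system_input_message` helper. The same requested-and-served check could equally be written table-driven; the sketch below is a hypothetical variant under assumed names (`ToolServer` and `build_system_message` are stand-ins, not vLLM's tool_server or get_system_message API):

# Mapping from request-side tool types to server-side tool names,
# mirroring the pairs hard-coded in the three enable_* checks above.
TOOL_NAME_BY_TYPE = {
    "web_search_preview": "browser",
    "code_interpreter": "python",
    "container": "container",
}


class ToolServer:  # hypothetical stand-in for vLLM's tool server
    def __init__(self, tools: dict[str, str]):
        self._tools = tools  # tool name -> description

    def has_tool(self, name: str) -> bool:
        return name in self._tools

    def get_tool_description(self, name: str) -> str:
        return self._tools[name]


def build_system_message(
    tool_types: list[str],
    tool_server: ToolServer | None,
    instructions: str | None = None,
) -> dict:
    # A tool is enabled only when it is both requested and actually served.
    descriptions = {}
    for tool_type, tool_name in TOOL_NAME_BY_TYPE.items():
        if (
            tool_type in tool_types
            and tool_server is not None
            and tool_server.has_tool(tool_name)
        ):
            descriptions[tool_name] = tool_server.get_tool_description(tool_name)
    return {"instructions": instructions, "tools": descriptions}


server = ToolServer({"browser": "search the web", "python": "run code"})
msg = build_system_message(["web_search_preview", "container"], server)
assert msg["tools"] == {"browser": "search the web"}  # container not served

The table-driven form would make adding a fourth built-in tool a one-line change, at the cost of diverging from the explicit per-tool keyword arguments that `get_system_message` expects.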