diff --git a/docs/serving/openai_compatible_server.md b/docs/serving/openai_compatible_server.md
index 0e29204f8947c..6a08f872def15 100644
--- a/docs/serving/openai_compatible_server.md
+++ b/docs/serving/openai_compatible_server.md
@@ -47,6 +47,8 @@ We currently support the following OpenAI APIs:
 - [Completions API](#completions-api) (`/v1/completions`)
   - Only applicable to [text generation models](../models/generative_models.md).
   - *Note: `suffix` parameter is not supported.*
+- [Responses API](#responses-api) (`/v1/responses`)
+  - Only applicable to [text generation models](../models/generative_models.md).
 - [Chat Completions API](#chat-api) (`/v1/chat/completions`)
   - Only applicable to [text generation models](../models/generative_models.md) with a [chat template](../serving/openai_compatible_server.md#chat-template).
   - *Note: `user` parameter is ignored.*
@@ -229,6 +231,31 @@ The following extra parameters are supported:
     --8<-- "vllm/entrypoints/openai/protocol.py:chat-completion-extra-params"
     ```
 
+### Responses API
+
+Our Responses API is compatible with [OpenAI's Responses API](https://platform.openai.com/docs/api-reference/responses);
+you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
+
+Code example: [examples/online_serving/openai_responses_client_with_tools.py](../../examples/online_serving/openai_responses_client_with_tools.py)
+
+#### Extra parameters
+
+The following extra parameters in the request object are supported:
+
+??? code
+
+    ```python
+    --8<-- "vllm/entrypoints/openai/protocol.py:responses-extra-params"
+    ```
+
+The following extra parameters in the response object are supported:
+
+??? code
+
+    ```python
+    --8<-- "vllm/entrypoints/openai/protocol.py:responses-response-extra-params"
+    ```
+
 ### Embeddings API
 
 Our Embeddings API is compatible with [OpenAI's Embeddings API](https://platform.openai.com/docs/api-reference/embeddings);
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 94dde4564ea0c..a3c347cb1bd3f 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -1654,13 +1654,23 @@ class ResponsesResponse(OpenAIBaseModel):
     usage: ResponseUsage | None = None
     user: str | None = None
 
-    # --8<-- [start:responses-extra-params]
+    # --8<-- [start:responses-response-extra-params]
     # These are populated when enable_response_messages is set to True
     # NOTE: custom serialization is needed
     # see serialize_input_messages and serialize_output_messages
-    input_messages: ResponseInputOutputMessage | None = None
-    output_messages: ResponseInputOutputMessage | None = None
-    # --8<-- [end:responses-extra-params]
+    input_messages: ResponseInputOutputMessage | None = Field(
+        default=None,
+        description=(
+            "If enable_response_messages, we can show raw token input to model."
+        ),
+    )
+    output_messages: ResponseInputOutputMessage | None = Field(
+        default=None,
+        description=(
+            "If enable_response_messages, we can show raw token output of model."
+        ),
+    )
+    # --8<-- [end:responses-response-extra-params]
 
     # NOTE: openAI harmony doesn't serialize TextContent properly,
     # TODO: this fixes for TextContent, but need to verify for tools etc