Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-14 04:24:56 +08:00)
[Responses API] Ignore store=True and process the request by default (#22185)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
parent a5fff3bd49
commit 9af654cc38
@@ -90,8 +90,17 @@ class OpenAIServingResponses(OpenAIServing):
             logger.info("Using default chat sampling params from %s: %s",
                         source, self.default_sampling_params)
 
-        # False by default.
+        # If False (default), the "store" option is (silently) ignored and the
+        # response is not stored. If True, the response is stored in memory.
+        # NOTE(woosuk): This may not be intuitive for users, as the default
+        # behavior in OpenAI's Responses API is to store the response, but
+        # vLLM's default behavior is not.
         self.enable_store = envs.VLLM_ENABLE_RESPONSES_API_STORE
+        if self.enable_store:
+            logger.warning_once(
+                "`VLLM_ENABLE_RESPONSES_API_STORE` is enabled. This may "
+                "cause a memory leak since we never remove responses from "
+                "the store.")
         # HACK(woosuk): This is a hack. We should use a better store.
         # FIXME: If enable_store=True, this may cause a memory leak since we
         # never remove responses from the store.
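For illustration only: a minimal sketch of the kind of unbounded in-memory store that the HACK/FIXME comments above warn about. The ResponseStore name and its methods are hypothetical and are not vLLM's actual store implementation.

# Illustrative sketch, not vLLM code: an in-memory store with no eviction.
# Because entries are never removed, memory grows with every stored response,
# which is the leak the warning_once message above refers to.
class ResponseStore:
    def __init__(self) -> None:
        self._responses: dict[str, object] = {}

    def put(self, response_id: str, response: object) -> None:
        # No eviction policy: the entry stays until the server shuts down.
        self._responses[response_id] = response

    def get(self, response_id: str) -> object | None:
        return self._responses.get(response_id)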
@@ -121,9 +130,25 @@ class OpenAIServingResponses(OpenAIServing):
         if self.engine_client.errored:
             raise self.engine_client.dead_error
 
-        # If store is not enabled, return an error.
         if request.store and not self.enable_store:
-            return self._make_store_not_supported_error()
+            if request.background:
+                return self.create_error_response(
+                    err_type="invalid_request_error",
+                    message=(
+                        "This vLLM engine does not support `store=True` and "
+                        "therefore does not support the background mode. To "
+                        "enable these features, set the environment variable "
+                        "`VLLM_ENABLE_RESPONSES_API_STORE=1` when launching "
+                        "the vLLM server."),
+                    status_code=HTTPStatus.BAD_REQUEST,
+                )
+            # Disable the store option.
+            # NOTE(woosuk): Although returning an error is possible, we opted
+            # to implicitly disable store and process the request anyway, as
+            # we assume most users do not intend to actually store the response
+            # (i.e., their request's `store=True` just because it's the default
+            # value).
+            request.store = False
 
         # Handle the previous response ID.
         prev_response_id = request.previous_response_id
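From the client side, the behavior introduced by this hunk might look like the sketch below. It assumes the `openai` Python SDK's Responses API support and a locally running vLLM OpenAI-compatible server; the base URL, API key, and model name are placeholders.

# Hedged sketch: exercising the new default behavior from a client.
# Base URL, api_key, and model name below are placeholders.
import openai
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# `store` defaults to True in the Responses API. Without
# VLLM_ENABLE_RESPONSES_API_STORE=1 on the server, vLLM now silently sets
# store=False and processes the request instead of rejecting it.
resp = client.responses.create(model="my-model", input="Hello!")
print(resp.output_text)

# Background mode still depends on the store, so this request is expected to
# fail with an invalid_request_error unless the env var is set on the server.
try:
    client.responses.create(model="my-model", input="Hello!", background=True)
except openai.BadRequestError as err:
    print("background mode rejected:", err)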
@@ -1060,7 +1060,8 @@ environment_variables: dict[str, Callable[[], Any]] = {
 
     # Enables support for the "store" option in the OpenAI Responses API.
     # When set to 1, vLLM's OpenAI server will retain the input and output
-    # messages for those requests in memory. By default, this is disabled (0).
+    # messages for those requests in memory. By default, this is disabled (0),
+    # and the "store" option is ignored.
     # NOTE/WARNING:
     # 1. Messages are kept in memory only (not persisted to disk) and will be
     #    lost when the vLLM server shuts down.
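If the store is enabled on the server (for example by launching it with VLLM_ENABLE_RESPONSES_API_STORE=1), stored responses can be chained through previous_response_id. A hedged sketch, again with placeholder base URL and model name:

# Sketch assuming the server was started with the store enabled,
# e.g. `VLLM_ENABLE_RESPONSES_API_STORE=1 vllm serve <model>`.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

first = client.responses.create(model="my-model",
                                input="Remember the number 42.")

# With the store enabled, the response above is kept in server memory (only
# until shutdown), so a follow-up request can reference it by ID.
follow_up = client.responses.create(
    model="my-model",
    input="What number did I ask you to remember?",
    previous_response_id=first.id,
)
print(follow_up.output_text)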