[Frontend] Add request_id to the Request object so requests can be controlled better via external load balancers (#21009)
Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com>
commit 9fe98d4250
parent 29c6fbe58c
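For context, a hedged client-side sketch of what this change enables: a caller or external load balancer pins its own request_id through the OpenAI-compatible completions endpoint. The extra_body passthrough is standard openai-python usage; the base URL, model name, and "lb-..." id scheme are illustrative placeholders, not part of this diff.

```python
# Hypothetical client sketch: supply a caller-chosen request_id so an
# external load balancer can track the request end to end. Assumes a
# vLLM OpenAI-compatible server on localhost:8000.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

completion = client.completions.create(
    model="my-served-model",                        # placeholder model name
    prompt="Hello",
    extra_body={"request_id": "lb-node-3-000042"},  # caller-chosen id
)
```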
vllm/entrypoints/openai/protocol.py

```diff
@@ -1007,6 +1007,13 @@ class CompletionRequest(OpenAIBaseModel):
             "default: 0). Any priority other than 0 will raise an error "
             "if the served model does not use priority scheduling."),
     )
+    request_id: str = Field(
+        default_factory=lambda: f"{random_uuid()}",
+        description=(
+            "The request_id related to this request. If the caller does "
+            "not set it, a random_uuid will be generated. This id is used "
+            "throughout the inference process and returned in the response."),
+    )
     logits_processors: Optional[LogitsProcessors] = Field(
         default=None,
         description=(
```
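A minimal sketch of the default_factory behavior the new field relies on: the lambda runs once per model instantiation, so every request that omits request_id gets a fresh uuid, while a caller-supplied value passes through untouched. The real helper is vllm.utils.random_uuid; a uuid4 stand-in is used here to keep the sketch self-contained.

```python
import uuid

from pydantic import BaseModel, Field


def random_uuid() -> str:
    # Stand-in for vllm.utils.random_uuid.
    return str(uuid.uuid4())


class CompletionRequestSketch(BaseModel):
    request_id: str = Field(default_factory=lambda: f"{random_uuid()}")


a, b = CompletionRequestSketch(), CompletionRequestSketch()
assert a.request_id != b.request_id            # fresh id per request
c = CompletionRequestSketch(request_id="lb-123")
assert c.request_id == "lb-123"                # caller-supplied id wins
```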
```diff
@@ -1251,6 +1258,13 @@ class EmbeddingCompletionRequest(OpenAIBaseModel):
             "default: 0). Any priority other than 0 will raise an error "
             "if the served model does not use priority scheduling."),
     )
+    request_id: str = Field(
+        default_factory=lambda: f"{random_uuid()}",
+        description=(
+            "The request_id related to this request. If the caller does "
+            "not set it, a random_uuid will be generated. This id is used "
+            "throughout the inference process and returned in the response."),
+    )
 
     # --8<-- [end:embedding-extra-params]
 
```
```diff
@@ -1302,6 +1316,13 @@ class EmbeddingChatRequest(OpenAIBaseModel):
             "default: 0). Any priority other than 0 will raise an error "
             "if the served model does not use priority scheduling."),
     )
+    request_id: str = Field(
+        default_factory=lambda: f"{random_uuid()}",
+        description=(
+            "The request_id related to this request. If the caller does "
+            "not set it, a random_uuid will be generated. This id is used "
+            "throughout the inference process and returned in the response."),
+    )
     # --8<-- [end:chat-embedding-extra-params]
 
     @model_validator(mode="before")
```
vllm/entrypoints/openai/serving_completion.py

```diff
@@ -113,7 +113,9 @@ class OpenAIServingCompletion(OpenAIServing):
             return self.create_error_response(
                 "Echo is unsupported with prompt embeds.")
 
-        request_id = f"cmpl-{self._base_request_id(raw_request)}"
+        request_id = (
+            f"cmpl-"
+            f"{self._base_request_id(raw_request, request.request_id)}")
         created_time = int(time.time())
 
         request_metadata = RequestResponseMetadata(request_id=request_id)
```
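Note that the hunk passes the new body field into _base_request_id as a default rather than replacing the existing mechanism. A minimal sketch of the precedence this implies, assuming vLLM's existing convention of honoring an X-Request-Id header; the header name and exact signature are assumptions, not shown in this diff:

```python
import uuid
from typing import Optional


def base_request_id(headers: dict[str, str],
                    default: Optional[str] = None) -> str:
    # Assumed precedence: header wins, then the caller-supplied default
    # (now the request_id body field), then a freshly generated uuid.
    default = default or str(uuid.uuid4())
    return headers.get("X-Request-Id", default)


assert base_request_id({}, "body-id-7") == "body-id-7"
assert base_request_id({"X-Request-Id": "hdr-1"}, "body-id-7") == "hdr-1"
```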
vllm/entrypoints/openai/serving_embedding.py

```diff
@@ -163,8 +163,9 @@ class OpenAIServingEmbedding(EmbeddingMixin):
         for the API specification. This API mimics the OpenAI Embedding API.
         """
         model_name = self._get_model_name(request.model)
-        request_id = (f"{self.request_id_prefix}-"
-                      f"{self._base_request_id(raw_request)}")
+        request_id = (
+            f"{self.request_id_prefix}-"
+            f"{self._base_request_id(raw_request, request.request_id)}")
 
         ctx = EmbeddingServeContext(
             request=request,
```
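Since the resolved id is also threaded into RequestResponseMetadata (visible in the completion hunk above) and, per the field description, returned in the response, a load balancer that sets request_id can correlate its outbound request with the response and server-side bookkeeping using a single identifier.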