diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 5a0a05f9af32..c4023a618528 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -661,7 +661,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
         ),
     )
     request_id: str = Field(
-        default_factory=lambda: f"{random_uuid()}",
+        default_factory=random_uuid,
         description=(
             "The request_id related to this request. If the caller does "
             "not set it, a random_uuid will be generated. This id is used "
@@ -1078,7 +1078,7 @@ class CompletionRequest(OpenAIBaseModel):
         ),
     )
     request_id: str = Field(
-        default_factory=lambda: f"{random_uuid()}",
+        default_factory=random_uuid,
         description=(
             "The request_id related to this request. If the caller does "
             "not set it, a random_uuid will be generated. This id is used "
@@ -1375,7 +1375,7 @@ class EmbeddingCompletionRequest(OpenAIBaseModel):
         ),
     )
     request_id: str = Field(
-        default_factory=lambda: f"{random_uuid()}",
+        default_factory=random_uuid,
         description=(
             "The request_id related to this request. If the caller does "
             "not set it, a random_uuid will be generated. This id is used "
@@ -1470,7 +1470,7 @@ class EmbeddingChatRequest(OpenAIBaseModel):
         ),
     )
     request_id: str = Field(
-        default_factory=lambda: f"{random_uuid()}",
+        default_factory=random_uuid,
         description=(
             "The request_id related to this request. If the caller does "
             "not set it, a random_uuid will be generated. This id is used "
@@ -1892,7 +1892,7 @@ class ClassificationCompletionRequest(OpenAIBaseModel):
         ),
     )
     request_id: str = Field(
-        default_factory=lambda: f"{random_uuid()}",
+        default_factory=random_uuid,
         description=(
             "The request_id related to this request. If the caller does "
             "not set it, a random_uuid will be generated. This id is used "
@@ -1983,7 +1983,7 @@ class ClassificationChatRequest(OpenAIBaseModel):
     )
 
     request_id: str = Field(
-        default_factory=lambda: f"{random_uuid()}",
+        default_factory=random_uuid,
         description=(
             "The request_id related to this request. If the caller does "
             "not set it, a random_uuid will be generated. This id is used "
@@ -3094,7 +3094,7 @@ class TranslationResponseVerbose(OpenAIBaseModel):
 ####### Tokens IN <> Tokens OUT #######
 class GenerateRequest(BaseModel):
     request_id: str = Field(
-        default_factory=lambda: f"{random_uuid()}",
+        default_factory=random_uuid,
         description=(
             "The request_id related to this request. If the caller does "
             "not set it, a random_uuid will be generated. This id is used "
@@ -3151,7 +3151,7 @@ class GenerateResponseChoice(BaseModel):
 
 class GenerateResponse(BaseModel):
     request_id: str = Field(
-        default_factory=lambda: f"{random_uuid()}",
+        default_factory=random_uuid,
         description=(
             "The request_id related to this request. If the caller does "
             "not set it, a random_uuid will be generated. This id is used "
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index de22c48809dc..09a135b701d0 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -1349,11 +1349,12 @@ class OpenAIServing:
         raw_request: Request | None, default: str | None = None
     ) -> str | None:
         """Pulls the request id to use from a header, if provided"""
-        default = default or random_uuid()
-        if raw_request is None:
-            return default
+        if raw_request is not None and (
+            (req_id := raw_request.headers.get("X-Request-Id")) is not None
+        ):
+            return req_id
 
-        return raw_request.headers.get("X-Request-Id", default)
+        return random_uuid() if default is None else default
 
     @staticmethod
     def _get_data_parallel_rank(raw_request: Request | None) -> int | None:
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index d94da71b289f..fddcc2720430 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -52,9 +52,11 @@ STR_FLASHINFER_ATTN_VAL: str = "FLASHINFER"
 STR_FLASH_ATTN_VAL: str = "FLASH_ATTN"
 STR_INVALID_VAL: str = "INVALID"
 
+MASK_64_BITS = (1 << 64) - 1
+
 
 def random_uuid() -> str:
-    return str(uuid.uuid4().hex)
+    return f"{uuid.uuid4().int & MASK_64_BITS:016x}"  # 16 hex chars
 
 
 def length_from_prompt_token_ids_or_embeds(
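
For reviewers, a minimal standalone sketch of the two behavioral changes above, not the vLLM code itself: random_uuid() now yields 16 hex characters (the low 64 bits of a uuid4) instead of the 32-character uuid4 hex, and an explicit X-Request-Id header takes precedence over any caller-supplied default. The name resolve_request_id and the plain-dict headers argument are hypothetical stand-ins for the OpenAIServing helper and the Starlette request headers.

import uuid

MASK_64_BITS = (1 << 64) - 1


def random_uuid() -> str:
    # Low 64 bits of a uuid4, zero-padded to 16 hex characters.
    return f"{uuid.uuid4().int & MASK_64_BITS:016x}"


def resolve_request_id(headers: dict[str, str], default: str | None = None) -> str:
    # Header wins; otherwise use the caller-supplied default; only then
    # fall back to generating a fresh id.
    if (req_id := headers.get("X-Request-Id")) is not None:
        return req_id
    return random_uuid() if default is None else default


assert len(random_uuid()) == 16
assert resolve_request_id({"X-Request-Id": "abc"}, default="zzz") == "abc"
assert resolve_request_id({}, default="zzz") == "zzz"
assert len(resolve_request_id({})) == 16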