Formatting cleanup of vllm/entrypoints/disaggregated/types.py: regroup the
imports, use the builtin list[int] instead of typing.List[int], reflow the
PDRequest/PDResponse class headers, and move the logprobs TODO onto its own
comment line. Field names, field order, and struct options are unchanged.

diff --git a/vllm/entrypoints/disaggregated/types.py b/vllm/entrypoints/disaggregated/types.py
index 73f9bbf975480..0a2b26985ae13 100644
--- a/vllm/entrypoints/disaggregated/types.py
+++ b/vllm/entrypoints/disaggregated/types.py
@@ -1,7 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 
+from typing import Optional
+
 import msgspec
-from typing import List, Optional
+
 from vllm import SamplingParams
 
 # NOTE FOR DEVELOPERS:
@@ -9,23 +11,28 @@ from vllm import SamplingParams
 # SETUP WE WILL USE TCP. WE CANNOT USE PICKLE OTHERWISE
 # WE RISK REMOTE CODE EXECUTION FROM UNSTRUSTED USERS.
 
-class PDRequest(msgspec.Struct,
-                array_like=True,  # type: ignore[call-arg]
-                omit_defaults=True,  # type: ignore[call-arg]
-                gc=False):  # type: ignore[call-arg]
+
+class PDRequest(
+        msgspec.Struct,
+        array_like=True,  # type: ignore[call-arg]
+        omit_defaults=True,  # type: ignore[call-arg]
+        gc=False):  # type: ignore[call-arg]
     request_id: str
-    prompt_token_ids: List[int]
+    prompt_token_ids: list[int]
     sampling_params: SamplingParams
     # TODO: support multimodal inputs.
 
-class PDResponse(msgspec.Struct,
-                 array_like=True,  # type: ignore[call-arg]
-                 omit_defaults=True,  # type: ignore[call-arg]
-                 gc=False):  # type: ignore[call-arg]
+
+class PDResponse(
+        msgspec.Struct,
+        array_like=True,  # type: ignore[call-arg]
+        omit_defaults=True,  # type: ignore[call-arg]
+        gc=False):  # type: ignore[call-arg]
     request_id: str
     success: bool
     text: str
-    token_ids: List[int]
+    token_ids: list[int]
     finish_reason: Optional[str] = None
     stop_reason: Optional[str] = None
-    logprobs = None  # TODO
+    # TODO: support full protocol.
+    logprobs = None