From 6d243efedab9a03348cbd55fe966b62a08d90676 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sun, 17 Aug 2025 12:41:38 -0700 Subject: [PATCH] [Misc] Convert use_structured_output property into constant (#23060) Signed-off-by: Woosuk Kwon --- vllm/v1/request.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/vllm/v1/request.py b/vllm/v1/request.py index 8b703b6191fe..4e99a9ccef46 100644 --- a/vllm/v1/request.py +++ b/vllm/v1/request.py @@ -54,8 +54,7 @@ class Request: time.time() self.status = RequestStatus.WAITING - if sampling_params and sampling_params.guided_decoding is not None: - self.status = RequestStatus.WAITING_FOR_FSM + self.use_structured_output = False self.events: list[EngineCoreEvent] = [] self.stop_reason: Union[int, str, None] = None @@ -63,12 +62,15 @@ class Request: self.kv_transfer_params: Optional[dict[str, Any]] = None if pooling_params is not None: + # Pooling models. self.max_tokens = 1 elif sampling_params is not None: + # Generative models. assert sampling_params.max_tokens is not None self.max_tokens = sampling_params.max_tokens if sampling_params.guided_decoding is not None: self.status = RequestStatus.WAITING_FOR_FSM + self.use_structured_output = True if sampling_params.extra_args is not None: self.kv_transfer_params = \ @@ -192,11 +194,6 @@ class Request: num_tokens = self.mm_positions[input_id].length return num_tokens - @property - def use_structured_output(self) -> bool: - return self.sampling_params is not None and \ - self.sampling_params.guided_decoding is not None - def record_event( self, event_type: EngineCoreEventType,