diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index ecfcc00687ad..79f0f200c74e 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -271,6 +271,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
     spaces_between_special_tokens: bool = True
     truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
     prompt_logprobs: Optional[int] = None
+    allowed_token_ids: Optional[list[int]] = None
     # --8<-- [end:chat-completion-sampling-params]

     # --8<-- [start:chat-completion-extra-params]
@@ -549,6 +550,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
             else RequestOutputKind.FINAL_ONLY,
             guided_decoding=guided_decoding,
             logit_bias=self.logit_bias,
+            allowed_token_ids=self.allowed_token_ids,
             extra_args=({"kv_transfer_params": self.kv_transfer_params}
                         if self.kv_transfer_params else None))
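
The diff exposes `allowed_token_ids` as a chat-completion sampling parameter and plumbs it through to the `SamplingParams` built in `to_sampling_params`. Below is a minimal client-side sketch of how the new field could be exercised, assuming a vLLM OpenAI-compatible server on `http://localhost:8000/v1` and the `openai` Python SDK; the model name and token IDs are placeholders, not values from this patch.

```python
# Sketch: passing the new allowed_token_ids field from a client.
# Non-standard request fields are forwarded via extra_body, which the
# server-side ChatCompletionRequest then maps onto SamplingParams.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model name
    messages=[{"role": "user", "content": "Answer with a single token."}],
    extra_body={
        # Placeholder token IDs: restrict sampling to this vocabulary subset.
        "allowed_token_ids": [9642, 2822],
    },
)
print(response.choices[0].message.content)
```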