From 24d0ef89705e0ab8df3d79fcbfd669cf5575772b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= Date: Thu, 29 May 2025 11:58:14 +0200 Subject: [PATCH] [Misc] Replace TODO in serving transcription (#18895) Signed-off-by: NickLucche --- vllm/entrypoints/openai/serving_transcription.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/serving_transcription.py b/vllm/entrypoints/openai/serving_transcription.py index 13565d0ef8dd..9fc5b562e7d5 100644 --- a/vllm/entrypoints/openai/serving_transcription.py +++ b/vllm/entrypoints/openai/serving_transcription.py @@ -278,7 +278,9 @@ class OpenAIServingTranscription(OpenAIServing): result_generator: Optional[AsyncGenerator[RequestOutput, None]] = None try: - # TODO(rob): subtract len of tokenized prompt. + # Unlike most decoder-only models, whisper generation length is not + # constrained by the size of the input audio, which is mapped to a + # fixed-size log-mel-spectrogram. default_max_tokens = self.model_config.max_model_len sampling_params = request.to_sampling_params( default_max_tokens, self.default_sampling_params)