diff --git a/vllm/entrypoints/openai/serving_transcription.py b/vllm/entrypoints/openai/serving_transcription.py
index 13565d0ef8dd..9fc5b562e7d5 100644
--- a/vllm/entrypoints/openai/serving_transcription.py
+++ b/vllm/entrypoints/openai/serving_transcription.py
@@ -278,7 +278,9 @@ class OpenAIServingTranscription(OpenAIServing):
         result_generator: Optional[AsyncGenerator[RequestOutput, None]] = None
         try:
-            # TODO(rob): subtract len of tokenized prompt.
+            # Unlike most decoder-only models, Whisper generation length is
+            # not constrained by the size of the input audio, which is mapped
+            # to a fixed-size log-mel spectrogram.
             default_max_tokens = self.model_config.max_model_len
             sampling_params = request.to_sampling_params(
                 default_max_tokens, self.default_sampling_params)
 
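The replaced TODO and the new comment capture the key distinction: in a decoder-only model the tokenized prompt and the completion share one context window, so the default completion budget should subtract the prompt length, whereas Whisper's audio input is consumed by the encoder as a fixed-size log-mel spectrogram and never occupies decoder context. A minimal sketch of that reasoning follows; the two helper functions are illustrative only and are not part of vLLM:

def default_max_tokens_decoder_only(max_model_len: int,
                                    prompt_token_ids: list[int]) -> int:
    # Decoder-only: prompt and completion share the context window, so the
    # completion budget shrinks as the prompt grows. This is the accounting
    # the old TODO asked for.
    return max_model_len - len(prompt_token_ids)

def default_max_tokens_whisper(max_model_len: int) -> int:
    # Whisper (encoder-decoder): the audio is padded/truncated into a
    # fixed-size log-mel spectrogram handled by the encoder, so the input
    # never reduces the decoder's budget and the full max_model_len is
    # available for generation, as the new comment explains.
    return max_model_len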