feat: add usage to TranscriptionResponse (text and json response_format) (#23576)

Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com>
Authored by Guillaume Calmettes on 2025-08-26 14:26:26 +02:00, committed by GitHub
parent 384dd1b0a8
commit ebd5a77bb5
3 changed files with 32 additions and 5 deletions


@@ -69,8 +69,11 @@ async def test_basic_audio(mary_had_lamb, model_name):
             language="en",
             response_format="text",
             temperature=0.0)
-        out = json.loads(transcription)['text']
-        assert "Mary had a little lamb," in out
+        out = json.loads(transcription)
+        out_text = out['text']
+        out_usage = out['usage']
+        assert "Mary had a little lamb," in out_text
+        assert out_usage["seconds"] == 16, out_usage["seconds"]


 @pytest.mark.asyncio
@@ -116,9 +119,12 @@ async def test_long_audio_request(mary_had_lamb, client):
             language="en",
             response_format="text",
             temperature=0.0)
-        out = json.loads(transcription)['text']
-        counts = out.count("Mary had a little lamb")
+        out = json.loads(transcription)
+        out_text = out['text']
+        out_usage = out['usage']
+        counts = out_text.count("Mary had a little lamb")
         assert counts == 10, counts
+        assert out_usage["seconds"] == 161, out_usage["seconds"]


 @pytest.mark.asyncio
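For context (not part of the diff): a minimal client-side sketch of how the new usage block surfaces with the "text" response format, following what the updated tests above do. The server URL, model name, and audio file name are placeholders.

import json

import requests

# Placeholder audio file and endpoint; any ASR model served by vLLM works here.
with open("mary_had_lamb.ogg", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/v1/audio/transcriptions",
        files={"file": f},
        data={
            "model": "openai/whisper-large-v3",
            "language": "en",
            "response_format": "text",
            "temperature": 0.0,
        },
    )

payload = json.loads(resp.text)
print(payload["text"])              # transcribed text
print(payload["usage"]["seconds"])  # audio duration in whole seconds, rounded up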


@@ -2232,9 +2232,15 @@ class TranscriptionRequest(OpenAIBaseModel):


 # Transcription response objects
+class TranscriptionUsageAudio(OpenAIBaseModel):
+    type: Literal["duration"] = "duration"
+    seconds: int
+
+
 class TranscriptionResponse(OpenAIBaseModel):
     text: str
     """The transcribed text."""
+    usage: TranscriptionUsageAudio


 class TranscriptionWord(OpenAIBaseModel):
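A rough standalone sketch of the new response schema, assuming OpenAIBaseModel serializes like a plain pydantic BaseModel; it only illustrates the JSON shape clients should expect.

from typing import Literal

from pydantic import BaseModel


class TranscriptionUsageAudio(BaseModel):
    # Usage is reported as an audio duration, in whole seconds.
    type: Literal["duration"] = "duration"
    seconds: int


class TranscriptionResponse(BaseModel):
    text: str
    usage: TranscriptionUsageAudio


resp = TranscriptionResponse(
    text="Mary had a little lamb,",
    usage=TranscriptionUsageAudio(seconds=16),
)
print(resp.model_dump_json())
# {"text":"Mary had a little lamb,","usage":{"type":"duration","seconds":16}}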


@@ -200,7 +200,22 @@ class OpenAISpeechToText(OpenAIServing):
             for result_generator in list_result_generator:
                 async for op in result_generator:
                     text += op.outputs[0].text
-            return cast(T, response_class(text=text))
+
+            if self.task_type == "transcribe":
+                # add usage in TranscriptionResponse.
+                usage = {
+                    "type": "duration",
+                    # rounded up as per openAI specs
+                    "seconds": int(math.ceil(duration_s)),
+                }
+                final_response = cast(T, response_class(text=text,
+                                                        usage=usage))
+            else:
+                # no usage in response for translation task
+                final_response = cast(
+                    T, response_class(text=text))  # type: ignore[call-arg]
+            return final_response
         except asyncio.CancelledError:
             return self.create_error_response("Client disconnected")
         except ValueError as e:
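A small illustration of the rounding rule used above: the reported seconds value is the input audio duration rounded up to the next whole second with math.ceil, matching the OpenAI convention the code comment refers to. duration_s stands in for the duration the server derives from the uploaded audio.

import math

# Placeholder durations; the last one mirrors the long-audio test above.
for duration_s in (15.02, 16.0, 160.4):
    print(f"{duration_s} s -> {int(math.ceil(duration_s))} s")
# 15.02 s -> 16 s
# 16.0 s -> 16 s
# 160.4 s -> 161 s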