feat: add usage to TranscriptionResponse (text and json response_format) (#23576)
Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com>
parent 384dd1b0a8
commit ebd5a77bb5
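For context, here is a minimal client-side sketch (not part of the diff below) of how the new usage block can be read from a JSON transcription response. The server URL, model name, and audio file name are placeholders for whatever the reader is running.

import requests  # assumes a running vLLM server exposing the OpenAI-compatible API

with open("sample.wav", "rb") as f:  # placeholder audio file
    resp = requests.post(
        "http://localhost:8000/v1/audio/transcriptions",  # placeholder URL
        files={"file": f},
        data={"model": "openai/whisper-small",  # placeholder model name
              "response_format": "json"},
    )
body = resp.json()
print(body["text"])              # the transcribed text
print(body["usage"]["seconds"])  # audio duration in seconds, rounded up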
@@ -69,8 +69,11 @@ async def test_basic_audio(mary_had_lamb, model_name):
         language="en",
         response_format="text",
         temperature=0.0)
-    out = json.loads(transcription)['text']
-    assert "Mary had a little lamb," in out
+    out = json.loads(transcription)
+    out_text = out['text']
+    out_usage = out['usage']
+    assert "Mary had a little lamb," in out_text
+    assert out_usage["seconds"] == 16, out_usage["seconds"]


 @pytest.mark.asyncio
@@ -116,9 +119,12 @@ async def test_long_audio_request(mary_had_lamb, client):
         language="en",
         response_format="text",
         temperature=0.0)
-    out = json.loads(transcription)['text']
-    counts = out.count("Mary had a little lamb")
+    out = json.loads(transcription)
+    out_text = out['text']
+    out_usage = out['usage']
+    counts = out_text.count("Mary had a little lamb")
     assert counts == 10, counts
+    assert out_usage["seconds"] == 161, out_usage["seconds"]


 @pytest.mark.asyncio
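Both usage assertions above depend on the audio duration being rounded up to whole seconds. A small illustration of that rounding, mirroring the serving change further down; the raw clip durations used here are assumptions, only the rounded values 16 and 161 come from the tests.

import math

def usage_seconds(duration_s: float) -> int:
    # Round the audio duration up to whole seconds.
    return int(math.ceil(duration_s))

print(usage_seconds(15.3))   # 16  (hypothetical duration of the short clip)
print(usage_seconds(160.2))  # 161 (hypothetical duration of the long clip)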
@@ -2232,9 +2232,15 @@ class TranscriptionRequest(OpenAIBaseModel):


 # Transcription response objects
+class TranscriptionUsageAudio(OpenAIBaseModel):
+    type: Literal["duration"] = "duration"
+    seconds: int
+
+
 class TranscriptionResponse(OpenAIBaseModel):
     text: str
     """The transcribed text."""
+    usage: TranscriptionUsageAudio


 class TranscriptionWord(OpenAIBaseModel):
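A quick sketch of how the new models serialize, with plain pydantic standing in for OpenAIBaseModel (an assumption made only to keep the sketch self-contained); the nested object is what clients receive under the "usage" key.

from typing import Literal
from pydantic import BaseModel  # stand-in for OpenAIBaseModel

class TranscriptionUsageAudio(BaseModel):
    type: Literal["duration"] = "duration"
    seconds: int

class TranscriptionResponse(BaseModel):
    text: str
    usage: TranscriptionUsageAudio

resp = TranscriptionResponse(text="Mary had a little lamb,",
                             usage=TranscriptionUsageAudio(seconds=16))
print(resp.model_dump())
# {'text': 'Mary had a little lamb,', 'usage': {'type': 'duration', 'seconds': 16}}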
@@ -200,7 +200,22 @@ class OpenAISpeechToText(OpenAIServing):
             for result_generator in list_result_generator:
                 async for op in result_generator:
                     text += op.outputs[0].text
-            return cast(T, response_class(text=text))
+
+            if self.task_type == "transcribe":
+                # add usage in TranscriptionResponse.
+                usage = {
+                    "type": "duration",
+                    # rounded up as per openAI specs
+                    "seconds": int(math.ceil(duration_s)),
+                }
+                final_response = cast(T, response_class(text=text,
+                                                        usage=usage))
+            else:
+                # no usage in response for translation task
+                final_response = cast(
+                    T, response_class(text=text))  # type: ignore[call-arg]
+
+            return final_response
         except asyncio.CancelledError:
             return self.create_error_response("Client disconnected")
         except ValueError as e:
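Worth noting: the transcribe branch passes usage as a plain dict, which pydantic validates into TranscriptionUsageAudio when the response model is constructed, while the translation branch omits usage entirely (hence the call-arg ignore on the second cast). A short sketch of that coercion, again using pydantic BaseModel as a stand-in for OpenAIBaseModel.

from typing import Literal
from pydantic import BaseModel  # stand-in for OpenAIBaseModel

class TranscriptionUsageAudio(BaseModel):
    type: Literal["duration"] = "duration"
    seconds: int

class TranscriptionResponse(BaseModel):
    text: str
    usage: TranscriptionUsageAudio

# The plain dict is coerced into TranscriptionUsageAudio during validation.
resp = TranscriptionResponse(text="...", usage={"type": "duration", "seconds": 16})
print(type(resp.usage).__name__)  # TranscriptionUsageAudio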