mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 10:46:08 +08:00
feat: add usage to TranscriptionResponse (text and json response_format) (#23576)
Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com>
This commit is contained in:
parent
384dd1b0a8
commit
ebd5a77bb5
@ -69,8 +69,11 @@ async def test_basic_audio(mary_had_lamb, model_name):
|
|||||||
language="en",
|
language="en",
|
||||||
response_format="text",
|
response_format="text",
|
||||||
temperature=0.0)
|
temperature=0.0)
|
||||||
out = json.loads(transcription)['text']
|
out = json.loads(transcription)
|
||||||
assert "Mary had a little lamb," in out
|
out_text = out['text']
|
||||||
|
out_usage = out['usage']
|
||||||
|
assert "Mary had a little lamb," in out_text
|
||||||
|
assert out_usage["seconds"] == 16, out_usage["seconds"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@ -116,9 +119,12 @@ async def test_long_audio_request(mary_had_lamb, client):
|
|||||||
language="en",
|
language="en",
|
||||||
response_format="text",
|
response_format="text",
|
||||||
temperature=0.0)
|
temperature=0.0)
|
||||||
out = json.loads(transcription)['text']
|
out = json.loads(transcription)
|
||||||
counts = out.count("Mary had a little lamb")
|
out_text = out['text']
|
||||||
|
out_usage = out['usage']
|
||||||
|
counts = out_text.count("Mary had a little lamb")
|
||||||
assert counts == 10, counts
|
assert counts == 10, counts
|
||||||
|
assert out_usage["seconds"] == 161, out_usage["seconds"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|||||||
@ -2232,9 +2232,15 @@ class TranscriptionRequest(OpenAIBaseModel):
|
|||||||
|
|
||||||
|
|
||||||
# Transcription response objects
|
# Transcription response objects
|
||||||
|
class TranscriptionUsageAudio(OpenAIBaseModel):
|
||||||
|
type: Literal["duration"] = "duration"
|
||||||
|
seconds: int
|
||||||
|
|
||||||
|
|
||||||
class TranscriptionResponse(OpenAIBaseModel):
|
class TranscriptionResponse(OpenAIBaseModel):
|
||||||
text: str
|
text: str
|
||||||
"""The transcribed text."""
|
"""The transcribed text."""
|
||||||
|
usage: TranscriptionUsageAudio
|
||||||
|
|
||||||
|
|
||||||
class TranscriptionWord(OpenAIBaseModel):
|
class TranscriptionWord(OpenAIBaseModel):
|
||||||
|
|||||||
@ -200,7 +200,22 @@ class OpenAISpeechToText(OpenAIServing):
|
|||||||
for result_generator in list_result_generator:
|
for result_generator in list_result_generator:
|
||||||
async for op in result_generator:
|
async for op in result_generator:
|
||||||
text += op.outputs[0].text
|
text += op.outputs[0].text
|
||||||
return cast(T, response_class(text=text))
|
|
||||||
|
if self.task_type == "transcribe":
|
||||||
|
# add usage in TranscriptionResponse.
|
||||||
|
usage = {
|
||||||
|
"type": "duration",
|
||||||
|
# rounded up as per OpenAI specs
|
||||||
|
"seconds": int(math.ceil(duration_s)),
|
||||||
|
}
|
||||||
|
final_response = cast(T, response_class(text=text,
|
||||||
|
usage=usage))
|
||||||
|
else:
|
||||||
|
# no usage in response for translation task
|
||||||
|
final_response = cast(
|
||||||
|
T, response_class(text=text)) # type: ignore[call-arg]
|
||||||
|
|
||||||
|
return final_response
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
return self.create_error_response("Client disconnected")
|
return self.create_error_response("Client disconnected")
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user