[Docs] Update transcriptions API to use openai client with stream=True (#20271)
Signed-off-by: NickLucche <nlucches@redhat.com>
parent 0e96cc9b7e
commit 314af8617c
@@ -19,10 +19,8 @@ The script performs:
 """
 
 import asyncio
-import json
 
-import httpx
-from openai import OpenAI
+from openai import AsyncOpenAI, OpenAI
 
 from vllm.assets.audio import AudioAsset
 
@@ -47,37 +45,30 @@ def sync_openai(audio_path: str, client: OpenAI):
     print("transcription result:", transcription.text)
 
 
-async def stream_openai_response(audio_path: str, base_url: str, api_key: str):
+async def stream_openai_response(audio_path: str, client: AsyncOpenAI):
     """
-    Perform streaming transcription using vLLM's raw HTTP streaming API.
+    Perform asynchronous transcription using OpenAI-compatible API.
     """
-    data = {
-        "language": "en",
-        "stream": True,
-        "model": "openai/whisper-large-v3",
-    }
-    url = base_url + "/audio/transcriptions"
-    headers = {"Authorization": f"Bearer {api_key}"}
-    print("transcription result:", end=" ")
-    # OpenAI Transcription API client does not support streaming.
-    async with httpx.AsyncClient() as client:
-        with open(audio_path, "rb") as f:
-            async with client.stream(
-                "POST", url, files={"file": f}, data=data, headers=headers
-            ) as response:
-                async for line in response.aiter_lines():
-                    # Each line is a JSON object prefixed with 'data: '
-                    if line:
-                        if line.startswith("data: "):
-                            line = line[len("data: ") :]
-                        # Last chunk, stream ends
-                        if line.strip() == "[DONE]":
-                            break
-                        # Parse the JSON response
-                        chunk = json.loads(line)
-                        # Extract and print the content
-                        content = chunk["choices"][0].get("delta", {}).get("content")
-                        print(content, end="")
+    print("\ntranscription result:", end=" ")
+    with open(audio_path, "rb") as f:
+        transcription = await client.audio.transcriptions.create(
+            file=f,
+            model="openai/whisper-large-v3",
+            language="en",
+            response_format="json",
+            temperature=0.0,
+            # Additional sampling params not provided by OpenAI API.
+            extra_body=dict(
+                seed=420,
+                top_p=0.6,
+            ),
+            stream=True,
+        )
+
+        async for chunk in transcription:
+            if chunk.choices:
+                content = chunk.choices[0].get("delta", {}).get("content")
+                print(content, end="", flush=True)
     print()  # Final newline after stream ends
 
 
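The deleted httpx path documents the wire format the endpoint speaks: ordinary server-sent events, one "data: "-prefixed JSON chunk per line, terminated by a "[DONE]" sentinel, with each chunk shaped like a Chat Completion chunk. For reference, a condensed sketch of that manual parsing (logic taken from the removed code above; parse_sse_line is a hypothetical helper name):

import json
from typing import Optional


def parse_sse_line(line: str) -> Optional[str]:
    """Extract delta text from one SSE line, as the removed httpx parser did."""
    if not line.startswith("data: "):
        return None  # not a data frame
    payload = line[len("data: ") :]
    if payload.strip() == "[DONE]":
        return None  # sentinel: the stream has ended
    chunk = json.loads(payload)
    # Same chunk shape as Chat Completions: choices[0].delta.content
    return chunk["choices"][0].get("delta", {}).get("content")

The openai client's stream=True path now performs this parsing internally, which is what lets the example shrink to a plain async for loop.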
@@ -95,7 +86,11 @@ def main():
 
     sync_openai(mary_had_lamb, client)
     # Run the asynchronous function
-    asyncio.run(stream_openai_response(winning_call, openai_api_base, openai_api_key))
+    client = AsyncOpenAI(
+        api_key=openai_api_key,
+        base_url=openai_api_base,
+    )
+    asyncio.run(stream_openai_response(winning_call, client))
 
 
 if __name__ == "__main__":
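Taken together, the two hunks above amount to the following minimal end-to-end flow. This is a sketch, not the example script itself: the EMPTY key and http://localhost:8000/v1 base URL are assumptions based on vLLM's usual local-server defaults, and winning_call.wav stands in for a real audio file.

import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    # Assumed defaults for a local vLLM server; adjust to your deployment.
    client = AsyncOpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")
    with open("winning_call.wav", "rb") as f:  # hypothetical audio file
        stream = await client.audio.transcriptions.create(
            model="openai/whisper-large-v3",
            file=f,
            language="en",
            stream=True,  # vLLM extension; see the protocol change below
        )
        async for chunk in stream:
            if chunk.choices:
                content = chunk.choices[0].get("delta", {}).get("content")
                print(content, end="", flush=True)
    print()


if __name__ == "__main__":
    asyncio.run(main())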
@@ -1750,12 +1750,11 @@ class TranscriptionRequest(OpenAIBaseModel):
     timestamps incurs additional latency.
     """
 
-    # --8<-- [start:transcription-extra-params]
     stream: Optional[bool] = False
-    """Custom field not present in the original OpenAI definition. When set,
-    it will enable output to be streamed in a similar fashion as the Chat
-    Completion endpoint.
+    """When set, it will enable output to be streamed in a similar fashion
+    as the Chat Completion endpoint.
     """
+    # --8<-- [start:transcription-extra-params]
     # Flattened stream option to simplify form data.
     stream_include_usage: Optional[bool] = False
     stream_continuous_usage_stats: Optional[bool] = False
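As the protocol comment notes, stream_include_usage and stream_continuous_usage_stats are flattened because the transcription endpoint accepts multipart form data rather than JSON, so they stand in for the nested stream_options object of the Chat Completion API. A hedged sketch of passing them from the client through extra_body (field names come from the definition above; the endpoint defaults are assumed as in the example script):

import asyncio

from openai import AsyncOpenAI


async def transcribe_with_usage(path: str) -> None:
    # Assumed local vLLM endpoint and placeholder key, as in the example script.
    client = AsyncOpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")
    with open(path, "rb") as f:
        stream = await client.audio.transcriptions.create(
            model="openai/whisper-large-v3",
            file=f,
            stream=True,
            # Flattened form fields defined on TranscriptionRequest above.
            extra_body={
                "stream_include_usage": True,
                "stream_continuous_usage_stats": False,
            },
        )
        async for chunk in stream:
            if chunk.choices:
                content = chunk.choices[0].get("delta", {}).get("content")
                print(content or "", end="", flush=True)
    print()


if __name__ == "__main__":
    asyncio.run(transcribe_with_usage("audio.wav"))  # hypothetical file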