mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-18 20:54:36 +08:00
[doc] fix multimodal example script (#18089)
Signed-off-by: David Xia <david@davidxia.com>
This commit is contained in:
parent
3d2779c29a
commit
5c04bb8b86
@ -1,5 +1,5 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
"""An example showing how to use vLLM to serve multimodal models
|
||||
"""An example showing how to use vLLM to serve multimodal models
|
||||
and run online serving with OpenAI client.
|
||||
|
||||
Launch the vLLM server with the following command:
|
||||
@ -12,12 +12,18 @@ vllm serve microsoft/Phi-3.5-vision-instruct --task generate \
|
||||
--trust-remote-code --max-model-len 4096 --limit-mm-per-prompt '{"image":2}'
|
||||
|
||||
(audio inference with Ultravox)
|
||||
vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b --max-model-len 4096
|
||||
vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b \
|
||||
--max-model-len 4096 --trust-remote-code
|
||||
|
||||
run the script with
|
||||
python openai_chat_completion_client_for_multimodal.py --chat-type audio
|
||||
"""
|
||||
|
||||
import base64
|
||||
|
||||
import requests
|
||||
from openai import OpenAI
|
||||
from utils import get_first_model
|
||||
|
||||
from vllm.utils import FlexibleArgumentParser
|
||||
|
||||
@ -31,9 +37,6 @@ client = OpenAI(
|
||||
base_url=openai_api_base,
|
||||
)
|
||||
|
||||
models = client.models.list()
|
||||
model = models.data[0].id
|
||||
|
||||
|
||||
def encode_base64_content_from_url(content_url: str) -> str:
|
||||
"""Encode a content retrieved from a remote url to base64 format."""
|
||||
@ -46,7 +49,7 @@ def encode_base64_content_from_url(content_url: str) -> str:
|
||||
|
||||
|
||||
# Text-only inference
|
||||
def run_text_only() -> None:
|
||||
def run_text_only(model: str) -> None:
|
||||
chat_completion = client.chat.completions.create(
|
||||
messages=[{
|
||||
"role": "user",
|
||||
@ -61,7 +64,7 @@ def run_text_only() -> None:
|
||||
|
||||
|
||||
# Single-image input inference
|
||||
def run_single_image() -> None:
|
||||
def run_single_image(model: str) -> None:
|
||||
|
||||
## Use image url in the payload
|
||||
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
|
||||
@ -117,7 +120,7 @@ def run_single_image() -> None:
|
||||
|
||||
|
||||
# Multi-image input inference
|
||||
def run_multi_image() -> None:
|
||||
def run_multi_image(model: str) -> None:
|
||||
image_url_duck = "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg"
|
||||
image_url_lion = "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg"
|
||||
chat_completion_from_url = client.chat.completions.create(
|
||||
@ -152,7 +155,7 @@ def run_multi_image() -> None:
|
||||
|
||||
|
||||
# Video input inference
|
||||
def run_video() -> None:
|
||||
def run_video(model: str) -> None:
|
||||
video_url = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerFun.mp4"
|
||||
video_base64 = encode_base64_content_from_url(video_url)
|
||||
|
||||
@ -208,7 +211,7 @@ def run_video() -> None:
|
||||
|
||||
|
||||
# Audio input inference
|
||||
def run_audio() -> None:
|
||||
def run_audio(model: str) -> None:
|
||||
from vllm.assets.audio import AudioAsset
|
||||
|
||||
audio_url = AudioAsset("winning_call").url
|
||||
@ -318,7 +321,8 @@ def parse_args():
|
||||
|
||||
def main(args) -> None:
|
||||
chat_type = args.chat_type
|
||||
example_function_map[chat_type]()
|
||||
model = get_first_model(client)
|
||||
example_function_map[chat_type](model)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
25
examples/online_serving/utils.py
Normal file
25
examples/online_serving/utils.py
Normal file
@ -0,0 +1,25 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
from openai import APIConnectionError, OpenAI
|
||||
from openai.pagination import SyncPage
|
||||
from openai.types.model import Model
|
||||
|
||||
|
||||
def get_first_model(client: OpenAI) -> str:
    """Return the id of the first model advertised by the vLLM server.

    Queries ``client.models.list()`` and hands back the id of the first
    entry. Raises ``RuntimeError`` (chained to the original error) when the
    server cannot be reached, or when the server reports no models at all.
    """
    try:
        # EAFP: attempt the listing and translate a connection failure
        # into an actionable error message for the example user.
        model_page: SyncPage[Model] = client.models.list()
    except APIConnectionError as e:
        raise RuntimeError(
            "Failed to get the list of models from the vLLM server at "
            f"{client.base_url} with API key {client.api_key}. Check\n"
            "1. the server is running\n"
            "2. the server URL is correct\n"
            "3. the API key is correct") from e

    # A reachable server may still serve zero models; fail loudly.
    if not model_page.data:
        raise RuntimeError(
            f"No models found on the vLLM server at {client.base_url}")

    first = model_page.data[0]
    return first.id
Loading…
x
Reference in New Issue
Block a user