[Misc] update api_client example (#16459)

Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
This commit is contained in:
Reid 2025-04-11 18:05:40 +08:00 committed by GitHub
parent a26f59ccbc
commit 35e076b3a8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,5 +1,8 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
"""Example Python client for `vllm.entrypoints.api_server` """Example Python client for `vllm.entrypoints.api_server`
Start the demo server:
python -m vllm.entrypoints.api_server --model <model_name>
NOTE: The API server is used only for demonstration and simple performance NOTE: The API server is used only for demonstration and simple performance
benchmarks. It is not intended for production use. benchmarks. It is not intended for production use.
For production use, we recommend `vllm serve` and the OpenAI client API. For production use, we recommend `vllm serve` and the OpenAI client API.
@ -7,6 +10,7 @@ For production use, we recommend `vllm serve` and the OpenAI client API.
import argparse import argparse
import json import json
from argparse import Namespace
from collections.abc import Iterable from collections.abc import Iterable
import requests import requests
@ -27,7 +31,6 @@ def post_http_request(prompt: str,
pload = { pload = {
"prompt": prompt, "prompt": prompt,
"n": n, "n": n,
"use_beam_search": True,
"temperature": 0.0, "temperature": 0.0,
"max_tokens": 16, "max_tokens": 16,
"stream": stream, "stream": stream,
@ -55,14 +58,7 @@ def get_response(response: requests.Response) -> list[str]:
return output return output
if __name__ == "__main__": def main(args: Namespace):
parser = argparse.ArgumentParser()
parser.add_argument("--host", type=str, default="localhost")
parser.add_argument("--port", type=int, default=8000)
parser.add_argument("--n", type=int, default=4)
parser.add_argument("--prompt", type=str, default="San Francisco is a")
parser.add_argument("--stream", action="store_true")
args = parser.parse_args()
prompt = args.prompt prompt = args.prompt
api_url = f"http://{args.host}:{args.port}/generate" api_url = f"http://{args.host}:{args.port}/generate"
n = args.n n = args.n
@ -83,3 +79,14 @@ if __name__ == "__main__":
output = get_response(response) output = get_response(response)
for i, line in enumerate(output): for i, line in enumerate(output):
print(f"Beam candidate {i}: {line!r}", flush=True) print(f"Beam candidate {i}: {line!r}", flush=True)
if __name__ == "__main__":
    # Command-line options for the example client, registered in the same
    # order as before so the generated --help output is unchanged.
    # --host/--port select the server that the /generate URL is built from.
    cli_options = (
        ("--host", {"type": str, "default": "localhost"}),
        ("--port", {"type": int, "default": 8000}),
        ("--n", {"type": int, "default": 1}),
        ("--prompt", {"type": str, "default": "San Francisco is a"}),
        ("--stream", {"action": "store_true"}),
    )

    parser = argparse.ArgumentParser()
    for flag, options in cli_options:
        parser.add_argument(flag, **options)

    # Hand the parsed namespace to main(), which performs the request.
    args = parser.parse_args()
    main(args)