From 35e076b3a8207e7d39ebab82152fd4b3db648233 Mon Sep 17 00:00:00 2001 From: Reid <61492567+reidliu41@users.noreply.github.com> Date: Fri, 11 Apr 2025 18:05:40 +0800 Subject: [PATCH] [Misc] update api_client example (#16459) Signed-off-by: reidliu41 Co-authored-by: reidliu41 --- examples/online_serving/api_client.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/examples/online_serving/api_client.py b/examples/online_serving/api_client.py index e2944896d161..60e4bccb7517 100644 --- a/examples/online_serving/api_client.py +++ b/examples/online_serving/api_client.py @@ -1,5 +1,8 @@ # SPDX-License-Identifier: Apache-2.0 """Example Python client for `vllm.entrypoints.api_server` +Start the demo server: + python -m vllm.entrypoints.api_server --model <model_name> + NOTE: The API server is used only for demonstration and simple performance benchmarks. It is not intended for production use. For production use, we recommend `vllm serve` and the OpenAI client API. @@ -7,6 +10,7 @@ For production use, we recommend `vllm serve` and the OpenAI client API. 
import argparse import json +from argparse import Namespace from collections.abc import Iterable import requests @@ -27,7 +31,6 @@ def post_http_request(prompt: str, pload = { "prompt": prompt, "n": n, - "use_beam_search": True, "temperature": 0.0, "max_tokens": 16, "stream": stream, @@ -55,14 +58,7 @@ def get_response(response: requests.Response) -> list[str]: return output -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--host", type=str, default="localhost") - parser.add_argument("--port", type=int, default=8000) - parser.add_argument("--n", type=int, default=4) - parser.add_argument("--prompt", type=str, default="San Francisco is a") - parser.add_argument("--stream", action="store_true") - args = parser.parse_args() +def main(args: Namespace): prompt = args.prompt api_url = f"http://{args.host}:{args.port}/generate" n = args.n @@ -83,3 +79,14 @@ if __name__ == "__main__": output = get_response(response) for i, line in enumerate(output): print(f"Beam candidate {i}: {line!r}", flush=True) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--host", type=str, default="localhost") + parser.add_argument("--port", type=int, default=8000) + parser.add_argument("--n", type=int, default=1) + parser.add_argument("--prompt", type=str, default="San Francisco is a") + parser.add_argument("--stream", action="store_true") + args = parser.parse_args() + main(args)