mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 01:55:36 +08:00
[Misc] update api_client example (#16459)
Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
This commit is contained in:
parent
a26f59ccbc
commit
35e076b3a8
@ -1,5 +1,8 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
"""Example Python client for `vllm.entrypoints.api_server`
|
"""Example Python client for `vllm.entrypoints.api_server`
|
||||||
|
Start the demo server:
|
||||||
|
python -m vllm.entrypoints.api_server --model <model_name>
|
||||||
|
|
||||||
NOTE: The API server is used only for demonstration and simple performance
|
NOTE: The API server is used only for demonstration and simple performance
|
||||||
benchmarks. It is not intended for production use.
|
benchmarks. It is not intended for production use.
|
||||||
For production use, we recommend `vllm serve` and the OpenAI client API.
|
For production use, we recommend `vllm serve` and the OpenAI client API.
|
||||||
@ -7,6 +10,7 @@ For production use, we recommend `vllm serve` and the OpenAI client API.
|
|||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
|
from argparse import Namespace
|
||||||
from collections.abc import Iterable
|
from collections.abc import Iterable
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@ -27,7 +31,6 @@ def post_http_request(prompt: str,
|
|||||||
pload = {
|
pload = {
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
"n": n,
|
"n": n,
|
||||||
"use_beam_search": True,
|
|
||||||
"temperature": 0.0,
|
"temperature": 0.0,
|
||||||
"max_tokens": 16,
|
"max_tokens": 16,
|
||||||
"stream": stream,
|
"stream": stream,
|
||||||
@ -55,14 +58,7 @@ def get_response(response: requests.Response) -> list[str]:
|
|||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def main(args: Namespace):
|
||||||
parser = argparse.ArgumentParser()
|
|
||||||
parser.add_argument("--host", type=str, default="localhost")
|
|
||||||
parser.add_argument("--port", type=int, default=8000)
|
|
||||||
parser.add_argument("--n", type=int, default=4)
|
|
||||||
parser.add_argument("--prompt", type=str, default="San Francisco is a")
|
|
||||||
parser.add_argument("--stream", action="store_true")
|
|
||||||
args = parser.parse_args()
|
|
||||||
prompt = args.prompt
|
prompt = args.prompt
|
||||||
api_url = f"http://{args.host}:{args.port}/generate"
|
api_url = f"http://{args.host}:{args.port}/generate"
|
||||||
n = args.n
|
n = args.n
|
||||||
@ -83,3 +79,14 @@ if __name__ == "__main__":
|
|||||||
output = get_response(response)
|
output = get_response(response)
|
||||||
for i, line in enumerate(output):
|
for i, line in enumerate(output):
|
||||||
print(f"Beam candidate {i}: {line!r}", flush=True)
|
print(f"Beam candidate {i}: {line!r}", flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("--host", type=str, default="localhost")
|
||||||
|
parser.add_argument("--port", type=int, default=8000)
|
||||||
|
parser.add_argument("--n", type=int, default=1)
|
||||||
|
parser.add_argument("--prompt", type=str, default="San Francisco is a")
|
||||||
|
parser.add_argument("--stream", action="store_true")
|
||||||
|
args = parser.parse_args()
|
||||||
|
main(args)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user