vllm/examples/online_serving/openai_completion_client.py
CYJiang 71075029f2
[Doc] Support --stream arg in openai_completion_client.py script (#18388)
Signed-off-by: googs1025 <googs1025@gmail.com>
2025-05-22 13:20:17 +00:00

# SPDX-License-Identifier: Apache-2.0
import argparse

from openai import OpenAI

# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"
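# Usage sketch (the model name below is only an illustration, not part of this
# script): start a vLLM OpenAI-compatible server on the default port, then run
# this client with or without the --stream flag.
#
#   vllm serve NousResearch/Meta-Llama-3-8B-Instruct
#   python openai_completion_client.py
#   python openai_completion_client.py --stream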


def parse_args():
    parser = argparse.ArgumentParser(description="Client for vLLM API server")
    parser.add_argument("--stream",
                        action="store_true",
                        help="Enable streaming response")
    return parser.parse_args()


def main(args):
    client = OpenAI(
        # defaults to os.environ.get("OPENAI_API_KEY")
        api_key=openai_api_key,
        base_url=openai_api_base,
    )

    models = client.models.list()
    model = models.data[0].id

    # Completion API
    completion = client.completions.create(
        model=model,
        prompt="A robot may not injure a human being",
        echo=False,
        n=2,
        stream=args.stream,
        logprobs=3)

    print("-" * 50)
    print("Completion results:")
    if args.stream:
        for c in completion:
            print(c)
    else:
        print(completion)
    print("-" * 50)


if __name__ == "__main__":
    args = parse_args()
    main(args)