From 71075029f214bd4db409ba553cf083a883fdd61f Mon Sep 17 00:00:00 2001
From: CYJiang <86391540+googs1025@users.noreply.github.com>
Date: Thu, 22 May 2025 21:20:17 +0800
Subject: [PATCH] [Doc] Support --stream arg in openai_completion_client.py script (#18388)

Signed-off-by: googs1025
---
 ...enai_chat_completion_structured_outputs.py |  7 +++++--
 ...etion_structured_outputs_structural_tag.py |  7 +++++--
 .../openai_completion_client.py               | 20 ++++++++++++++-----
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/examples/online_serving/openai_chat_completion_structured_outputs.py b/examples/online_serving/openai_chat_completion_structured_outputs.py
index 660369e55d40e..722d747a69bf0 100644
--- a/examples/online_serving/openai_chat_completion_structured_outputs.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs.py
@@ -12,6 +12,9 @@ from enum import Enum
 from openai import BadRequestError, OpenAI
 from pydantic import BaseModel
 
+openai_api_key = "EMPTY"
+openai_api_base = "http://localhost:8000/v1"
+
 
 # Guided decoding by Choice (list of possible options)
 def guided_choice_completion(client: OpenAI, model: str):
@@ -134,8 +137,8 @@ def extra_backend_options_completion(client: OpenAI, model: str):
 
 def main():
     client: OpenAI = OpenAI(
-        base_url="http://localhost:8000/v1",
-        api_key="-",
+        base_url=openai_api_base,
+        api_key=openai_api_key,
     )
 
     model = client.models.list().data[0].id
diff --git a/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py b/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
index 42aa12c451c04..08f9399425085 100644
--- a/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
@@ -7,11 +7,14 @@ from openai import OpenAI
 # to enforce the format of a tool call response, but it could be used for
 # any structured output within a subset of the response.
 
+openai_api_key = "EMPTY"
+openai_api_base = "http://localhost:8000/v1"
+
 
 def main():
     client = OpenAI(
-        base_url="http://localhost:8000/v1",
-        api_key="-",
+        base_url=openai_api_base,
+        api_key=openai_api_key,
     )
 
     messages = [{
diff --git a/examples/online_serving/openai_completion_client.py b/examples/online_serving/openai_completion_client.py
index 6ab7619bff192..77f721921da2f 100644
--- a/examples/online_serving/openai_completion_client.py
+++ b/examples/online_serving/openai_completion_client.py
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
+import argparse
+
 from openai import OpenAI
 
 # Modify OpenAI's API key and API base to use vLLM's API server.
@@ -7,7 +9,15 @@ openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
 
 
-def main():
+def parse_args():
+    parser = argparse.ArgumentParser(description="Client for vLLM API server")
+    parser.add_argument("--stream",
+                        action="store_true",
+                        help="Enable streaming response")
+    return parser.parse_args()
+
+
+def main(args):
     client = OpenAI(
         # defaults to os.environ.get("OPENAI_API_KEY")
         api_key=openai_api_key,
@@ -18,18 +28,17 @@ def main():
     model = models.data[0].id
 
     # Completion API
-    stream = False
     completion = client.completions.create(
         model=model,
         prompt="A robot may not injure a human being",
         echo=False,
         n=2,
-        stream=stream,
+        stream=args.stream,
         logprobs=3)
 
     print("-" * 50)
     print("Completion results:")
-    if stream:
+    if args.stream:
         for c in completion:
             print(c)
     else:
@@ -38,4 +47,5 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    args = parse_args()
+    main(args)