From 71075029f214bd4db409ba553cf083a883fdd61f Mon Sep 17 00:00:00 2001
From: CYJiang <86391540+googs1025@users.noreply.github.com>
Date: Thu, 22 May 2025 21:20:17 +0800
Subject: [PATCH] [Doc] Support --stream arg in openai_completion_client.py
 script (#18388)

Signed-off-by: googs1025 <googs1025@gmail.com>
---
 ...enai_chat_completion_structured_outputs.py |  7 +++++--
 ...etion_structured_outputs_structural_tag.py |  7 +++++--
 .../openai_completion_client.py               | 20 ++++++++++++++-----
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/examples/online_serving/openai_chat_completion_structured_outputs.py b/examples/online_serving/openai_chat_completion_structured_outputs.py
index 660369e55d40e..722d747a69bf0 100644
--- a/examples/online_serving/openai_chat_completion_structured_outputs.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs.py
@@ -12,6 +12,9 @@ from enum import Enum
 from openai import BadRequestError, OpenAI
 from pydantic import BaseModel
 
+openai_api_key = "EMPTY"
+openai_api_base = "http://localhost:8000/v1"
+
 
 # Guided decoding by Choice (list of possible options)
 def guided_choice_completion(client: OpenAI, model: str):
@@ -134,8 +137,8 @@ def extra_backend_options_completion(client: OpenAI, model: str):
 
 def main():
     client: OpenAI = OpenAI(
-        base_url="http://localhost:8000/v1",
-        api_key="-",
+        base_url=openai_api_base,
+        api_key=openai_api_key,
     )
 
     model = client.models.list().data[0].id
diff --git a/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py b/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
index 42aa12c451c04..08f9399425085 100644
--- a/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
@@ -7,11 +7,14 @@ from openai import OpenAI
 # to enforce the format of a tool call response, but it could be used for
 # any structured output within a subset of the response.
 
+openai_api_key = "EMPTY"
+openai_api_base = "http://localhost:8000/v1"
+
 
 def main():
     client = OpenAI(
-        base_url="http://localhost:8000/v1",
-        api_key="-",
+        base_url=openai_api_base,
+        api_key=openai_api_key,
     )
 
     messages = [{
diff --git a/examples/online_serving/openai_completion_client.py b/examples/online_serving/openai_completion_client.py
index 6ab7619bff192..77f721921da2f 100644
--- a/examples/online_serving/openai_completion_client.py
+++ b/examples/online_serving/openai_completion_client.py
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
+import argparse
+
 from openai import OpenAI
 
 # Modify OpenAI's API key and API base to use vLLM's API server.
@@ -7,7 +9,15 @@ openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
 
 
-def main():
+def parse_args():
+    """Parse CLI flags; ``--stream`` enables a streaming response."""
+    cli = argparse.ArgumentParser(description="Client for vLLM API server")
+    cli.add_argument(
+        "--stream", action="store_true", help="Enable streaming response")
+    return cli.parse_args()
+
+
+def main(args):
     client = OpenAI(
         # defaults to os.environ.get("OPENAI_API_KEY")
         api_key=openai_api_key,
@@ -18,18 +28,17 @@ def main():
     model = models.data[0].id
 
     # Completion API
-    stream = False
     completion = client.completions.create(
         model=model,
         prompt="A robot may not injure a human being",
         echo=False,
         n=2,
-        stream=stream,
+        stream=args.stream,
         logprobs=3)
 
     print("-" * 50)
     print("Completion results:")
-    if stream:
+    if args.stream:
         for c in completion:
             print(c)
     else:
@@ -38,4 +47,5 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    args = parse_args()
+    main(args)