[Misc] refactor example series - structured outputs (#17040)

Signed-off-by: reidliu41 <reid201711@gmail.com> Co-authored-by: reidliu41 <reid201711@gmail.com>
2026-07-24 22:37:30 +08:00 · 2025-04-24 22:49:48 +08:00 · 2025-04-24 22:49:48 +08:00 · 1bcbcbf574
commit 1bcbcbf574
parent 82e43b2d7e
2 changed files with 215 additions and 149 deletions
--- a/examples/online_serving/openai_chat_completion_structured_outputs.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs.py
@ -1,43 +1,49 @@
 # SPDX-License-Identifier: Apache-2.0
 """
 To run this example, you need to start the vLLM server:
 ```bash
 vllm serve Qwen/Qwen2.5-3B-Instruct
 ```
 """
 from enum import Enum
 from openai import BadRequestError, OpenAI
 from pydantic import BaseModel
 client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="-",
 )
 # Guided decoding by Choice (list of possible options)
-completion = client.chat.completions.create(
+def guided_choice_completion(client: OpenAI, model: str):
-    model="Qwen/Qwen2.5-3B-Instruct",
+    completion = client.chat.completions.create(
-    messages=[{
+        model=model,
-        "role": "user",
+        messages=[{
-        "content": "Classify this sentiment: vLLM is wonderful!"
+            "role": "user",
-    }],
+            "content": "Classify this sentiment: vLLM is wonderful!"
-    extra_body={"guided_choice": ["positive", "negative"]},
+        }],
-)
+        extra_body={"guided_choice": ["positive", "negative"]},
-print(completion.choices[0].message.content)
+    )
    return completion.choices[0].message.content
 # Guided decoding by Regex
-prompt = ("Generate an email address for Alan Turing, who works in Enigma."
+def guided_regex_completion(client: OpenAI, model: str):
-          "End in .com and new line. Example result:"
+    prompt = ("Generate an email address for Alan Turing, who works in Enigma."
-          "alan.turing@enigma.com\n")
+              "End in .com and new line. Example result:"
              "alan.turing@enigma.com\n")
-completion = client.chat.completions.create(
+    completion = client.chat.completions.create(
-    model="Qwen/Qwen2.5-3B-Instruct",
+        model=model,
-    messages=[{
+        messages=[{
-        "role": "user",
+            "role": "user",
-        "content": prompt,
+            "content": prompt,
-    }],
+        }],
-    extra_body={
+        extra_body={
-        "guided_regex": r"\w+@\w+\.com\n",
+            "guided_regex": r"\w+@\w+\.com\n",
-        "stop": ["\n"]
+            "stop": ["\n"]
-    },
+        },
-)
+    )
-print(completion.choices[0].message.content)
+    return completion.choices[0].message.content
 # Guided decoding by JSON using Pydantic schema
@ -54,66 +60,100 @@ class CarDescription(BaseModel):
    car_type: CarType
-json_schema = CarDescription.model_json_schema()
+def guided_json_completion(client: OpenAI, model: str):
    json_schema = CarDescription.model_json_schema()
-prompt = ("Generate a JSON with the brand, model and car_type of"
+    prompt = ("Generate a JSON with the brand, model and car_type of"
-          "the most iconic car from the 90's")
+              "the most iconic car from the 90's")
 completion = client.chat.completions.create(
    model="Qwen/Qwen2.5-3B-Instruct",
    messages=[{
        "role": "user",
        "content": prompt,
    }],
    extra_body={"guided_json": json_schema},
 )
 print(completion.choices[0].message.content)
 # Guided decoding by Grammar
 simplified_sql_grammar = """
    root ::= select_statement
    select_statement ::= "SELECT " column " from " table " where " condition
    column ::= "col_1 " | "col_2 "
    table ::= "table_1 " | "table_2 "
    condition ::= column "= " number
    number ::= "1 " | "2 "
 """
 prompt = ("Generate an SQL query to show the 'username' and 'email'"
          "from the 'users' table.")
 completion = client.chat.completions.create(
    model="Qwen/Qwen2.5-3B-Instruct",
    messages=[{
        "role": "user",
        "content": prompt,
    }],
    extra_body={"guided_grammar": simplified_sql_grammar},
 )
 print(completion.choices[0].message.content)
 # Extra backend options
 prompt = ("Generate an email address for Alan Turing, who works in Enigma."
          "End in .com and new line. Example result:"
          "alan.turing@enigma.com\n")
 try:
    # The no-fallback option forces vLLM to use xgrammar, so when it fails
    # you get a 400 with the reason why
    completion = client.chat.completions.create(
-        model="Qwen/Qwen2.5-3B-Instruct",
+        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
-        extra_body={
+        extra_body={"guided_json": json_schema},
            "guided_regex": r"\w+@\w+\.com\n",
            "stop": ["\n"],
            "guided_decoding_backend": "xgrammar:no-fallback"
        },
    )
-except BadRequestError as e:
+    return completion.choices[0].message.content
-    print("This error is expected:", e)
+
 # Guided decoding by Grammar
 def guided_grammar_completion(client: OpenAI, model: str):
    """Request a chat completion constrained by a small SQL-like grammar.

    The grammar is sent as ``guided_grammar`` inside the request's
    ``extra_body``.

    Args:
        client: OpenAI client used to issue the chat-completion request.
        model: Model name forwarded in the request.

    Returns:
        The ``content`` of the first choice's message.
    """
    simplified_sql_grammar = """
        root ::= select_statement
        select_statement ::= "SELECT " column " from " table " where " condition
        column ::= "col_1 " | "col_2 "
        table ::= "table_1 " | "table_2 "
        condition ::= column "= " number
        number ::= "1 " | "2 "
    """
    # Adjacent string literals are joined verbatim, so the first piece must
    # end with a space; otherwise the prompt reads "'email'from the ...".
    prompt = ("Generate an SQL query to show the 'username' and 'email' "
              "from the 'users' table.")
    completion = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        extra_body={"guided_grammar": simplified_sql_grammar},
    )
    return completion.choices[0].message.content
 # Extra backend options
 def extra_backend_options_completion(client: OpenAI, model: str):
    """Request a regex-guided completion with an explicit decoding backend.

    The request passes ``guided_regex``, ``stop`` and
    ``guided_decoding_backend`` ("xgrammar:no-fallback") in ``extra_body``.

    Args:
        client: OpenAI client used to issue the chat-completion request.
        model: Model name forwarded in the request.

    Returns:
        The ``content`` of the first choice's message, or ``None`` when the
        request is rejected with ``BadRequestError`` (the error is printed).
    """
    # Adjacent string literals are joined verbatim, so each sentence piece
    # needs its own trailing space; otherwise the prompt contains
    # "Enigma.End in .com" and "Example result:alan.turing@enigma.com".
    prompt = ("Generate an email address for Alan Turing, "
              "who works in Enigma. "
              "End in .com and new line. Example result: "
              "alan.turing@enigma.com\n")
    try:
        # The no-fallback option forces vLLM to use xgrammar, so when it fails
        # you get a 400 with the reason why
        completion = client.chat.completions.create(
            model=model,
            messages=[{
                "role": "user",
                "content": prompt,
            }],
            extra_body={
                "guided_regex": r"\w+@\w+\.com\n",
                "stop": ["\n"],
                "guided_decoding_backend": "xgrammar:no-fallback"
            },
        )
    except BadRequestError as e:
        print("This error is expected:", e)
        # Make the "no result" outcome explicit for callers that print it.
        return None
    return completion.choices[0].message.content
 def main():
    """Run every structured-output example against the local server.

    Builds a client for ``http://localhost:8000/v1`` and, for each example,
    prints a heading followed by the value the example function returns.
    """
    client = OpenAI(base_url="http://localhost:8000/v1", api_key="-")
    model = "Qwen/Qwen2.5-3B-Instruct"

    # (heading, example function) pairs, executed in this order.
    demos = (
        ("Guided Choice Completion:", guided_choice_completion),
        ("\nGuided Regex Completion:", guided_regex_completion),
        ("\nGuided JSON Completion:", guided_json_completion),
        ("\nGuided Grammar Completion:", guided_grammar_completion),
        ("\nExtra Backend Options Completion:",
         extra_backend_options_completion),
    )
    for heading, run_demo in demos:
        print(heading)
        print(run_demo(client, model))
 if __name__ == "__main__":
    main()
--- a/examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py
@ -25,29 +25,28 @@ from pydantic import BaseModel
 openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
 client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
 )
-models = client.models.list()
+def print_completion_details(completion):
-model = models.data[0].id
+    print("reasoning_content: ",
          completion.choices[0].message.reasoning_content)
    print("content: ", completion.choices[0].message.content)
 # Guided decoding by Regex
-prompt = ("What is the capital of France?")
+def guided_regex_completion(client: OpenAI, model: str):
    prompt = ("What is the capital of France?")
-completion = client.chat.completions.create(
+    completion = client.chat.completions.create(
-    model=model,
+        model=model,
-    messages=[{
+        messages=[{
-        "role": "user",
+            "role": "user",
-        "content": prompt,
+            "content": prompt,
-    }],
+        }],
-    extra_body={
+        extra_body={
-        "guided_regex": "(Paris|London)",
+            "guided_regex": "(Paris|London)",
-    },
+        },
-)
+    )
-print("reasoning_content: ", completion.choices[0].message.reasoning_content)
+    print_completion_details(completion)
 print("content: ", completion.choices[0].message.content)
 class People(BaseModel):
@ -55,19 +54,19 @@ class People(BaseModel):
    age: int
-json_schema = People.model_json_schema()
+def guided_json_completion(client: OpenAI, model: str):
    json_schema = People.model_json_schema()
-prompt = ("Generate a JSON with the name and age of one random person.")
+    prompt = ("Generate a JSON with the name and age of one random person.")
-completion = client.chat.completions.create(
+    completion = client.chat.completions.create(
-    model=model,
+        model=model,
-    messages=[{
+        messages=[{
-        "role": "user",
+            "role": "user",
-        "content": prompt,
+            "content": prompt,
-    }],
+        }],
-    extra_body={"guided_json": json_schema},
+        extra_body={"guided_json": json_schema},
-)
+    )
-print("reasoning_content: ", completion.choices[0].message.reasoning_content)
+    print_completion_details(completion)
 print("content: ", completion.choices[0].message.content)
 # Guided decoding by JSON using Pydantic schema
@ -84,46 +83,73 @@ class CarDescription(BaseModel):
    car_type: CarType
-json_schema = CarDescription.model_json_schema()
+def guided_car_json_completion(client: OpenAI, model: str):
    json_schema = CarDescription.model_json_schema()
    prompt = ("Generate a JSON with the brand, model and car_type of"
              "the most iconic car from the 90's")
    completion = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        extra_body={"guided_json": json_schema},
    )
    print_completion_details(completion)
 prompt = ("Generate a JSON with the brand, model and car_type of"
          "the most iconic car from the 90's")
 completion = client.chat.completions.create(
    model=model,
    messages=[{
        "role": "user",
        "content": prompt,
    }],
    extra_body={"guided_json": json_schema},
 )
 print("reasoning_content: ", completion.choices[0].message.reasoning_content)
 print("content: ", completion.choices[0].message.content)
 # Guided decoding by Grammar
-simplified_sql_grammar = """
+def guided_grammar_completion(client: OpenAI, model: str):
-    root ::= select_statement
+    simplified_sql_grammar = """
        root ::= select_statement
-    select_statement ::= "SELECT " column " from " table " where " condition
+        select_statement ::= "SELECT " column " from " table " where " condition
-    column ::= "col_1 " | "col_2 "
+        column ::= "col_1 " | "col_2 "
-    table ::= "table_1 " | "table_2 "
+        table ::= "table_1 " | "table_2 "
-    condition ::= column "= " number
+        condition ::= column "= " number
-    number ::= "1 " | "2 "
+        number ::= "1 " | "2 "
-"""
+    """
-# This may be very slow https://github.com/vllm-project/vllm/issues/12122
+    # This may be very slow https://github.com/vllm-project/vllm/issues/12122
-prompt = ("Generate an SQL query to show the 'username' and 'email'"
+    prompt = ("Generate an SQL query to show the 'username' and 'email'"
-          "from the 'users' table.")
+              "from the 'users' table.")
-completion = client.chat.completions.create(
+    completion = client.chat.completions.create(
-    model=model,
+        model=model,
-    messages=[{
+        messages=[{
-        "role": "user",
+            "role": "user",
-        "content": prompt,
+            "content": prompt,
-    }],
+        }],
-    extra_body={"guided_grammar": simplified_sql_grammar},
+        extra_body={"guided_grammar": simplified_sql_grammar},
-)
+    )
-print("reasoning_content: ", completion.choices[0].message.reasoning_content)
+    print_completion_details(completion)
-print("content: ", completion.choices[0].message.content)
+
 def main():
    """Run every structured-output-with-reasoning example in sequence.

    Builds a client from ``openai_api_key`` / ``openai_api_base``, takes the
    id of the first model the server lists, then prints a heading before
    calling each example function.
    """
    client = OpenAI(api_key=openai_api_key, base_url=openai_api_base)
    model: str = client.models.list().data[0].id

    # (heading, example function) pairs, executed in this order.
    demos = (
        ("Guided Regex Completion:", guided_regex_completion),
        ("\nGuided JSON Completion (People):", guided_json_completion),
        ("\nGuided JSON Completion (CarDescription):",
         guided_car_json_completion),
        ("\nGuided Grammar Completion:", guided_grammar_completion),
    )
    for heading, run_demo in demos:
        print(heading)
        run_demo(client, model)
 if __name__ == "__main__":
    main()