From 1bcbcbf57417e6fa0427bc60f6acd11b86cb857f Mon Sep 17 00:00:00 2001 From: Reid <61492567+reidliu41@users.noreply.github.com> Date: Thu, 24 Apr 2025 22:49:48 +0800 Subject: [PATCH] [Misc] refactor example series - structured outputs (#17040) Signed-off-by: reidliu41 Co-authored-by: reidliu41 --- ...enai_chat_completion_structured_outputs.py | 208 +++++++++++------- ...etion_structured_outputs_with_reasoning.py | 156 +++++++------ 2 files changed, 215 insertions(+), 149 deletions(-) diff --git a/examples/online_serving/openai_chat_completion_structured_outputs.py b/examples/online_serving/openai_chat_completion_structured_outputs.py index 1b690d19b4e81..f71162e36efd2 100644 --- a/examples/online_serving/openai_chat_completion_structured_outputs.py +++ b/examples/online_serving/openai_chat_completion_structured_outputs.py @@ -1,43 +1,49 @@ # SPDX-License-Identifier: Apache-2.0 +""" +To run this example, you need to start the vLLM server: + +```bash +vllm serve Qwen/Qwen2.5-3B-Instruct +``` +""" from enum import Enum from openai import BadRequestError, OpenAI from pydantic import BaseModel -client = OpenAI( - base_url="http://localhost:8000/v1", - api_key="-", -) # Guided decoding by Choice (list of possible options) -completion = client.chat.completions.create( - model="Qwen/Qwen2.5-3B-Instruct", - messages=[{ - "role": "user", - "content": "Classify this sentiment: vLLM is wonderful!" - }], - extra_body={"guided_choice": ["positive", "negative"]}, -) -print(completion.choices[0].message.content) +def guided_choice_completion(client: OpenAI, model: str): + completion = client.chat.completions.create( + model=model, + messages=[{ + "role": "user", + "content": "Classify this sentiment: vLLM is wonderful!" + }], + extra_body={"guided_choice": ["positive", "negative"]}, + ) + return completion.choices[0].message.content + # Guided decoding by Regex -prompt = ("Generate an email address for Alan Turing, who works in Enigma." - "End in .com and new line. Example result:" - "alan.turing@enigma.com\n") +def guided_regex_completion(client: OpenAI, model: str): + prompt = ("Generate an email address for Alan Turing, who works in Enigma." + "End in .com and new line. Example result:" + "alan.turing@enigma.com\n") -completion = client.chat.completions.create( - model="Qwen/Qwen2.5-3B-Instruct", - messages=[{ - "role": "user", - "content": prompt, - }], - extra_body={ - "guided_regex": r"\w+@\w+\.com\n", - "stop": ["\n"] - }, -) -print(completion.choices[0].message.content) + completion = client.chat.completions.create( + model=model, + messages=[{ + "role": "user", + "content": prompt, + }], + extra_body={ + "guided_regex": r"\w+@\w+\.com\n", + "stop": ["\n"] + }, + ) + return completion.choices[0].message.content # Guided decoding by JSON using Pydantic schema @@ -54,66 +60,100 @@ class CarDescription(BaseModel): car_type: CarType -json_schema = CarDescription.model_json_schema() +def guided_json_completion(client: OpenAI, model: str): + json_schema = CarDescription.model_json_schema() -prompt = ("Generate a JSON with the brand, model and car_type of" - "the most iconic car from the 90's") -completion = client.chat.completions.create( - model="Qwen/Qwen2.5-3B-Instruct", - messages=[{ - "role": "user", - "content": prompt, - }], - extra_body={"guided_json": json_schema}, -) -print(completion.choices[0].message.content) - -# Guided decoding by Grammar -simplified_sql_grammar = """ - root ::= select_statement - - select_statement ::= "SELECT " column " from " table " where " condition - - column ::= "col_1 " | "col_2 " - - table ::= "table_1 " | "table_2 " - - condition ::= column "= " number - - number ::= "1 " | "2 " -""" - -prompt = ("Generate an SQL query to show the 'username' and 'email'" - "from the 'users' table.") -completion = client.chat.completions.create( - model="Qwen/Qwen2.5-3B-Instruct", - messages=[{ - "role": "user", - "content": prompt, - }], - extra_body={"guided_grammar": simplified_sql_grammar}, -) -print(completion.choices[0].message.content) - -# Extra backend options -prompt = ("Generate an email address for Alan Turing, who works in Enigma." - "End in .com and new line. Example result:" - "alan.turing@enigma.com\n") - -try: - # The no-fallback option forces vLLM to use xgrammar, so when it fails - # you get a 400 with the reason why + prompt = ("Generate a JSON with the brand, model and car_type of" + "the most iconic car from the 90's") completion = client.chat.completions.create( - model="Qwen/Qwen2.5-3B-Instruct", + model=model, messages=[{ "role": "user", "content": prompt, }], - extra_body={ - "guided_regex": r"\w+@\w+\.com\n", - "stop": ["\n"], - "guided_decoding_backend": "xgrammar:no-fallback" - }, + extra_body={"guided_json": json_schema}, ) -except BadRequestError as e: - print("This error is expected:", e) + return completion.choices[0].message.content + + +# Guided decoding by Grammar +def guided_grammar_completion(client: OpenAI, model: str): + simplified_sql_grammar = """ + root ::= select_statement + + select_statement ::= "SELECT " column " from " table " where " condition + + column ::= "col_1 " | "col_2 " + + table ::= "table_1 " | "table_2 " + + condition ::= column "= " number + + number ::= "1 " | "2 " + """ + + prompt = ("Generate an SQL query to show the 'username' and 'email'" + "from the 'users' table.") + completion = client.chat.completions.create( + model=model, + messages=[{ + "role": "user", + "content": prompt, + }], + extra_body={"guided_grammar": simplified_sql_grammar}, + ) + return completion.choices[0].message.content + + +# Extra backend options +def extra_backend_options_completion(client: OpenAI, model: str): + prompt = ("Generate an email address for Alan Turing, who works in Enigma." + "End in .com and new line. Example result:" + "alan.turing@enigma.com\n") + + try: + # The no-fallback option forces vLLM to use xgrammar, so when it fails + # you get a 400 with the reason why + completion = client.chat.completions.create( + model=model, + messages=[{ + "role": "user", + "content": prompt, + }], + extra_body={ + "guided_regex": r"\w+@\w+\.com\n", + "stop": ["\n"], + "guided_decoding_backend": "xgrammar:no-fallback" + }, + ) + return completion.choices[0].message.content + except BadRequestError as e: + print("This error is expected:", e) + + +def main(): + client: OpenAI = OpenAI( + base_url="http://localhost:8000/v1", + api_key="-", + ) + + model = "Qwen/Qwen2.5-3B-Instruct" + + print("Guided Choice Completion:") + print(guided_choice_completion(client, model)) + + print("\nGuided Regex Completion:") + print(guided_regex_completion(client, model)) + + print("\nGuided JSON Completion:") + print(guided_json_completion(client, model)) + + print("\nGuided Grammar Completion:") + print(guided_grammar_completion(client, model)) + + print("\nExtra Backend Options Completion:") + print(extra_backend_options_completion(client, model)) + + +if __name__ == "__main__": + main() diff --git a/examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py b/examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py index be634401679c0..cb7f30d932554 100644 --- a/examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py +++ b/examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py @@ -25,29 +25,28 @@ from pydantic import BaseModel openai_api_key = "EMPTY" openai_api_base = "http://localhost:8000/v1" -client = OpenAI( - api_key=openai_api_key, - base_url=openai_api_base, -) -models = client.models.list() -model = models.data[0].id +def print_completion_details(completion): + print("reasoning_content: ", + completion.choices[0].message.reasoning_content) + print("content: ", completion.choices[0].message.content) + # Guided decoding by Regex -prompt = ("What is the capital of France?") +def guided_regex_completion(client: OpenAI, model: str): + prompt = ("What is the capital of France?") -completion = client.chat.completions.create( - model=model, - messages=[{ - "role": "user", - "content": prompt, - }], - extra_body={ - "guided_regex": "(Paris|London)", - }, -) -print("reasoning_content: ", completion.choices[0].message.reasoning_content) -print("content: ", completion.choices[0].message.content) + completion = client.chat.completions.create( + model=model, + messages=[{ + "role": "user", + "content": prompt, + }], + extra_body={ + "guided_regex": "(Paris|London)", + }, + ) + print_completion_details(completion) class People(BaseModel): @@ -55,19 +54,19 @@ class People(BaseModel): age: int -json_schema = People.model_json_schema() +def guided_json_completion(client: OpenAI, model: str): + json_schema = People.model_json_schema() -prompt = ("Generate a JSON with the name and age of one random person.") -completion = client.chat.completions.create( - model=model, - messages=[{ - "role": "user", - "content": prompt, - }], - extra_body={"guided_json": json_schema}, -) -print("reasoning_content: ", completion.choices[0].message.reasoning_content) -print("content: ", completion.choices[0].message.content) + prompt = ("Generate a JSON with the name and age of one random person.") + completion = client.chat.completions.create( + model=model, + messages=[{ + "role": "user", + "content": prompt, + }], + extra_body={"guided_json": json_schema}, + ) + print_completion_details(completion) # Guided decoding by JSON using Pydantic schema @@ -84,46 +83,73 @@ class CarDescription(BaseModel): car_type: CarType -json_schema = CarDescription.model_json_schema() +def guided_car_json_completion(client: OpenAI, model: str): + json_schema = CarDescription.model_json_schema() + + prompt = ("Generate a JSON with the brand, model and car_type of" + "the most iconic car from the 90's") + completion = client.chat.completions.create( + model=model, + messages=[{ + "role": "user", + "content": prompt, + }], + extra_body={"guided_json": json_schema}, + ) + print_completion_details(completion) -prompt = ("Generate a JSON with the brand, model and car_type of" - "the most iconic car from the 90's") -completion = client.chat.completions.create( - model=model, - messages=[{ - "role": "user", - "content": prompt, - }], - extra_body={"guided_json": json_schema}, -) -print("reasoning_content: ", completion.choices[0].message.reasoning_content) -print("content: ", completion.choices[0].message.content) # Guided decoding by Grammar -simplified_sql_grammar = """ - root ::= select_statement +def guided_grammar_completion(client: OpenAI, model: str): + simplified_sql_grammar = """ + root ::= select_statement - select_statement ::= "SELECT " column " from " table " where " condition + select_statement ::= "SELECT " column " from " table " where " condition - column ::= "col_1 " | "col_2 " + column ::= "col_1 " | "col_2 " - table ::= "table_1 " | "table_2 " + table ::= "table_1 " | "table_2 " - condition ::= column "= " number + condition ::= column "= " number - number ::= "1 " | "2 " -""" + number ::= "1 " | "2 " + """ -# This may be very slow https://github.com/vllm-project/vllm/issues/12122 -prompt = ("Generate an SQL query to show the 'username' and 'email'" - "from the 'users' table.") -completion = client.chat.completions.create( - model=model, - messages=[{ - "role": "user", - "content": prompt, - }], - extra_body={"guided_grammar": simplified_sql_grammar}, -) -print("reasoning_content: ", completion.choices[0].message.reasoning_content) -print("content: ", completion.choices[0].message.content) + # This may be very slow https://github.com/vllm-project/vllm/issues/12122 + prompt = ("Generate an SQL query to show the 'username' and 'email'" + "from the 'users' table.") + completion = client.chat.completions.create( + model=model, + messages=[{ + "role": "user", + "content": prompt, + }], + extra_body={"guided_grammar": simplified_sql_grammar}, + ) + print_completion_details(completion) + + +def main(): + client: OpenAI = OpenAI( + api_key=openai_api_key, + base_url=openai_api_base, + ) + + models = client.models.list() + model: str = models.data[0].id + + print("Guided Regex Completion:") + guided_regex_completion(client, model) + + print("\nGuided JSON Completion (People):") + guided_json_completion(client, model) + + print("\nGuided JSON Completion (CarDescription):") + guided_car_json_completion(client, model) + + print("\nGuided Grammar Completion:") + guided_grammar_completion(client, model) + + +if __name__ == "__main__": + main()