[Misc] refactor example series - structured outputs (#17040)

Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
This commit is contained in:
Reid 2025-04-24 22:49:48 +08:00 committed by GitHub
parent 82e43b2d7e
commit 1bcbcbf574
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 215 additions and 149 deletions

View File

@@ -1,43 +1,49 @@
# SPDX-License-Identifier: Apache-2.0
"""
To run this example, you need to start the vLLM server:
```bash
vllm serve Qwen/Qwen2.5-3B-Instruct
```
"""
from enum import Enum
from openai import BadRequestError, OpenAI
from pydantic import BaseModel
client = OpenAI(
base_url="http://localhost:8000/v1",
api_key="-",
)
# Guided decoding by Choice (list of possible options)
def guided_choice_completion(client: OpenAI, model: str):
    """Guided decoding by Choice (list of possible options).

    Asks the model to classify a sentiment, constraining the output to
    exactly one of the listed choices. Returns the generated content.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": "Classify this sentiment: vLLM is wonderful!"
        }],
        # vLLM-specific option: restrict output to one of these strings.
        extra_body={"guided_choice": ["positive", "negative"]},
    )
    return completion.choices[0].message.content
# Guided decoding by Regex
def guided_regex_completion(client: OpenAI, model: str):
    """Guided decoding by Regex.

    Generates an email address constrained to match ``\\w+@\\w+\\.com\\n``,
    stopping at the newline the pattern requires. Returns the content.
    """
    prompt = ("Generate an email address for Alan Turing, who works in Enigma."
              "End in .com and new line. Example result:"
              "alan.turing@enigma.com\n")

    completion = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        extra_body={
            # Constrain output to the email-address pattern.
            "guided_regex": r"\w+@\w+\.com\n",
            "stop": ["\n"]
        },
    )
    return completion.choices[0].message.content
# Guided decoding by JSON using Pydantic schema
@@ -54,66 +60,100 @@ class CarDescription(BaseModel):
car_type: CarType
def guided_json_completion(client: OpenAI, model: str):
    """Guided decoding by JSON using a Pydantic schema.

    Constrains the output to a JSON document matching the
    ``CarDescription`` model's JSON schema. Returns the content.
    """
    json_schema = CarDescription.model_json_schema()

    prompt = ("Generate a JSON with the brand, model and car_type of"
              "the most iconic car from the 90's")
    completion = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        # vLLM validates generated tokens against this JSON schema.
        extra_body={"guided_json": json_schema},
    )
    return completion.choices[0].message.content
# Guided decoding by Grammar
def guided_grammar_completion(client: OpenAI, model: str):
    """Guided decoding by Grammar.

    Constrains the output with an EBNF-style grammar describing a tiny
    SQL subset. Returns the generated content.
    """
    simplified_sql_grammar = """
        root ::= select_statement

        select_statement ::= "SELECT " column " from " table " where " condition

        column ::= "col_1 " | "col_2 "

        table ::= "table_1 " | "table_2 "

        condition ::= column "= " number

        number ::= "1 " | "2 "
    """

    prompt = ("Generate an SQL query to show the 'username' and 'email'"
              "from the 'users' table.")
    completion = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        extra_body={"guided_grammar": simplified_sql_grammar},
    )
    return completion.choices[0].message.content
# Extra backend options
def extra_backend_options_completion(client: OpenAI, model: str):
    """Extra backend options.

    Same regex-guided request as :func:`guided_regex_completion`, but
    forces the ``xgrammar`` guided-decoding backend with no fallback.
    Returns the content on success; on a 400 from the server, prints the
    (expected) error and implicitly returns ``None``.
    """
    prompt = ("Generate an email address for Alan Turing, who works in Enigma."
              "End in .com and new line. Example result:"
              "alan.turing@enigma.com\n")

    try:
        # The no-fallback option forces vLLM to use xgrammar, so when it fails
        # you get a 400 with the reason why
        completion = client.chat.completions.create(
            model=model,
            messages=[{
                "role": "user",
                "content": prompt,
            }],
            extra_body={
                "guided_regex": r"\w+@\w+\.com\n",
                "stop": ["\n"],
                "guided_decoding_backend": "xgrammar:no-fallback"
            },
        )
        return completion.choices[0].message.content
    except BadRequestError as e:
        print("This error is expected:", e)
def main():
    """Run every structured-output example against a local vLLM server."""
    client: OpenAI = OpenAI(
        base_url="http://localhost:8000/v1",
        # vLLM does not check the key; any non-empty string works.
        api_key="-",
    )
    model = "Qwen/Qwen2.5-3B-Instruct"

    print("Guided Choice Completion:")
    print(guided_choice_completion(client, model))

    print("\nGuided Regex Completion:")
    print(guided_regex_completion(client, model))

    print("\nGuided JSON Completion:")
    print(guided_json_completion(client, model))

    print("\nGuided Grammar Completion:")
    print(guided_grammar_completion(client, model))

    print("\nExtra Backend Options Completion:")
    print(extra_backend_options_completion(client, model))


if __name__ == "__main__":
    main()

View File

@@ -25,29 +25,28 @@ from pydantic import BaseModel
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"
def print_completion_details(completion):
    """Print the reasoning content and final content of a chat completion.

    Expects a chat-completion response whose first choice's message has a
    ``reasoning_content`` attribute (vLLM reasoning-parser output) in
    addition to the usual ``content``.
    """
    print("reasoning_content: ",
          completion.choices[0].message.reasoning_content)
    print("content: ", completion.choices[0].message.content)
# Guided decoding by Regex
def guided_regex_completion(client: OpenAI, model: str):
    """Guided decoding by Regex: answer is constrained to Paris or London."""
    prompt = ("What is the capital of France?")

    completion = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        extra_body={
            # Only these two alternatives can be generated.
            "guided_regex": "(Paris|London)",
        },
    )
    print_completion_details(completion)
class People(BaseModel):
@@ -55,19 +54,19 @@ class People(BaseModel):
age: int
def guided_json_completion(client: OpenAI, model: str):
    """Guided decoding by JSON using the ``People`` Pydantic schema."""
    json_schema = People.model_json_schema()

    prompt = ("Generate a JSON with the name and age of one random person.")
    completion = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        # Output must be a JSON document matching the People schema.
        extra_body={"guided_json": json_schema},
    )
    print_completion_details(completion)
# Guided decoding by JSON using Pydantic schema
@@ -84,46 +83,73 @@ class CarDescription(BaseModel):
car_type: CarType
def guided_car_json_completion(client: OpenAI, model: str):
    """Guided decoding by JSON using the ``CarDescription`` Pydantic schema."""
    json_schema = CarDescription.model_json_schema()

    prompt = ("Generate a JSON with the brand, model and car_type of"
              "the most iconic car from the 90's")
    completion = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        # Output must be a JSON document matching the CarDescription schema.
        extra_body={"guided_json": json_schema},
    )
    print_completion_details(completion)
# Guided decoding by Grammar
def guided_grammar_completion(client: OpenAI, model: str):
    """Guided decoding by Grammar: output constrained to a tiny SQL subset."""
    simplified_sql_grammar = """
        root ::= select_statement

        select_statement ::= "SELECT " column " from " table " where " condition

        column ::= "col_1 " | "col_2 "

        table ::= "table_1 " | "table_2 "

        condition ::= column "= " number

        number ::= "1 " | "2 "
    """

    # This may be very slow https://github.com/vllm-project/vllm/issues/12122
    prompt = ("Generate an SQL query to show the 'username' and 'email'"
              "from the 'users' table.")
    completion = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        extra_body={"guided_grammar": simplified_sql_grammar},
    )
    print_completion_details(completion)
def main():
    """Run every reasoning structured-output example against a vLLM server."""
    client: OpenAI = OpenAI(
        api_key=openai_api_key,
        base_url=openai_api_base,
    )

    # Use whichever model the server is actually serving.
    models = client.models.list()
    model: str = models.data[0].id

    print("Guided Regex Completion:")
    guided_regex_completion(client, model)

    print("\nGuided JSON Completion (People):")
    guided_json_completion(client, model)

    print("\nGuided JSON Completion (CarDescription):")
    guided_car_json_completion(client, model)

    print("\nGuided Grammar Completion:")
    guided_grammar_completion(client, model)


if __name__ == "__main__":
    main()