[Misc] refactor example series - structured outputs (#17040)

Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
This commit is contained in:
Reid 2025-04-24 22:49:48 +08:00 committed by GitHub
parent 82e43b2d7e
commit 1bcbcbf574
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 215 additions and 149 deletions

View File

@ -1,33 +1,39 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
"""
To run this example, you need to start the vLLM server:
```bash
vllm serve Qwen/Qwen2.5-3B-Instruct
```
"""
from enum import Enum from enum import Enum
from openai import BadRequestError, OpenAI from openai import BadRequestError, OpenAI
from pydantic import BaseModel from pydantic import BaseModel
client = OpenAI(
base_url="http://localhost:8000/v1",
api_key="-",
)
# Guided decoding by Choice (list of possible options) # Guided decoding by Choice (list of possible options)
def guided_choice_completion(client: OpenAI, model: str):
    """Classify a fixed sentence, limiting the answer to a list of choices.

    Args:
        client: OpenAI-compatible client used to send the chat request.
        model: Name of the model to query.

    Returns:
        The message content of the first returned choice.
    """
    user_message = {
        "role": "user",
        "content": "Classify this sentiment: vLLM is wonderful!",
    }
    # The list of permitted answers travels in ``extra_body`` under the
    # ``guided_choice`` key, next to the regular chat-completion arguments.
    allowed_labels = ["positive", "negative"]
    response = client.chat.completions.create(
        model=model,
        messages=[user_message],
        extra_body={"guided_choice": allowed_labels},
    )
    return response.choices[0].message.content
# Guided decoding by Regex # Guided decoding by Regex
prompt = ("Generate an email address for Alan Turing, who works in Enigma." def guided_regex_completion(client: OpenAI, model: str):
prompt = ("Generate an email address for Alan Turing, who works in Enigma."
"End in .com and new line. Example result:" "End in .com and new line. Example result:"
"alan.turing@enigma.com\n") "alan.turing@enigma.com\n")
completion = client.chat.completions.create( completion = client.chat.completions.create(
model="Qwen/Qwen2.5-3B-Instruct", model=model,
messages=[{ messages=[{
"role": "user", "role": "user",
"content": prompt, "content": prompt,
@ -36,8 +42,8 @@ completion = client.chat.completions.create(
"guided_regex": r"\w+@\w+\.com\n", "guided_regex": r"\w+@\w+\.com\n",
"stop": ["\n"] "stop": ["\n"]
}, },
) )
print(completion.choices[0].message.content) return completion.choices[0].message.content
# Guided decoding by JSON using Pydantic schema # Guided decoding by JSON using Pydantic schema
@ -54,22 +60,25 @@ class CarDescription(BaseModel):
car_type: CarType car_type: CarType
def guided_json_completion(client: OpenAI, model: str):
    """Request a car description constrained by the CarDescription schema.

    Args:
        client: OpenAI-compatible client used to send the chat request.
        model: Name of the model to query.

    Returns:
        The message content of the first returned choice.
    """
    json_schema = CarDescription.model_json_schema()
    # Adjacent string literals are joined verbatim, so the first fragment
    # must end with a space; otherwise the prompt reads "car_type ofthe".
    prompt = ("Generate a JSON with the brand, model and car_type of "
              "the most iconic car from the 90's")
    completion = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        # The JSON schema is sent in ``extra_body`` as ``guided_json``.
        extra_body={"guided_json": json_schema},
    )
    return completion.choices[0].message.content
# Guided decoding by Grammar # Guided decoding by Grammar
simplified_sql_grammar = """ def guided_grammar_completion(client: OpenAI, model: str):
simplified_sql_grammar = """
root ::= select_statement root ::= select_statement
select_statement ::= "SELECT " column " from " table " where " condition select_statement ::= "SELECT " column " from " table " where " condition
@ -81,30 +90,32 @@ simplified_sql_grammar = """
condition ::= column "= " number condition ::= column "= " number
number ::= "1 " | "2 " number ::= "1 " | "2 "
""" """
prompt = ("Generate an SQL query to show the 'username' and 'email'" prompt = ("Generate an SQL query to show the 'username' and 'email'"
"from the 'users' table.") "from the 'users' table.")
completion = client.chat.completions.create( completion = client.chat.completions.create(
model="Qwen/Qwen2.5-3B-Instruct", model=model,
messages=[{ messages=[{
"role": "user", "role": "user",
"content": prompt, "content": prompt,
}], }],
extra_body={"guided_grammar": simplified_sql_grammar}, extra_body={"guided_grammar": simplified_sql_grammar},
) )
print(completion.choices[0].message.content) return completion.choices[0].message.content
# Extra backend options # Extra backend options
prompt = ("Generate an email address for Alan Turing, who works in Enigma." def extra_backend_options_completion(client: OpenAI, model: str):
prompt = ("Generate an email address for Alan Turing, who works in Enigma."
"End in .com and new line. Example result:" "End in .com and new line. Example result:"
"alan.turing@enigma.com\n") "alan.turing@enigma.com\n")
try: try:
# The no-fallback option forces vLLM to use xgrammar, so when it fails # The no-fallback option forces vLLM to use xgrammar, so when it fails
# you get a 400 with the reason why # you get a 400 with the reason why
completion = client.chat.completions.create( completion = client.chat.completions.create(
model="Qwen/Qwen2.5-3B-Instruct", model=model,
messages=[{ messages=[{
"role": "user", "role": "user",
"content": prompt, "content": prompt,
@ -115,5 +126,34 @@ try:
"guided_decoding_backend": "xgrammar:no-fallback" "guided_decoding_backend": "xgrammar:no-fallback"
}, },
) )
except BadRequestError as e: return completion.choices[0].message.content
except BadRequestError as e:
print("This error is expected:", e) print("This error is expected:", e)
def main():
    """Run every example in order and print each returned result.

    The client targets ``http://localhost:8000/v1`` and all examples use
    the ``Qwen/Qwen2.5-3B-Instruct`` model.
    """
    client: OpenAI = OpenAI(
        base_url="http://localhost:8000/v1",
        api_key="-",
    )
    model = "Qwen/Qwen2.5-3B-Instruct"

    # (heading, example) pairs, executed top to bottom. Every heading but
    # the first starts with a newline to separate the sections.
    examples = (
        ("Guided Choice Completion:", guided_choice_completion),
        ("\nGuided Regex Completion:", guided_regex_completion),
        ("\nGuided JSON Completion:", guided_json_completion),
        ("\nGuided Grammar Completion:", guided_grammar_completion),
        ("\nExtra Backend Options Completion:",
         extra_backend_options_completion),
    )
    for heading, run_example in examples:
        print(heading)
        print(run_example(client, model))


if __name__ == "__main__":
    main()

View File

@ -25,18 +25,18 @@ from pydantic import BaseModel
openai_api_key = "EMPTY" openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1" openai_api_base = "http://localhost:8000/v1"
client = OpenAI(
api_key=openai_api_key,
base_url=openai_api_base,
)
def print_completion_details(completion):
    """Print the reasoning text and the answer of the first choice.

    Args:
        completion: Chat completion response whose first choice's message
            exposes ``reasoning_content`` and ``content`` attributes.
    """
    first_message = completion.choices[0].message
    print("reasoning_content: ", first_message.reasoning_content)
    print("content: ", first_message.content)
# Guided decoding by Regex # Guided decoding by Regex
prompt = ("What is the capital of France?") def guided_regex_completion(client: OpenAI, model: str):
prompt = ("What is the capital of France?")
completion = client.chat.completions.create( completion = client.chat.completions.create(
model=model, model=model,
messages=[{ messages=[{
"role": "user", "role": "user",
@ -45,9 +45,8 @@ completion = client.chat.completions.create(
extra_body={ extra_body={
"guided_regex": "(Paris|London)", "guided_regex": "(Paris|London)",
}, },
) )
print("reasoning_content: ", completion.choices[0].message.reasoning_content) print_completion_details(completion)
print("content: ", completion.choices[0].message.content)
class People(BaseModel): class People(BaseModel):
@ -55,19 +54,19 @@ class People(BaseModel):
age: int age: int
def guided_json_completion(client: OpenAI, model: str):
    """Request a random person as JSON constrained by the People schema.

    Args:
        client: OpenAI-compatible client used to send the chat request.
        model: Name of the model to query.

    The response is not returned; it is handed to
    ``print_completion_details`` for printing.
    """
    schema = People.model_json_schema()
    user_message = {
        "role": "user",
        "content":
        "Generate a JSON with the name and age of one random person.",
    }
    response = client.chat.completions.create(
        model=model,
        messages=[user_message],
        # The JSON schema is sent in ``extra_body`` as ``guided_json``.
        extra_body={"guided_json": schema},
    )
    print_completion_details(response)
# Guided decoding by JSON using Pydantic schema # Guided decoding by JSON using Pydantic schema
@ -84,23 +83,25 @@ class CarDescription(BaseModel):
car_type: CarType car_type: CarType
def guided_car_json_completion(client: OpenAI, model: str):
    """Request a car description constrained by the CarDescription schema.

    Args:
        client: OpenAI-compatible client used to send the chat request.
        model: Name of the model to query.

    The response is not returned; it is handed to
    ``print_completion_details`` for printing.
    """
    json_schema = CarDescription.model_json_schema()
    # Adjacent string literals are joined verbatim, so the first fragment
    # must end with a space; otherwise the prompt reads "car_type ofthe".
    prompt = ("Generate a JSON with the brand, model and car_type of "
              "the most iconic car from the 90's")
    completion = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        # The JSON schema is sent in ``extra_body`` as ``guided_json``.
        extra_body={"guided_json": json_schema},
    )
    print_completion_details(completion)
# Guided decoding by Grammar # Guided decoding by Grammar
simplified_sql_grammar = """ def guided_grammar_completion(client: OpenAI, model: str):
simplified_sql_grammar = """
root ::= select_statement root ::= select_statement
select_statement ::= "SELECT " column " from " table " where " condition select_statement ::= "SELECT " column " from " table " where " condition
@ -112,18 +113,43 @@ simplified_sql_grammar = """
condition ::= column "= " number condition ::= column "= " number
number ::= "1 " | "2 " number ::= "1 " | "2 "
""" """
# This may be very slow https://github.com/vllm-project/vllm/issues/12122 # This may be very slow https://github.com/vllm-project/vllm/issues/12122
prompt = ("Generate an SQL query to show the 'username' and 'email'" prompt = ("Generate an SQL query to show the 'username' and 'email'"
"from the 'users' table.") "from the 'users' table.")
completion = client.chat.completions.create( completion = client.chat.completions.create(
model=model, model=model,
messages=[{ messages=[{
"role": "user", "role": "user",
"content": prompt, "content": prompt,
}], }],
extra_body={"guided_grammar": simplified_sql_grammar}, extra_body={"guided_grammar": simplified_sql_grammar},
) )
print("reasoning_content: ", completion.choices[0].message.reasoning_content) print_completion_details(completion)
print("content: ", completion.choices[0].message.content)
def main():
    """Run every example against the first model the server lists.

    The client is built from the module-level ``openai_api_key`` and
    ``openai_api_base`` settings. Each example prints its own output.
    """
    client: OpenAI = OpenAI(
        api_key=openai_api_key,
        base_url=openai_api_base,
    )
    # Use whichever model the server reports first.
    available_models = client.models.list()
    model: str = available_models.data[0].id

    # (heading, example) pairs, executed top to bottom. Every heading but
    # the first starts with a newline to separate the sections.
    examples = (
        ("Guided Regex Completion:", guided_regex_completion),
        ("\nGuided JSON Completion (People):", guided_json_completion),
        ("\nGuided JSON Completion (CarDescription):",
         guided_car_json_completion),
        ("\nGuided Grammar Completion:", guided_grammar_completion),
    )
    for heading, run_example in examples:
        print(heading)
        run_example(client, model)


if __name__ == "__main__":
    main()