mirror of https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-24 17:44:30 +08:00
[Misc] refactor example series - structured outputs (#17040)
Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
This commit is contained in:
parent 82e43b2d7e
commit 1bcbcbf574
@@ -1,43 +1,49 @@
 # SPDX-License-Identifier: Apache-2.0
+"""
+To run this example, you need to start the vLLM server:
+
+```bash
+vllm serve Qwen/Qwen2.5-3B-Instruct
+```
+"""
 
 from enum import Enum
 
 from openai import BadRequestError, OpenAI
 from pydantic import BaseModel
 
-client = OpenAI(
-    base_url="http://localhost:8000/v1",
-    api_key="-",
-)
 
 # Guided decoding by Choice (list of possible options)
-completion = client.chat.completions.create(
-    model="Qwen/Qwen2.5-3B-Instruct",
-    messages=[{
-        "role": "user",
-        "content": "Classify this sentiment: vLLM is wonderful!"
-    }],
-    extra_body={"guided_choice": ["positive", "negative"]},
-)
-print(completion.choices[0].message.content)
+def guided_choice_completion(client: OpenAI, model: str):
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[{
+            "role": "user",
+            "content": "Classify this sentiment: vLLM is wonderful!"
+        }],
+        extra_body={"guided_choice": ["positive", "negative"]},
+    )
+    return completion.choices[0].message.content
 
 
 # Guided decoding by Regex
-prompt = ("Generate an email address for Alan Turing, who works in Enigma."
-          "End in .com and new line. Example result:"
-          "alan.turing@enigma.com\n")
-
-completion = client.chat.completions.create(
-    model="Qwen/Qwen2.5-3B-Instruct",
-    messages=[{
-        "role": "user",
-        "content": prompt,
-    }],
-    extra_body={
-        "guided_regex": r"\w+@\w+\.com\n",
-        "stop": ["\n"]
-    },
-)
-print(completion.choices[0].message.content)
+def guided_regex_completion(client: OpenAI, model: str):
+    prompt = ("Generate an email address for Alan Turing, who works in Enigma."
+              "End in .com and new line. Example result:"
+              "alan.turing@enigma.com\n")
+
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[{
+            "role": "user",
+            "content": prompt,
+        }],
+        extra_body={
+            "guided_regex": r"\w+@\w+\.com\n",
+            "stop": ["\n"]
+        },
+    )
+    return completion.choices[0].message.content
 
 
 # Guided decoding by JSON using Pydantic schema
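A note on the mechanism this diff relies on: `extra_body` is the stock `openai` client's pass-through for non-standard fields, so `guided_choice` simply becomes an ordinary key in the request JSON sent to vLLM's OpenAI-compatible endpoint. A minimal sketch of the same guided-choice request made with plain `requests` (illustrative only, not part of this commit; assumes the server from the docstring is running locally):

```python
import requests

# Same payload that guided_choice_completion() sends; keys passed via
# `extra_body` are merged straight into this JSON body.
response = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "Qwen/Qwen2.5-3B-Instruct",
        "messages": [{
            "role": "user",
            "content": "Classify this sentiment: vLLM is wonderful!"
        }],
        "guided_choice": ["positive", "negative"],
    },
)
print(response.json()["choices"][0]["message"]["content"])
```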
@@ -54,66 +60,100 @@ class CarDescription(BaseModel):
     car_type: CarType
 
 
-json_schema = CarDescription.model_json_schema()
+def guided_json_completion(client: OpenAI, model: str):
+    json_schema = CarDescription.model_json_schema()
 
-prompt = ("Generate a JSON with the brand, model and car_type of"
-          "the most iconic car from the 90's")
-completion = client.chat.completions.create(
-    model="Qwen/Qwen2.5-3B-Instruct",
-    messages=[{
-        "role": "user",
-        "content": prompt,
-    }],
-    extra_body={"guided_json": json_schema},
-)
-print(completion.choices[0].message.content)
+    prompt = ("Generate a JSON with the brand, model and car_type of"
+              "the most iconic car from the 90's")
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[{
+            "role": "user",
+            "content": prompt,
+        }],
+        extra_body={"guided_json": json_schema},
+    )
+    return completion.choices[0].message.content
 
 
 # Guided decoding by Grammar
-simplified_sql_grammar = """
-root ::= select_statement
+def guided_grammar_completion(client: OpenAI, model: str):
+    simplified_sql_grammar = """
+    root ::= select_statement
 
-select_statement ::= "SELECT " column " from " table " where " condition
+    select_statement ::= "SELECT " column " from " table " where " condition
 
-column ::= "col_1 " | "col_2 "
+    column ::= "col_1 " | "col_2 "
 
-table ::= "table_1 " | "table_2 "
+    table ::= "table_1 " | "table_2 "
 
-condition ::= column "= " number
+    condition ::= column "= " number
 
-number ::= "1 " | "2 "
-"""
+    number ::= "1 " | "2 "
+    """
 
-prompt = ("Generate an SQL query to show the 'username' and 'email'"
-          "from the 'users' table.")
-completion = client.chat.completions.create(
-    model="Qwen/Qwen2.5-3B-Instruct",
-    messages=[{
-        "role": "user",
-        "content": prompt,
-    }],
-    extra_body={"guided_grammar": simplified_sql_grammar},
-)
-print(completion.choices[0].message.content)
+    prompt = ("Generate an SQL query to show the 'username' and 'email'"
+              "from the 'users' table.")
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[{
+            "role": "user",
+            "content": prompt,
+        }],
+        extra_body={"guided_grammar": simplified_sql_grammar},
+    )
+    return completion.choices[0].message.content
 
 
 # Extra backend options
-prompt = ("Generate an email address for Alan Turing, who works in Enigma."
-          "End in .com and new line. Example result:"
-          "alan.turing@enigma.com\n")
-
-try:
-    # The no-fallback option forces vLLM to use xgrammar, so when it fails
-    # you get a 400 with the reason why
-    completion = client.chat.completions.create(
-        model="Qwen/Qwen2.5-3B-Instruct",
-        messages=[{
-            "role": "user",
-            "content": prompt,
-        }],
-        extra_body={
-            "guided_regex": r"\w+@\w+\.com\n",
-            "stop": ["\n"],
-            "guided_decoding_backend": "xgrammar:no-fallback"
-        },
-    )
-except BadRequestError as e:
-    print("This error is expected:", e)
+def extra_backend_options_completion(client: OpenAI, model: str):
+    prompt = ("Generate an email address for Alan Turing, who works in Enigma."
+              "End in .com and new line. Example result:"
+              "alan.turing@enigma.com\n")
+
+    try:
+        # The no-fallback option forces vLLM to use xgrammar, so when it fails
+        # you get a 400 with the reason why
+        completion = client.chat.completions.create(
+            model=model,
+            messages=[{
+                "role": "user",
+                "content": prompt,
+            }],
+            extra_body={
+                "guided_regex": r"\w+@\w+\.com\n",
+                "stop": ["\n"],
+                "guided_decoding_backend": "xgrammar:no-fallback"
+            },
+        )
+        return completion.choices[0].message.content
+    except BadRequestError as e:
+        print("This error is expected:", e)
+
+
+def main():
+    client: OpenAI = OpenAI(
+        base_url="http://localhost:8000/v1",
+        api_key="-",
+    )
+
+    model = "Qwen/Qwen2.5-3B-Instruct"
+
+    print("Guided Choice Completion:")
+    print(guided_choice_completion(client, model))
+
+    print("\nGuided Regex Completion:")
+    print(guided_regex_completion(client, model))
+
+    print("\nGuided JSON Completion:")
+    print(guided_json_completion(client, model))
+
+    print("\nGuided Grammar Completion:")
+    print(guided_grammar_completion(client, model))
+
+    print("\nExtra Backend Options Completion:")
+    print(extra_backend_options_completion(client, model))
+
+
+if __name__ == "__main__":
+    main()
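Because `guided_json` constrains decoding to the schema emitted by `CarDescription.model_json_schema()`, the string the helper returns should always parse back into the model. A short round-trip sketch (not in the diff; assumes pydantic v2 and the functions above are in scope):

```python
# Hypothetical usage mirroring main(): validate the constrained output.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="-")
model = "Qwen/Qwen2.5-3B-Instruct"

raw = guided_json_completion(client, model)
car = CarDescription.model_validate_json(raw)  # raises if the JSON drifts
print(car.brand, car.model, car.car_type)
```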
@@ -25,29 +25,28 @@ from pydantic import BaseModel
 openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
 
-client = OpenAI(
-    api_key=openai_api_key,
-    base_url=openai_api_base,
-)
 
-models = client.models.list()
-model = models.data[0].id
+def print_completion_details(completion):
+    print("reasoning_content: ",
+          completion.choices[0].message.reasoning_content)
+    print("content: ", completion.choices[0].message.content)
 
 
 # Guided decoding by Regex
-prompt = ("What is the capital of France?")
+def guided_regex_completion(client: OpenAI, model: str):
+    prompt = ("What is the capital of France?")
 
-completion = client.chat.completions.create(
-    model=model,
-    messages=[{
-        "role": "user",
-        "content": prompt,
-    }],
-    extra_body={
-        "guided_regex": "(Paris|London)",
-    },
-)
-print("reasoning_content: ", completion.choices[0].message.reasoning_content)
-print("content: ", completion.choices[0].message.content)
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[{
+            "role": "user",
+            "content": prompt,
+        }],
+        extra_body={
+            "guided_regex": "(Paris|London)",
+        },
+    )
+    print_completion_details(completion)
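This second file is the reasoning variant of the example: `reasoning_content` is a vLLM extension field that is filled in only when the server was launched with a reasoning parser, otherwise it can be absent or `None`. A defensive variant of the new helper (a sketch, not part of the commit):

```python
def print_completion_details_safe(completion):
    message = completion.choices[0].message
    # Guard against servers started without a reasoning parser, where
    # the reasoning_content extension field may be missing or None.
    reasoning = getattr(message, "reasoning_content", None)
    if reasoning is not None:
        print("reasoning_content: ", reasoning)
    print("content: ", message.content)
```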
 class People(BaseModel):
@@ -55,19 +54,19 @@ class People(BaseModel):
     age: int
 
 
-json_schema = People.model_json_schema()
+def guided_json_completion(client: OpenAI, model: str):
+    json_schema = People.model_json_schema()
 
-prompt = ("Generate a JSON with the name and age of one random person.")
-completion = client.chat.completions.create(
-    model=model,
-    messages=[{
-        "role": "user",
-        "content": prompt,
-    }],
-    extra_body={"guided_json": json_schema},
-)
-print("reasoning_content: ", completion.choices[0].message.reasoning_content)
-print("content: ", completion.choices[0].message.content)
+    prompt = ("Generate a JSON with the name and age of one random person.")
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[{
+            "role": "user",
+            "content": prompt,
+        }],
+        extra_body={"guided_json": json_schema},
+    )
+    print_completion_details(completion)
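For reference, the schema that `People.model_json_schema()` hands to `guided_json` is tiny. A sketch of roughly what it produces under pydantic v2, assuming the class's other field is `name: str` as the prompt implies (exact output varies by pydantic version):

```python
from pydantic import BaseModel

class People(BaseModel):
    name: str  # assumed field, not visible in this hunk
    age: int

print(People.model_json_schema())
# Roughly:
# {'properties': {'name': {'title': 'Name', 'type': 'string'},
#                 'age': {'title': 'Age', 'type': 'integer'}},
#  'required': ['name', 'age'], 'title': 'People', 'type': 'object'}
```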
 # Guided decoding by JSON using Pydantic schema
@@ -84,46 +83,73 @@ class CarDescription(BaseModel):
     car_type: CarType
 
 
-json_schema = CarDescription.model_json_schema()
+def guided_car_json_completion(client: OpenAI, model: str):
+    json_schema = CarDescription.model_json_schema()
 
-prompt = ("Generate a JSON with the brand, model and car_type of"
-          "the most iconic car from the 90's")
-completion = client.chat.completions.create(
-    model=model,
-    messages=[{
-        "role": "user",
-        "content": prompt,
-    }],
-    extra_body={"guided_json": json_schema},
-)
-print("reasoning_content: ", completion.choices[0].message.reasoning_content)
-print("content: ", completion.choices[0].message.content)
+    prompt = ("Generate a JSON with the brand, model and car_type of"
+              "the most iconic car from the 90's")
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[{
+            "role": "user",
+            "content": prompt,
+        }],
+        extra_body={"guided_json": json_schema},
+    )
+    print_completion_details(completion)
 
 
 # Guided decoding by Grammar
-simplified_sql_grammar = """
-root ::= select_statement
+def guided_grammar_completion(client: OpenAI, model: str):
+    simplified_sql_grammar = """
+    root ::= select_statement
 
-select_statement ::= "SELECT " column " from " table " where " condition
+    select_statement ::= "SELECT " column " from " table " where " condition
 
-column ::= "col_1 " | "col_2 "
+    column ::= "col_1 " | "col_2 "
 
-table ::= "table_1 " | "table_2 "
+    table ::= "table_1 " | "table_2 "
 
-condition ::= column "= " number
+    condition ::= column "= " number
 
-number ::= "1 " | "2 "
-"""
+    number ::= "1 " | "2 "
+    """
 
-# This may be very slow https://github.com/vllm-project/vllm/issues/12122
-prompt = ("Generate an SQL query to show the 'username' and 'email'"
-          "from the 'users' table.")
-completion = client.chat.completions.create(
-    model=model,
-    messages=[{
-        "role": "user",
-        "content": prompt,
-    }],
-    extra_body={"guided_grammar": simplified_sql_grammar},
-)
-print("reasoning_content: ", completion.choices[0].message.reasoning_content)
-print("content: ", completion.choices[0].message.content)
+    # This may be very slow https://github.com/vllm-project/vllm/issues/12122
+    prompt = ("Generate an SQL query to show the 'username' and 'email'"
+              "from the 'users' table.")
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[{
+            "role": "user",
+            "content": prompt,
+        }],
+        extra_body={"guided_grammar": simplified_sql_grammar},
+    )
+    print_completion_details(completion)
+
+
+def main():
+    client: OpenAI = OpenAI(
+        api_key=openai_api_key,
+        base_url=openai_api_base,
+    )
+
+    models = client.models.list()
+    model: str = models.data[0].id
+
+    print("Guided Regex Completion:")
+    guided_regex_completion(client, model)
+
+    print("\nGuided JSON Completion (People):")
+    guided_json_completion(client, model)
+
+    print("\nGuided JSON Completion (CarDescription):")
+    guided_car_json_completion(client, model)
+
+    print("\nGuided Grammar Completion:")
+    guided_grammar_completion(client, model)
+
+
+if __name__ == "__main__":
+    main()
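A side note on the grammar used in both files: the EBNF admits exactly sixteen sentences, so guided decoding is forced onto one of them no matter what the prompt asks for ('username' and 'email' are not derivable). A quick enumeration (illustrative only, not part of the commit):

```python
from itertools import product

columns = ["col_1 ", "col_2 "]
tables = ["table_1 ", "table_2 "]
numbers = ["1 ", "2 "]

# Expand: select_statement ::= "SELECT " column " from " table " where " condition
# and:    condition        ::= column "= " number
for col, tbl, cond_col, num in product(columns, tables, columns, numbers):
    print("SELECT " + col + " from " + tbl + " where " + cond_col + "= " + num)
```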