mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-07 17:15:42 +08:00
[Misc] refactor example series - structured outputs (#17040)
Signed-off-by: reidliu41 <reid201711@gmail.com> Co-authored-by: reidliu41 <reid201711@gmail.com>
This commit is contained in:
parent
82e43b2d7e
commit
1bcbcbf574
@ -1,43 +1,49 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
"""
|
||||||
|
To run this example, you need to start the vLLM server:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
vllm serve Qwen/Qwen2.5-3B-Instruct
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
from openai import BadRequestError, OpenAI
|
from openai import BadRequestError, OpenAI
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
client = OpenAI(
|
|
||||||
base_url="http://localhost:8000/v1",
|
|
||||||
api_key="-",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Guided decoding by Choice (list of possible options)
|
# Guided decoding by Choice (list of possible options)
|
||||||
completion = client.chat.completions.create(
|
def guided_choice_completion(client: OpenAI, model: str):
|
||||||
model="Qwen/Qwen2.5-3B-Instruct",
|
completion = client.chat.completions.create(
|
||||||
messages=[{
|
model=model,
|
||||||
"role": "user",
|
messages=[{
|
||||||
"content": "Classify this sentiment: vLLM is wonderful!"
|
"role": "user",
|
||||||
}],
|
"content": "Classify this sentiment: vLLM is wonderful!"
|
||||||
extra_body={"guided_choice": ["positive", "negative"]},
|
}],
|
||||||
)
|
extra_body={"guided_choice": ["positive", "negative"]},
|
||||||
print(completion.choices[0].message.content)
|
)
|
||||||
|
return completion.choices[0].message.content
|
||||||
|
|
||||||
|
|
||||||
# Guided decoding by Regex
|
# Guided decoding by Regex
|
||||||
prompt = ("Generate an email address for Alan Turing, who works in Enigma."
|
def guided_regex_completion(client: OpenAI, model: str):
|
||||||
"End in .com and new line. Example result:"
|
prompt = ("Generate an email address for Alan Turing, who works in Enigma."
|
||||||
"alan.turing@enigma.com\n")
|
"End in .com and new line. Example result:"
|
||||||
|
"alan.turing@enigma.com\n")
|
||||||
|
|
||||||
completion = client.chat.completions.create(
|
completion = client.chat.completions.create(
|
||||||
model="Qwen/Qwen2.5-3B-Instruct",
|
model=model,
|
||||||
messages=[{
|
messages=[{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": prompt,
|
"content": prompt,
|
||||||
}],
|
}],
|
||||||
extra_body={
|
extra_body={
|
||||||
"guided_regex": r"\w+@\w+\.com\n",
|
"guided_regex": r"\w+@\w+\.com\n",
|
||||||
"stop": ["\n"]
|
"stop": ["\n"]
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
print(completion.choices[0].message.content)
|
return completion.choices[0].message.content
|
||||||
|
|
||||||
|
|
||||||
# Guided decoding by JSON using Pydantic schema
|
# Guided decoding by JSON using Pydantic schema
|
||||||
@ -54,66 +60,100 @@ class CarDescription(BaseModel):
|
|||||||
car_type: CarType
|
car_type: CarType
|
||||||
|
|
||||||
|
|
||||||
json_schema = CarDescription.model_json_schema()
|
def guided_json_completion(client: OpenAI, model: str):
|
||||||
|
json_schema = CarDescription.model_json_schema()
|
||||||
|
|
||||||
prompt = ("Generate a JSON with the brand, model and car_type of"
|
prompt = ("Generate a JSON with the brand, model and car_type of"
|
||||||
"the most iconic car from the 90's")
|
"the most iconic car from the 90's")
|
||||||
completion = client.chat.completions.create(
|
|
||||||
model="Qwen/Qwen2.5-3B-Instruct",
|
|
||||||
messages=[{
|
|
||||||
"role": "user",
|
|
||||||
"content": prompt,
|
|
||||||
}],
|
|
||||||
extra_body={"guided_json": json_schema},
|
|
||||||
)
|
|
||||||
print(completion.choices[0].message.content)
|
|
||||||
|
|
||||||
# Guided decoding by Grammar
|
|
||||||
simplified_sql_grammar = """
|
|
||||||
root ::= select_statement
|
|
||||||
|
|
||||||
select_statement ::= "SELECT " column " from " table " where " condition
|
|
||||||
|
|
||||||
column ::= "col_1 " | "col_2 "
|
|
||||||
|
|
||||||
table ::= "table_1 " | "table_2 "
|
|
||||||
|
|
||||||
condition ::= column "= " number
|
|
||||||
|
|
||||||
number ::= "1 " | "2 "
|
|
||||||
"""
|
|
||||||
|
|
||||||
prompt = ("Generate an SQL query to show the 'username' and 'email'"
|
|
||||||
"from the 'users' table.")
|
|
||||||
completion = client.chat.completions.create(
|
|
||||||
model="Qwen/Qwen2.5-3B-Instruct",
|
|
||||||
messages=[{
|
|
||||||
"role": "user",
|
|
||||||
"content": prompt,
|
|
||||||
}],
|
|
||||||
extra_body={"guided_grammar": simplified_sql_grammar},
|
|
||||||
)
|
|
||||||
print(completion.choices[0].message.content)
|
|
||||||
|
|
||||||
# Extra backend options
|
|
||||||
prompt = ("Generate an email address for Alan Turing, who works in Enigma."
|
|
||||||
"End in .com and new line. Example result:"
|
|
||||||
"alan.turing@enigma.com\n")
|
|
||||||
|
|
||||||
try:
|
|
||||||
# The no-fallback option forces vLLM to use xgrammar, so when it fails
|
|
||||||
# you get a 400 with the reason why
|
|
||||||
completion = client.chat.completions.create(
|
completion = client.chat.completions.create(
|
||||||
model="Qwen/Qwen2.5-3B-Instruct",
|
model=model,
|
||||||
messages=[{
|
messages=[{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": prompt,
|
"content": prompt,
|
||||||
}],
|
}],
|
||||||
extra_body={
|
extra_body={"guided_json": json_schema},
|
||||||
"guided_regex": r"\w+@\w+\.com\n",
|
|
||||||
"stop": ["\n"],
|
|
||||||
"guided_decoding_backend": "xgrammar:no-fallback"
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
except BadRequestError as e:
|
return completion.choices[0].message.content
|
||||||
print("This error is expected:", e)
|
|
||||||
|
|
||||||
|
# Guided decoding by Grammar
|
||||||
|
def guided_grammar_completion(client: OpenAI, model: str):
|
||||||
|
simplified_sql_grammar = """
|
||||||
|
root ::= select_statement
|
||||||
|
|
||||||
|
select_statement ::= "SELECT " column " from " table " where " condition
|
||||||
|
|
||||||
|
column ::= "col_1 " | "col_2 "
|
||||||
|
|
||||||
|
table ::= "table_1 " | "table_2 "
|
||||||
|
|
||||||
|
condition ::= column "= " number
|
||||||
|
|
||||||
|
number ::= "1 " | "2 "
|
||||||
|
"""
|
||||||
|
|
||||||
|
prompt = ("Generate an SQL query to show the 'username' and 'email'"
|
||||||
|
"from the 'users' table.")
|
||||||
|
completion = client.chat.completions.create(
|
||||||
|
model=model,
|
||||||
|
messages=[{
|
||||||
|
"role": "user",
|
||||||
|
"content": prompt,
|
||||||
|
}],
|
||||||
|
extra_body={"guided_grammar": simplified_sql_grammar},
|
||||||
|
)
|
||||||
|
return completion.choices[0].message.content
|
||||||
|
|
||||||
|
|
||||||
|
# Extra backend options
|
||||||
|
def extra_backend_options_completion(client: OpenAI, model: str):
|
||||||
|
prompt = ("Generate an email address for Alan Turing, who works in Enigma."
|
||||||
|
"End in .com and new line. Example result:"
|
||||||
|
"alan.turing@enigma.com\n")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# The no-fallback option forces vLLM to use xgrammar, so when it fails
|
||||||
|
# you get a 400 with the reason why
|
||||||
|
completion = client.chat.completions.create(
|
||||||
|
model=model,
|
||||||
|
messages=[{
|
||||||
|
"role": "user",
|
||||||
|
"content": prompt,
|
||||||
|
}],
|
||||||
|
extra_body={
|
||||||
|
"guided_regex": r"\w+@\w+\.com\n",
|
||||||
|
"stop": ["\n"],
|
||||||
|
"guided_decoding_backend": "xgrammar:no-fallback"
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return completion.choices[0].message.content
|
||||||
|
except BadRequestError as e:
|
||||||
|
print("This error is expected:", e)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
client: OpenAI = OpenAI(
|
||||||
|
base_url="http://localhost:8000/v1",
|
||||||
|
api_key="-",
|
||||||
|
)
|
||||||
|
|
||||||
|
model = "Qwen/Qwen2.5-3B-Instruct"
|
||||||
|
|
||||||
|
print("Guided Choice Completion:")
|
||||||
|
print(guided_choice_completion(client, model))
|
||||||
|
|
||||||
|
print("\nGuided Regex Completion:")
|
||||||
|
print(guided_regex_completion(client, model))
|
||||||
|
|
||||||
|
print("\nGuided JSON Completion:")
|
||||||
|
print(guided_json_completion(client, model))
|
||||||
|
|
||||||
|
print("\nGuided Grammar Completion:")
|
||||||
|
print(guided_grammar_completion(client, model))
|
||||||
|
|
||||||
|
print("\nExtra Backend Options Completion:")
|
||||||
|
print(extra_backend_options_completion(client, model))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|||||||
@ -25,29 +25,28 @@ from pydantic import BaseModel
|
|||||||
openai_api_key = "EMPTY"
|
openai_api_key = "EMPTY"
|
||||||
openai_api_base = "http://localhost:8000/v1"
|
openai_api_base = "http://localhost:8000/v1"
|
||||||
|
|
||||||
client = OpenAI(
|
|
||||||
api_key=openai_api_key,
|
|
||||||
base_url=openai_api_base,
|
|
||||||
)
|
|
||||||
|
|
||||||
models = client.models.list()
|
def print_completion_details(completion):
|
||||||
model = models.data[0].id
|
print("reasoning_content: ",
|
||||||
|
completion.choices[0].message.reasoning_content)
|
||||||
|
print("content: ", completion.choices[0].message.content)
|
||||||
|
|
||||||
|
|
||||||
# Guided decoding by Regex
|
# Guided decoding by Regex
|
||||||
prompt = ("What is the capital of France?")
|
def guided_regex_completion(client: OpenAI, model: str):
|
||||||
|
prompt = ("What is the capital of France?")
|
||||||
|
|
||||||
completion = client.chat.completions.create(
|
completion = client.chat.completions.create(
|
||||||
model=model,
|
model=model,
|
||||||
messages=[{
|
messages=[{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": prompt,
|
"content": prompt,
|
||||||
}],
|
}],
|
||||||
extra_body={
|
extra_body={
|
||||||
"guided_regex": "(Paris|London)",
|
"guided_regex": "(Paris|London)",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
print("reasoning_content: ", completion.choices[0].message.reasoning_content)
|
print_completion_details(completion)
|
||||||
print("content: ", completion.choices[0].message.content)
|
|
||||||
|
|
||||||
|
|
||||||
class People(BaseModel):
|
class People(BaseModel):
|
||||||
@ -55,19 +54,19 @@ class People(BaseModel):
|
|||||||
age: int
|
age: int
|
||||||
|
|
||||||
|
|
||||||
json_schema = People.model_json_schema()
|
def guided_json_completion(client: OpenAI, model: str):
|
||||||
|
json_schema = People.model_json_schema()
|
||||||
|
|
||||||
prompt = ("Generate a JSON with the name and age of one random person.")
|
prompt = ("Generate a JSON with the name and age of one random person.")
|
||||||
completion = client.chat.completions.create(
|
completion = client.chat.completions.create(
|
||||||
model=model,
|
model=model,
|
||||||
messages=[{
|
messages=[{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": prompt,
|
"content": prompt,
|
||||||
}],
|
}],
|
||||||
extra_body={"guided_json": json_schema},
|
extra_body={"guided_json": json_schema},
|
||||||
)
|
)
|
||||||
print("reasoning_content: ", completion.choices[0].message.reasoning_content)
|
print_completion_details(completion)
|
||||||
print("content: ", completion.choices[0].message.content)
|
|
||||||
|
|
||||||
|
|
||||||
# Guided decoding by JSON using Pydantic schema
|
# Guided decoding by JSON using Pydantic schema
|
||||||
@ -84,46 +83,73 @@ class CarDescription(BaseModel):
|
|||||||
car_type: CarType
|
car_type: CarType
|
||||||
|
|
||||||
|
|
||||||
json_schema = CarDescription.model_json_schema()
|
def guided_car_json_completion(client: OpenAI, model: str):
|
||||||
|
json_schema = CarDescription.model_json_schema()
|
||||||
|
|
||||||
|
prompt = ("Generate a JSON with the brand, model and car_type of"
|
||||||
|
"the most iconic car from the 90's")
|
||||||
|
completion = client.chat.completions.create(
|
||||||
|
model=model,
|
||||||
|
messages=[{
|
||||||
|
"role": "user",
|
||||||
|
"content": prompt,
|
||||||
|
}],
|
||||||
|
extra_body={"guided_json": json_schema},
|
||||||
|
)
|
||||||
|
print_completion_details(completion)
|
||||||
|
|
||||||
prompt = ("Generate a JSON with the brand, model and car_type of"
|
|
||||||
"the most iconic car from the 90's")
|
|
||||||
completion = client.chat.completions.create(
|
|
||||||
model=model,
|
|
||||||
messages=[{
|
|
||||||
"role": "user",
|
|
||||||
"content": prompt,
|
|
||||||
}],
|
|
||||||
extra_body={"guided_json": json_schema},
|
|
||||||
)
|
|
||||||
print("reasoning_content: ", completion.choices[0].message.reasoning_content)
|
|
||||||
print("content: ", completion.choices[0].message.content)
|
|
||||||
|
|
||||||
# Guided decoding by Grammar
|
# Guided decoding by Grammar
|
||||||
simplified_sql_grammar = """
|
def guided_grammar_completion(client: OpenAI, model: str):
|
||||||
root ::= select_statement
|
simplified_sql_grammar = """
|
||||||
|
root ::= select_statement
|
||||||
|
|
||||||
select_statement ::= "SELECT " column " from " table " where " condition
|
select_statement ::= "SELECT " column " from " table " where " condition
|
||||||
|
|
||||||
column ::= "col_1 " | "col_2 "
|
column ::= "col_1 " | "col_2 "
|
||||||
|
|
||||||
table ::= "table_1 " | "table_2 "
|
table ::= "table_1 " | "table_2 "
|
||||||
|
|
||||||
condition ::= column "= " number
|
condition ::= column "= " number
|
||||||
|
|
||||||
number ::= "1 " | "2 "
|
number ::= "1 " | "2 "
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# This may be very slow https://github.com/vllm-project/vllm/issues/12122
|
# This may be very slow https://github.com/vllm-project/vllm/issues/12122
|
||||||
prompt = ("Generate an SQL query to show the 'username' and 'email'"
|
prompt = ("Generate an SQL query to show the 'username' and 'email'"
|
||||||
"from the 'users' table.")
|
"from the 'users' table.")
|
||||||
completion = client.chat.completions.create(
|
completion = client.chat.completions.create(
|
||||||
model=model,
|
model=model,
|
||||||
messages=[{
|
messages=[{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": prompt,
|
"content": prompt,
|
||||||
}],
|
}],
|
||||||
extra_body={"guided_grammar": simplified_sql_grammar},
|
extra_body={"guided_grammar": simplified_sql_grammar},
|
||||||
)
|
)
|
||||||
print("reasoning_content: ", completion.choices[0].message.reasoning_content)
|
print_completion_details(completion)
|
||||||
print("content: ", completion.choices[0].message.content)
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
client: OpenAI = OpenAI(
|
||||||
|
api_key=openai_api_key,
|
||||||
|
base_url=openai_api_base,
|
||||||
|
)
|
||||||
|
|
||||||
|
models = client.models.list()
|
||||||
|
model: str = models.data[0].id
|
||||||
|
|
||||||
|
print("Guided Regex Completion:")
|
||||||
|
guided_regex_completion(client, model)
|
||||||
|
|
||||||
|
print("\nGuided JSON Completion (People):")
|
||||||
|
guided_json_completion(client, model)
|
||||||
|
|
||||||
|
print("\nGuided JSON Completion (CarDescription):")
|
||||||
|
guided_car_json_completion(client, model)
|
||||||
|
|
||||||
|
print("\nGuided Grammar Completion:")
|
||||||
|
guided_grammar_completion(client, model)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user