# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Set up this example by starting a vLLM OpenAI-compatible server.

Reasoning models can be used through the Responses API as seen here:
https://platform.openai.com/docs/api-reference/responses

For example:

vllm serve Qwen/Qwen3-8B --reasoning-parser qwen3
"""

from openai import OpenAI

input_messages = [{"role": "user", "content": "What model are you?"}]


def main():
    base_url = "http://localhost:8000/v1"
    client = OpenAI(base_url=base_url, api_key="empty")
    model = "Qwen/Qwen3-8B"  # get_first_model(client)
    response = client.responses.create(
        model=model,
        input=input_messages,
    )
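
    # Hedged sketch (not in the original example): peek at the reasoning
    # items the server returned before carrying them forward. The exact
    # fields on a reasoning item can vary by server and SDK version, so
    # attribute access is guarded here.
    for item in response.output:
        if item.type == "reasoning":
            for part in getattr(item, "content", None) or []:
                print(getattr(part, "text", part))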

    # Append the reasoning items to the conversation so the model can see
    # its own prior reasoning on the next turn.
    for message in response.output:
        if message.type == "reasoning":
            input_messages.append(message)

    response_2 = client.responses.create(
        model=model,
        input=input_messages,
    )
    print(response_2.output_text)
    # Example output:
    # I am Qwen, a large language model developed by Alibaba Cloud.
    # I am designed to assist with a wide range of tasks, including
    # answering questions, creating content, coding, and engaging in
    # conversations. I can help with various topics and provide
    # information or support in multiple languages. How can I assist you today?
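
    # Hedged sketch (assumption, not part of the original example): the same
    # request can also be streamed. With stream=True the OpenAI SDK yields
    # typed events; the "response.output_text.delta" event name follows the
    # Responses API spec but may differ across server/SDK versions.
    stream = client.responses.create(
        model=model,
        input=input_messages,
        stream=True,
    )
    for event in stream:
        if event.type == "response.output_text.delta":
            print(event.delta, end="")
    print()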


if __name__ == "__main__":
    main()