mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 20:15:42 +08:00
[Doc] Add doc to explain the usage of Qwen3 thinking (#18291)
Signed-off-by: WangErXiao <863579016@qq.com>
This commit is contained in:
parent
b6a6e7a529
commit
d1211f8794
@ -19,6 +19,7 @@ vLLM currently supports the following reasoning models:
|
|||||||
|
|
||||||
:::{note}
|
:::{note}
|
||||||
IBM Granite 3.2 reasoning is disabled by default; to enable it, you must also pass `thinking=True` in your `chat_template_kwargs`.
|
IBM Granite 3.2 reasoning is disabled by default; to enable it, you must also pass `thinking=True` in your `chat_template_kwargs`.
|
||||||
|
The reasoning feature for the Qwen3 series is enabled by default. To disable it, you must pass `enable_thinking=False` in your `chat_template_kwargs`.
|
||||||
:::
|
:::
|
||||||
|
|
||||||
## Quickstart
|
## Quickstart
|
||||||
@ -49,6 +50,8 @@ model = models.data[0].id
|
|||||||
# Round 1
|
# Round 1
|
||||||
messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
|
messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
|
||||||
# For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
|
# For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
|
||||||
|
# For the Qwen3 series, if you want to disable thinking in reasoning mode, add:
|
||||||
|
# extra_body={"chat_template_kwargs": {"enable_thinking": False}}
|
||||||
response = client.chat.completions.create(model=model, messages=messages)
|
response = client.chat.completions.create(model=model, messages=messages)
|
||||||
|
|
||||||
reasoning_content = response.choices[0].message.reasoning_content
|
reasoning_content = response.choices[0].message.reasoning_content
|
||||||
@ -104,6 +107,8 @@ model = models.data[0].id
|
|||||||
|
|
||||||
messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
|
messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
|
||||||
# For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
|
# For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
|
||||||
|
# For the Qwen3 series, if you want to disable thinking in reasoning mode, add:
|
||||||
|
# extra_body={"chat_template_kwargs": {"enable_thinking": False}}
|
||||||
stream = client.chat.completions.create(model=model,
|
stream = client.chat.completions.create(model=model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
stream=True)
|
stream=True)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user