From d1211f8794c28da64f80e55464b4ce6f97b2c0cb Mon Sep 17 00:00:00 2001
From: Robin <863579016@qq.com>
Date: Mon, 19 May 2025 07:04:07 +0800
Subject: [PATCH] [Doc] Add doc to explain the usage of Qwen3 thinking (#18291)

Signed-off-by: WangErXiao <863579016@qq.com>
---
 docs/source/features/reasoning_outputs.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/source/features/reasoning_outputs.md b/docs/source/features/reasoning_outputs.md
index 7a761ff9a4d9..bf4f8901a11a 100644
--- a/docs/source/features/reasoning_outputs.md
+++ b/docs/source/features/reasoning_outputs.md
@@ -19,6 +19,7 @@ vLLM currently supports the following reasoning models:
 
 :::{note}
 IBM Granite 3.2 reasoning is disabled by default; to enable it, you must also pass `thinking=True` in your `chat_template_kwargs`.
+Reasoning in the Qwen3 series is enabled by default; to disable it, you must pass `enable_thinking=False` in your `chat_template_kwargs`.
 :::
 
 ## Quickstart
@@ -49,6 +50,8 @@ model = models.data[0].id
 # Round 1
 messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
 # For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
+# For the Qwen3 series, to disable thinking, add:
+# extra_body={"chat_template_kwargs": {"enable_thinking": False}}
 response = client.chat.completions.create(model=model, messages=messages)
 
 reasoning_content = response.choices[0].message.reasoning_content
@@ -104,6 +107,8 @@ model = models.data[0].id
 
 messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
 # For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
+# For the Qwen3 series, to disable thinking, add:
+# extra_body={"chat_template_kwargs": {"enable_thinking": False}}
 stream = client.chat.completions.create(model=model,
                                         messages=messages,
                                         stream=True)
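
For reference, below is a minimal, self-contained sketch of the option this patch documents. It is not part of the patch itself, and the serve command, model name, and port are assumptions (a vLLM OpenAI-compatible server started along the lines of `vllm serve Qwen/Qwen3-8B --reasoning-parser qwen3`):

```python
# Sketch: disabling Qwen3 thinking per request via chat_template_kwargs.
# Assumes a vLLM OpenAI-compatible server is already running locally, e.g.:
#   vllm serve Qwen/Qwen3-8B --reasoning-parser qwen3
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
model = client.models.list().data[0].id

messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]

# Qwen3 thinking is on by default; pass enable_thinking=False to turn it
# off for this request only. Other requests to the same server are unaffected.
response = client.chat.completions.create(
    model=model,
    messages=messages,
    extra_body={"chat_template_kwargs": {"enable_thinking": False}},
)

# With thinking disabled, the answer arrives in `content`, and
# `reasoning_content` is expected to be empty/None.
print("reasoning_content:", response.choices[0].message.reasoning_content)
print("content:", response.choices[0].message.content)
```

Because `chat_template_kwargs` is applied per request, this toggles thinking without restarting the server, mirroring how the Granite `thinking=True` example in the same document works.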