mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-30 10:07:03 +08:00
[DOC] Add reasoning capability to vLLM streamlit code (#19557)
This commit is contained in:
parent
ee35e96ac3
commit
3e7506975c
@ -11,6 +11,7 @@ Features:
|
|||||||
- Streaming response display
|
- Streaming response display
|
||||||
- Configurable API endpoint
|
- Configurable API endpoint
|
||||||
- Real-time chat history
|
- Real-time chat history
|
||||||
|
- Reasoning Display: Optional thinking process visualization
|
||||||
|
|
||||||
Requirements:
|
Requirements:
|
||||||
pip install streamlit openai
|
pip install streamlit openai
|
||||||
@ -51,13 +52,33 @@ if "messages" not in st.session_state:
|
|||||||
if "active_session" not in st.session_state:
|
if "active_session" not in st.session_state:
|
||||||
st.session_state.active_session = None
|
st.session_state.active_session = None
|
||||||
|
|
||||||
|
# Add new session state for reasoning
|
||||||
|
if "show_reasoning" not in st.session_state:
|
||||||
|
st.session_state.show_reasoning = {}
|
||||||
|
|
||||||
# Initialize session state for API base URL
|
# Initialize session state for API base URL
|
||||||
if "api_base_url" not in st.session_state:
|
if "api_base_url" not in st.session_state:
|
||||||
st.session_state.api_base_url = openai_api_base
|
st.session_state.api_base_url = openai_api_base
|
||||||
|
|
||||||
|
|
||||||
def create_new_chat_session():
|
def create_new_chat_session():
|
||||||
"""Create a new chat session with timestamp as ID"""
|
"""Create a new chat session with timestamp as unique identifier.
|
||||||
|
|
||||||
|
This function initializes a new chat session by:
|
||||||
|
1. Generating a timestamp-based session ID
|
||||||
|
2. Creating an empty message list for the new session
|
||||||
|
3. Setting the new session as both current and active session
|
||||||
|
4. Resetting the messages list for the new session
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None
|
||||||
|
|
||||||
|
Session State Updates:
|
||||||
|
- sessions: Adds new empty message list with timestamp key
|
||||||
|
- current_session: Sets to new session ID
|
||||||
|
- active_session: Sets to new session ID
|
||||||
|
- messages: Resets to empty list
|
||||||
|
"""
|
||||||
session_id = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
session_id = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
st.session_state.sessions[session_id] = []
|
st.session_state.sessions[session_id] = []
|
||||||
st.session_state.current_session = session_id
|
st.session_state.current_session = session_id
|
||||||
@ -66,30 +87,98 @@ def create_new_chat_session():
|
|||||||
|
|
||||||
|
|
||||||
def switch_to_chat_session(session_id):
|
def switch_to_chat_session(session_id):
|
||||||
"""Switch to a different chat session"""
|
"""Switch the active chat context to a different session.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
session_id (str): The timestamp ID of the session to switch to
|
||||||
|
|
||||||
|
This function handles chat session switching by:
|
||||||
|
1. Setting the specified session as current
|
||||||
|
2. Updating the active session marker
|
||||||
|
3. Loading the messages history from the specified session
|
||||||
|
|
||||||
|
Session State Updates:
|
||||||
|
- current_session: Updated to specified session_id
|
||||||
|
- active_session: Updated to specified session_id
|
||||||
|
- messages: Loaded from sessions[session_id]
|
||||||
|
"""
|
||||||
st.session_state.current_session = session_id
|
st.session_state.current_session = session_id
|
||||||
st.session_state.active_session = session_id
|
st.session_state.active_session = session_id
|
||||||
st.session_state.messages = st.session_state.sessions[session_id]
|
st.session_state.messages = st.session_state.sessions[session_id]
|
||||||
|
|
||||||
|
|
||||||
def get_llm_response(messages, model):
|
def get_llm_response(messages, model, reason, content_ph=None, reasoning_ph=None):
|
||||||
"""Get streaming response from llm
|
"""Generate and stream LLM response with optional reasoning process.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
messages: List of message dictionaries
|
messages (list): List of conversation message dicts with 'role' and 'content'
|
||||||
model: Name of model
|
model (str): The model identifier to use for generation
|
||||||
|
reason (bool): Whether to enable and display reasoning process
|
||||||
|
content_ph (streamlit.empty): Placeholder for streaming response content
|
||||||
|
reasoning_ph (streamlit.empty): Placeholder for streaming reasoning process
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Streaming response object or error message string
|
tuple: (str, str)
|
||||||
|
- First string contains the complete response text
|
||||||
|
- Second string contains the complete reasoning text (if enabled)
|
||||||
|
|
||||||
|
Features:
|
||||||
|
- Streams both reasoning and response text in real-time
|
||||||
|
- Handles model API errors gracefully
|
||||||
|
- Supports live updating of thinking process
|
||||||
|
- Maintains separate content and reasoning displays
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Exception: Wrapped in error message if API call fails
|
||||||
|
|
||||||
|
Note:
|
||||||
|
The function uses streamlit placeholders for live updates.
|
||||||
|
When reason=True, the reasoning process appears above the response.
|
||||||
"""
|
"""
|
||||||
|
full_text = ""
|
||||||
|
think_text = ""
|
||||||
|
live_think = None
|
||||||
|
# Build request parameters
|
||||||
|
params = {"model": model, "messages": messages, "stream": True}
|
||||||
|
if reason:
|
||||||
|
params["extra_body"] = {"chat_template_kwargs": {"enable_thinking": True}}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(**params)
|
||||||
model=model, messages=messages, stream=True
|
if isinstance(response, str):
|
||||||
)
|
if content_ph:
|
||||||
return response
|
content_ph.markdown(response)
|
||||||
|
return response, ""
|
||||||
|
|
||||||
|
# Prepare reasoning expander above content
|
||||||
|
if reason and reasoning_ph:
|
||||||
|
exp = reasoning_ph.expander("💭 Thinking Process (live)", expanded=True)
|
||||||
|
live_think = exp.empty()
|
||||||
|
|
||||||
|
# Stream chunks
|
||||||
|
for chunk in response:
|
||||||
|
delta = chunk.choices[0].delta
|
||||||
|
# Stream reasoning first
|
||||||
|
if reason and hasattr(delta, "reasoning_content") and live_think:
|
||||||
|
rc = delta.reasoning_content
|
||||||
|
if rc:
|
||||||
|
think_text += rc
|
||||||
|
live_think.markdown(think_text + "▌")
|
||||||
|
# Then stream content
|
||||||
|
if hasattr(delta, "content") and delta.content and content_ph:
|
||||||
|
full_text += delta.content
|
||||||
|
content_ph.markdown(full_text + "▌")
|
||||||
|
|
||||||
|
# Finalize displays: reasoning remains above, content below
|
||||||
|
if reason and live_think:
|
||||||
|
live_think.markdown(think_text)
|
||||||
|
if content_ph:
|
||||||
|
content_ph.markdown(full_text)
|
||||||
|
|
||||||
|
return full_text, think_text
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
st.error(f"Error details: {str(e)}")
|
st.error(f"Error details: {str(e)}")
|
||||||
return f"Error: {str(e)}"
|
return f"Error: {str(e)}", ""
|
||||||
|
|
||||||
|
|
||||||
# Sidebar - API Settings first
|
# Sidebar - API Settings first
|
||||||
@ -108,6 +197,7 @@ st.sidebar.title("Chat Sessions")
|
|||||||
if st.sidebar.button("New Session"):
|
if st.sidebar.button("New Session"):
|
||||||
create_new_chat_session()
|
create_new_chat_session()
|
||||||
|
|
||||||
|
|
||||||
# Display all sessions in reverse chronological order
|
# Display all sessions in reverse chronological order
|
||||||
for session_id in sorted(st.session_state.sessions.keys(), reverse=True):
|
for session_id in sorted(st.session_state.sessions.keys(), reverse=True):
|
||||||
# Mark the active session with a pinned button
|
# Mark the active session with a pinned button
|
||||||
@ -143,47 +233,79 @@ if st.session_state.current_session is None:
|
|||||||
create_new_chat_session()
|
create_new_chat_session()
|
||||||
st.session_state.active_session = st.session_state.current_session
|
st.session_state.active_session = st.session_state.current_session
|
||||||
|
|
||||||
# Display chat history for current session
|
# Update the chat history display section
|
||||||
for message in st.session_state.messages:
|
for idx, msg in enumerate(st.session_state.messages):
|
||||||
with st.chat_message(message["role"]):
|
# Render user messages normally
|
||||||
st.write(message["content"])
|
if msg["role"] == "user":
|
||||||
|
with st.chat_message("user"):
|
||||||
|
st.write(msg["content"])
|
||||||
|
# Render assistant messages with reasoning above
|
||||||
|
else:
|
||||||
|
# If reasoning exists for this assistant message, show it above the content
|
||||||
|
if idx in st.session_state.show_reasoning:
|
||||||
|
with st.expander("💭 Thinking Process", expanded=False):
|
||||||
|
st.markdown(st.session_state.show_reasoning[idx])
|
||||||
|
with st.chat_message("assistant"):
|
||||||
|
st.write(msg["content"])
|
||||||
|
|
||||||
# Handle user input and generate llm response
|
|
||||||
|
# Setup & Cache reasoning support check
|
||||||
|
@st.cache_data(show_spinner=False)
|
||||||
|
def server_supports_reasoning():
|
||||||
|
"""Check if the current model supports reasoning capability.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the model supports reasoning, False otherwise
|
||||||
|
"""
|
||||||
|
resp = client.chat.completions.create(
|
||||||
|
model=model,
|
||||||
|
messages=[{"role": "user", "content": "Hi"}],
|
||||||
|
stream=False,
|
||||||
|
)
|
||||||
|
return hasattr(resp.choices[0].message, "reasoning_content") and bool(
|
||||||
|
resp.choices[0].message.reasoning_content
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Check support
|
||||||
|
supports_reasoning = server_supports_reasoning()
|
||||||
|
|
||||||
|
# Add reasoning toggle in sidebar if supported
|
||||||
|
reason = False # Default to False
|
||||||
|
if supports_reasoning:
|
||||||
|
reason = st.sidebar.checkbox("Enable Reasoning", value=False)
|
||||||
|
else:
|
||||||
|
st.sidebar.markdown(
|
||||||
|
"<span style='color:gray;'>Reasoning unavailable for this model.</span>",
|
||||||
|
unsafe_allow_html=True,
|
||||||
|
)
|
||||||
|
# reason remains False
|
||||||
|
|
||||||
|
# Update the input handling section
|
||||||
if prompt := st.chat_input("Type your message here..."):
|
if prompt := st.chat_input("Type your message here..."):
|
||||||
# Save user message to session
|
# Save and display user message
|
||||||
st.session_state.messages.append({"role": "user", "content": prompt})
|
st.session_state.messages.append({"role": "user", "content": prompt})
|
||||||
st.session_state.sessions[st.session_state.current_session] = (
|
st.session_state.sessions[st.session_state.current_session] = (
|
||||||
st.session_state.messages
|
st.session_state.messages
|
||||||
)
|
)
|
||||||
|
|
||||||
# Display user message
|
|
||||||
with st.chat_message("user"):
|
with st.chat_message("user"):
|
||||||
st.write(prompt)
|
st.write(prompt)
|
||||||
|
|
||||||
# Prepare messages for llm
|
# Prepare LLM messages
|
||||||
messages_for_llm = [
|
msgs = [
|
||||||
{"role": m["role"], "content": m["content"]} for m in st.session_state.messages
|
{"role": m["role"], "content": m["content"]} for m in st.session_state.messages
|
||||||
]
|
]
|
||||||
|
|
||||||
# Generate and display llm response
|
# Stream assistant response
|
||||||
with st.chat_message("assistant"):
|
with st.chat_message("assistant"):
|
||||||
message_placeholder = st.empty()
|
# Placeholders: reasoning above, content below
|
||||||
full_response = ""
|
reason_ph = st.empty()
|
||||||
|
content_ph = st.empty()
|
||||||
# Get streaming response from llm
|
full, think = get_llm_response(msgs, model, reason, content_ph, reason_ph)
|
||||||
response = get_llm_response(messages_for_llm, model)
|
# Determine index for this new assistant message
|
||||||
if isinstance(response, str):
|
message_index = len(st.session_state.messages)
|
||||||
message_placeholder.markdown(response)
|
# Save assistant reply
|
||||||
full_response = response
|
st.session_state.messages.append({"role": "assistant", "content": full})
|
||||||
else:
|
# Persist reasoning in session state if any
|
||||||
for chunk in response:
|
if reason and think:
|
||||||
if hasattr(chunk.choices[0].delta, "content"):
|
st.session_state.show_reasoning[message_index] = think
|
||||||
content = chunk.choices[0].delta.content
|
|
||||||
if content:
|
|
||||||
full_response += content
|
|
||||||
message_placeholder.markdown(full_response + "▌")
|
|
||||||
|
|
||||||
message_placeholder.markdown(full_response)
|
|
||||||
|
|
||||||
# Save llm response to session history
|
|
||||||
st.session_state.messages.append({"role": "assistant", "content": full_response})
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user