[DOC] Add reasoning capability to vLLM streamlit code (#19557)

This commit is contained in:
Navanit Dubey 2025-06-16 16:39:12 +05:30 committed by GitHub
parent ee35e96ac3
commit 3e7506975c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -11,6 +11,7 @@ Features:
- Streaming response display - Streaming response display
- Configurable API endpoint - Configurable API endpoint
- Real-time chat history - Real-time chat history
- Reasoning Display: Optional thinking process visualization
Requirements: Requirements:
pip install streamlit openai pip install streamlit openai
@ -51,13 +52,33 @@ if "messages" not in st.session_state:
# Seed the remaining session-state entries on the first run only; values that
# already exist (from an earlier rerun of the script) are left untouched.
#   active_session - ID of the session highlighted in the sidebar
#   show_reasoning - stored reasoning text for assistant messages
#   api_base_url   - API endpoint, initially the module-level default
for _state_key, _state_default in (
    ("active_session", None),
    ("show_reasoning", {}),
    ("api_base_url", openai_api_base),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
def create_new_chat_session(): def create_new_chat_session():
"""Create a new chat session with timestamp as ID""" """Create a new chat session with timestamp as unique identifier.
This function initializes a new chat session by:
1. Generating a timestamp-based session ID
2. Creating an empty message list for the new session
3. Setting the new session as both current and active session
4. Resetting the messages list for the new session
Returns:
None
Session State Updates:
- sessions: Adds new empty message list with timestamp key
- current_session: Sets to new session ID
- active_session: Sets to new session ID
- messages: Resets to empty list
"""
session_id = datetime.now().strftime("%Y-%m-%d %H:%M:%S") session_id = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
st.session_state.sessions[session_id] = [] st.session_state.sessions[session_id] = []
st.session_state.current_session = session_id st.session_state.current_session = session_id
@ -66,30 +87,98 @@ def create_new_chat_session():
def switch_to_chat_session(session_id):
    """Make ``session_id`` the current and active chat session.

    Args:
        session_id (str): Key of an existing entry in
            ``st.session_state.sessions`` (a timestamp string).

    Session State Updates:
        - current_session: set to ``session_id``
        - active_session: set to ``session_id``
        - messages: bound to the message list stored for ``session_id``
    """
    state = st.session_state
    state.active_session = session_id
    state.current_session = session_id
    # Bind (not copy) the stored list so later appends land in the session.
    state.messages = state.sessions[session_id]
def get_llm_response(messages, model, reason, content_ph=None, reasoning_ph=None):
    """Stream a chat completion and return the response and reasoning text.

    Args:
        messages (list): Conversation messages, each a dict with 'role' and
            'content'.
        model (str): Model identifier passed to the chat completions API.
        reason (bool): When true, thinking is requested through
            ``chat_template_kwargs`` and ``reasoning_content`` deltas are
            collected.
        content_ph: Optional Streamlit placeholder that receives the response
            text as it streams. When omitted, the text is still collected.
        reasoning_ph: Optional Streamlit placeholder in which a live
            "Thinking Process" expander is created. When omitted, reasoning
            text is still collected.

    Returns:
        tuple[str, str]: ``(response_text, reasoning_text)``. The reasoning
        text is empty when ``reason`` is false or the server sends none. If
        the API call or the stream fails, the error is shown with
        ``st.error`` and ``("Error: <details>", "")`` is returned; the
        function does not raise.
    """
    # Block cursor appended while streaming to signal that output is ongoing.
    cursor = "▌"
    full_text = ""
    think_text = ""
    live_think = None

    # Build request parameters
    params = {"model": model, "messages": messages, "stream": True}
    if reason:
        params["extra_body"] = {"chat_template_kwargs": {"enable_thinking": True}}

    try:
        response = client.chat.completions.create(**params)
        # Defensive: a plain string is displayed as-is rather than iterated.
        if isinstance(response, str):
            if content_ph is not None:
                content_ph.markdown(response)
            return response, ""

        # Prepare reasoning expander above content
        if reason and reasoning_ph is not None:
            exp = reasoning_ph.expander("💭 Thinking Process (live)", expanded=True)
            live_think = exp.empty()

        for chunk in response:
            # Some chunks carry no choices; indexing them would raise and
            # abort the whole stream.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta

            # Collect reasoning first; display is optional and must not gate
            # accumulation.
            if reason:
                reasoning_piece = getattr(delta, "reasoning_content", None)
                if reasoning_piece:
                    think_text += reasoning_piece
                    if live_think is not None:
                        live_think.markdown(think_text + cursor)

            # Then collect content
            content_piece = getattr(delta, "content", None)
            if content_piece:
                full_text += content_piece
                if content_ph is not None:
                    content_ph.markdown(full_text + cursor)

        # Finalize displays without the cursor: reasoning above, content below
        if live_think is not None:
            live_think.markdown(think_text)
        if content_ph is not None:
            content_ph.markdown(full_text)

        return full_text, think_text
    except Exception as e:
        st.error(f"Error details: {str(e)}")
        return f"Error: {str(e)}", ""
# Sidebar - API Settings first # Sidebar - API Settings first
@ -108,6 +197,7 @@ st.sidebar.title("Chat Sessions")
if st.sidebar.button("New Session"): if st.sidebar.button("New Session"):
create_new_chat_session() create_new_chat_session()
# Display all sessions in reverse chronological order # Display all sessions in reverse chronological order
for session_id in sorted(st.session_state.sessions.keys(), reverse=True): for session_id in sorted(st.session_state.sessions.keys(), reverse=True):
# Mark the active session with a pinned button # Mark the active session with a pinned button
@ -143,47 +233,79 @@ if st.session_state.current_session is None:
create_new_chat_session() create_new_chat_session()
st.session_state.active_session = st.session_state.current_session st.session_state.active_session = st.session_state.current_session
# Display chat history for the current session.
# Reasoning text lives in ``show_reasoning`` under a (session_id, message_index)
# key: a bare message index would collide across sessions and attach one
# session's reasoning to another session's messages.
for idx, msg in enumerate(st.session_state.messages):
    # Render user messages normally
    if msg["role"] == "user":
        with st.chat_message("user"):
            st.write(msg["content"])
    # Render assistant messages with reasoning above
    else:
        stored_reasoning = st.session_state.show_reasoning.get(
            (st.session_state.current_session, idx)
        )
        if stored_reasoning:
            with st.expander("💭 Thinking Process", expanded=False):
                st.markdown(stored_reasoning)
        with st.chat_message("assistant"):
            st.write(msg["content"])


@st.cache_data(show_spinner=False)
def server_supports_reasoning(model_name=None, api_base=None):
    """Probe whether the served model returns reasoning content.

    Sends a single non-streaming "Hi" request and checks the reply for a
    non-empty ``reasoning_content`` attribute.

    Args:
        model_name (str | None): Model to probe; falls back to the
            module-level ``model`` when omitted.
        api_base (str | None): Not used in the request itself. It is part of
            the signature so the cached result is keyed by endpoint.
            NOTE(review): assumes ``client`` is rebuilt from this URL
            elsewhere in the file - confirm.

    Returns:
        bool: True if the reply carried non-empty reasoning content.
    """
    resp = client.chat.completions.create(
        model=model_name or model,
        messages=[{"role": "user", "content": "Hi"}],
        stream=False,
    )
    return bool(getattr(resp.choices[0].message, "reasoning_content", None))


# Check support. Passing the model and endpoint makes them part of the cache
# key, so changing either triggers a fresh probe. A failed probe (for example
# an unreachable server) disables the toggle instead of crashing the page.
try:
    supports_reasoning = server_supports_reasoning(
        model, st.session_state.api_base_url
    )
except Exception as e:
    st.sidebar.warning(f"Could not check reasoning support: {str(e)}")
    supports_reasoning = False

# Add reasoning toggle in sidebar if supported
reason = False  # Default to False
if supports_reasoning:
    reason = st.sidebar.checkbox("Enable Reasoning", value=False)
else:
    st.sidebar.markdown(
        "<span style='color:gray;'>Reasoning unavailable for this model.</span>",
        unsafe_allow_html=True,
    )

# Handle user input and generate the LLM response
if prompt := st.chat_input("Type your message here..."):
    # Save and display user message
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.session_state.sessions[st.session_state.current_session] = (
        st.session_state.messages
    )
    with st.chat_message("user"):
        st.write(prompt)

    # Prepare LLM messages (role/content only)
    msgs = [
        {"role": m["role"], "content": m["content"]} for m in st.session_state.messages
    ]

    # Stream assistant response
    with st.chat_message("assistant"):
        # Placeholders: reasoning above, content below
        reason_ph = st.empty()
        content_ph = st.empty()
        full, think = get_llm_response(msgs, model, reason, content_ph, reason_ph)

        # Index the assistant message will occupy once appended
        message_index = len(st.session_state.messages)
        # Save assistant reply
        st.session_state.messages.append({"role": "assistant", "content": full})
        # Persist reasoning, scoped to this session, if any was produced
        if reason and think:
            st.session_state.show_reasoning[
                (st.session_state.current_session, message_index)
            ] = think