diff --git a/examples/online_serving/streamlit_openai_chatbot_webserver.py b/examples/online_serving/streamlit_openai_chatbot_webserver.py
index dab56172ee3a3..64c8a91782806 100644
--- a/examples/online_serving/streamlit_openai_chatbot_webserver.py
+++ b/examples/online_serving/streamlit_openai_chatbot_webserver.py
@@ -11,6 +11,7 @@ Features:
 - Streaming response display
 - Configurable API endpoint
 - Real-time chat history
+- Optional display of the model's reasoning (thinking) process

 Requirements:
     pip install streamlit openai
@@ -51,13 +52,33 @@ if "messages" not in st.session_state:
 if "active_session" not in st.session_state:
     st.session_state.active_session = None

+# Session state for reasoning: maps assistant-message index -> reasoning text
+if "show_reasoning" not in st.session_state:
+    st.session_state.show_reasoning = {}
+
 # Initialize session state for API base URL
 if "api_base_url" not in st.session_state:
     st.session_state.api_base_url = openai_api_base


 def create_new_chat_session():
-    """Create a new chat session with timestamp as ID"""
+    """Create a new chat session with a timestamp as its unique identifier.
+
+    This function initializes a new chat session by:
+    1. Generating a timestamp-based session ID
+    2. Creating an empty message list for the new session
+    3. Setting the new session as both the current and the active session
+    4. Resetting the working messages list
+
+    Returns:
+        None
+
+    Session State Updates:
+        - sessions: Adds a new empty message list under the timestamp key
+        - current_session: Set to the new session ID
+        - active_session: Set to the new session ID
+        - messages: Reset to an empty list
+    """
     session_id = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     st.session_state.sessions[session_id] = []
     st.session_state.current_session = session_id
@@ -66,30 +87,98 @@ def create_new_chat_session():


 def switch_to_chat_session(session_id):
-    """Switch to a different chat session"""
+    """Switch the active chat context to a different session.
+
+    This function handles session switching by:
+    1. Setting the specified session as current
+    2. Updating the active-session marker
+    3. Loading the message history of the specified session
+
+    Args:
+        session_id (str): The timestamp ID of the session to switch to
+
+    Session State Updates:
+        - current_session: Updated to the specified session_id
+        - active_session: Updated to the specified session_id
+        - messages: Loaded from sessions[session_id]
+    """
     st.session_state.current_session = session_id
     st.session_state.active_session = session_id
     st.session_state.messages = st.session_state.sessions[session_id]


-def get_llm_response(messages, model):
-    """Get streaming response from llm
+def get_llm_response(messages, model, reason, content_ph=None, reasoning_ph=None):
+    """Generate and stream an LLM response with an optional reasoning process.

     Args:
-        messages: List of message dictionaries
-        model: Name of model
+        messages (list): Conversation message dicts with 'role' and 'content'
+        model (str): The model identifier to use for generation
+        reason (bool): Whether to request and display the reasoning process
+        content_ph (streamlit.empty): Placeholder for the streaming response content
+        reasoning_ph (streamlit.empty): Placeholder for the streaming reasoning process

     Returns:
-        Streaming response object or error message string
+        tuple[str, str]: The complete response text and the complete
+        reasoning text (an empty string when reasoning is disabled)
+
+    Note:
+        Both reasoning and response text are streamed into Streamlit
+        placeholders in real time; when reason=True, the reasoning
+        appears above the response. API errors are caught and returned
+        as an error-message string rather than raised, so the UI never
+        crashes mid-stream.
     """
+    full_text = ""
+    think_text = ""
+    live_think = None
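+    # Note (a sketch of the wire format, not part of the original change):
+    # vLLM-specific options ride along via the OpenAI client's ``extra_body``,
+    # which merges extra keys into the JSON request body. With reasoning
+    # enabled the server receives roughly the following; ``enable_thinking``
+    # is honored by chat templates such as Qwen3's and may be ignored by
+    # other models:
+    #   {"model": ..., "messages": [...], "stream": true,
+    #    "chat_template_kwargs": {"enable_thinking": true}}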
+    # Build request parameters
+    params = {"model": model, "messages": messages, "stream": True}
+    if reason:
+        params["extra_body"] = {"chat_template_kwargs": {"enable_thinking": True}}
+
     try:
-        response = client.chat.completions.create(
-            model=model, messages=messages, stream=True
-        )
-        return response
+        response = client.chat.completions.create(**params)
+        # Defensive: surface a plain error string immediately, if one comes back
+        if isinstance(response, str):
+            if content_ph:
+                content_ph.markdown(response)
+            return response, ""
+
+        # Prepare a live reasoning expander above the content placeholder
+        if reason and reasoning_ph:
+            exp = reasoning_ph.expander("💭 Thinking Process (live)", expanded=True)
+            live_think = exp.empty()
+
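+        # Each streamed chunk carries a ``delta`` that may hold vLLM's
+        # ``reasoning_content`` extension field and/or the standard
+        # ``content`` field. Roughly (a sketch, fields abbreviated):
+        #   {"choices": [{"delta": {"reasoning_content": "Let me think..."}}]}
+        # ...followed later by ordinary content deltas:
+        #   {"choices": [{"delta": {"content": "The answer is..."}}]}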
+        # Stream chunks
+        for chunk in response:
+            delta = chunk.choices[0].delta
+            # Stream reasoning first
+            if reason and hasattr(delta, "reasoning_content") and live_think:
+                rc = delta.reasoning_content
+                if rc:
+                    think_text += rc
+                    live_think.markdown(think_text + "▌")
+            # Then stream content
+            if hasattr(delta, "content") and delta.content and content_ph:
+                full_text += delta.content
+                content_ph.markdown(full_text + "▌")
+
+        # Finalize displays: reasoning stays above, content below
+        if reason and live_think:
+            live_think.markdown(think_text)
+        if content_ph:
+            content_ph.markdown(full_text)
+
+        return full_text, think_text
     except Exception as e:
         st.error(f"Error details: {str(e)}")
-        return f"Error: {str(e)}"
+        return f"Error: {str(e)}", ""


 # Sidebar - API Settings first
@@ -108,6 +197,7 @@ st.sidebar.title("Chat Sessions")
 if st.sidebar.button("New Session"):
     create_new_chat_session()

+
 # Display all sessions in reverse chronological order
 for session_id in sorted(st.session_state.sessions.keys(), reverse=True):
     # Mark the active session with a pinned button
@@ -143,47 +233,79 @@ if st.session_state.current_session is None:
     create_new_chat_session()
     st.session_state.active_session = st.session_state.current_session

-# Display chat history for current session
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.write(message["content"])
+# Display chat history for the current session, reasoning above assistant replies
+for idx, msg in enumerate(st.session_state.messages):
+    # Render user messages normally
+    if msg["role"] == "user":
+        with st.chat_message("user"):
+            st.write(msg["content"])
+    # Render assistant messages with any stored reasoning shown above the content
+    else:
+        if idx in st.session_state.show_reasoning:
+            with st.expander("💭 Thinking Process", expanded=False):
+                st.markdown(st.session_state.show_reasoning[idx])
+        with st.chat_message("assistant"):
+            st.write(msg["content"])

-# Handle user input and generate llm response
+
+# Probe the server once (cached per model) for reasoning support
+@st.cache_data(show_spinner=False)
+def server_supports_reasoning(model_id):
+    """Check whether the served model returns reasoning content.
+
+    Args:
+        model_id (str): Model to probe; also keys the cache, so changing
+            the model re-runs the check
+
+    Returns:
+        bool: True if the model emits reasoning_content, False otherwise
+    """
+    resp = client.chat.completions.create(
+        model=model_id,
+        messages=[{"role": "user", "content": "Hi"}],
+        stream=False,
+    )
+    return hasattr(resp.choices[0].message, "reasoning_content") and bool(
+        resp.choices[0].message.reasoning_content
+    )
+
+
+supports_reasoning = server_supports_reasoning(model)
+
+# Offer a reasoning toggle in the sidebar only when the model supports it
+reason = False
+if supports_reasoning:
+    reason = st.sidebar.checkbox("Enable Reasoning", value=False)
+else:
+    st.sidebar.markdown("Reasoning unavailable for this model.")
+
+# Handle user input and stream the assistant response
 if prompt := st.chat_input("Type your message here..."):
-    # Save user message to session
+    # Save and display the user message
     st.session_state.messages.append({"role": "user", "content": prompt})
     st.session_state.sessions[st.session_state.current_session] = (
         st.session_state.messages
     )
-
-    # Display user message
     with st.chat_message("user"):
         st.write(prompt)

-    # Prepare messages for llm
-    messages_for_llm = [
+    # Prepare the message payload for the LLM
+    msgs = [
         {"role": m["role"], "content": m["content"]} for m in st.session_state.messages
     ]

-    # Generate and display llm response
+    # Stream the assistant response
     with st.chat_message("assistant"):
-        message_placeholder = st.empty()
-        full_response = ""
-
-        # Get streaming response from llm
-        response = get_llm_response(messages_for_llm, model)
-        if isinstance(response, str):
-            message_placeholder.markdown(response)
-            full_response = response
-        else:
-            for chunk in response:
-                if hasattr(chunk.choices[0].delta, "content"):
-                    content = chunk.choices[0].delta.content
-                    if content:
-                        full_response += content
-                        message_placeholder.markdown(full_response + "▌")
-
-            message_placeholder.markdown(full_response)
-
-    # Save llm response to session history
-    st.session_state.messages.append({"role": "assistant", "content": full_response})
+        # Placeholders: reasoning above, content below
+        reason_ph = st.empty()
+        content_ph = st.empty()
+        full, think = get_llm_response(msgs, model, reason, content_ph, reason_ph)
+    # The new assistant message will land at this index once appended
+    message_index = len(st.session_state.messages)
+    # Save the assistant reply
+    st.session_state.messages.append({"role": "assistant", "content": full})
+    # Persist the reasoning, if any, so history reruns can redisplay it
+    if reason and think:
+        st.session_state.show_reasoning[message_index] = think
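+
+# To exercise the reasoning path end to end, point this app at a vLLM server
+# running a thinking-capable model, e.g. (a sketch; reasoning-parser names and
+# flags vary by model and vLLM version):
+#   vllm serve Qwen/Qwen3-8B --reasoning-parser qwen3
+# then launch the UI:
+#   streamlit run streamlit_openai_chatbot_webserver.py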