[doc] add streamlit integration (#17522)

Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
Reid 2025-05-01 21:34:02 +08:00 committed by GitHub
parent b74d888c63
commit 7169f87ad0
4 changed files with 228 additions and 0 deletions

(binary file added: /assets/deployment/streamlit-chat.png, 106 KiB; image not shown)


@@ -12,5 +12,6 @@ lws
modal
open-webui
skypilot
streamlit
triton
:::


@@ -0,0 +1,42 @@
(deployment-streamlit)=
# Streamlit
[Streamlit](https://github.com/streamlit/streamlit) lets you transform Python scripts into interactive web apps in minutes, instead of weeks. Build dashboards, generate reports, or create chat apps.

It integrates easily with vLLM: the vLLM server acts as an OpenAI-compatible backend, so the Streamlit app performs LLM inference through standard chat completion API calls.
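
For example, once a vLLM server is running, any OpenAI-compatible client can query it directly. A minimal sketch, assuming the server from the steps below is listening on the default `http://localhost:8000/v1`:

```python
from openai import OpenAI

# vLLM's OpenAI-compatible server does not require a real API key by default
client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")

response = client.chat.completions.create(
    model="Qwen/Qwen1.5-0.5B-Chat",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
```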
## Prerequisites

- Set up the vLLM environment.

## Deploy

- Start the vLLM server with a supported chat completion model, e.g.:
```console
vllm serve Qwen/Qwen1.5-0.5B-Chat
```
- Install `streamlit` and `openai`:
```console
pip install streamlit openai
```
- Use the script: <gh-file:examples/online_serving/streamlit_openai_chatbot_webserver.py>
- Start the Streamlit web UI and begin chatting (a quick connectivity check is sketched after these steps):
```console
streamlit run streamlit_openai_chatbot_webserver.py
# or point it at a remote vLLM server via VLLM_API_BASE (and VLLM_API_KEY if needed)
VLLM_API_BASE="http://vllm-server-host:vllm-server-port/v1" streamlit run streamlit_openai_chatbot_webserver.py

# run in debug mode to view more details
streamlit run streamlit_openai_chatbot_webserver.py --logger.level=debug
```
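
If the UI starts but no model is listed, verify that the server is reachable before debugging Streamlit itself. A minimal sketch using the same `openai` client, assuming the default local endpoint:

```python
from openai import OpenAI

# The app performs this same query at startup to discover the model ID
client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")
print([model.id for model in client.models.list().data])
```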
:::{image} /assets/deployment/streamlit-chat.png
:::


@@ -0,0 +1,185 @@
# SPDX-License-Identifier: Apache-2.0
"""
vLLM Chat Assistant - A Streamlit Web Interface
A streamlined chat interface that quickly integrates
with vLLM API server.
Features:
- Multiple chat sessions management
- Streaming response display
- Configurable API endpoint
- Real-time chat history
Requirements:
pip install streamlit openai
Usage:
# Start the app with default settings
streamlit run streamlit_openai_chatbot_webserver.py
# Start with custom vLLM API endpoint
VLLM_API_BASE="http://your-server:8000/v1" \
streamlit run streamlit_openai_chatbot_webserver.py
# Enable debug mode
streamlit run streamlit_openai_chatbot_webserver.py \
--logger.level=debug
"""

import os
from datetime import datetime

import streamlit as st
from openai import OpenAI

# Read API connection settings from environment variables
openai_api_key = os.getenv('VLLM_API_KEY', "EMPTY")
openai_api_base = os.getenv('VLLM_API_BASE', "http://localhost:8000/v1")

# Initialize session states for managing chat sessions
if "sessions" not in st.session_state:
    st.session_state.sessions = {}
if "current_session" not in st.session_state:
    st.session_state.current_session = None
if "messages" not in st.session_state:
    st.session_state.messages = []
# active_session mirrors current_session; it is used to highlight
# the selected session in the sidebar
if "active_session" not in st.session_state:
    st.session_state.active_session = None

# Initialize session state for the API base URL
if "api_base_url" not in st.session_state:
    st.session_state.api_base_url = openai_api_base


def create_new_chat_session():
    """Create a new chat session with a timestamp as its ID"""
    session_id = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    st.session_state.sessions[session_id] = []
    st.session_state.current_session = session_id
    st.session_state.active_session = session_id
    st.session_state.messages = []


def switch_to_chat_session(session_id):
    """Switch to a different chat session"""
    st.session_state.current_session = session_id
    st.session_state.active_session = session_id
    st.session_state.messages = st.session_state.sessions[session_id]


def get_llm_response(messages, model):
    """Get a streaming response from the LLM.

    Args:
        messages: List of message dictionaries
        model: Name of the model to use

    Returns:
        Streaming response object, or an error message string
    """
    try:
        response = client.chat.completions.create(model=model,
                                                  messages=messages,
                                                  stream=True)
        return response
    except Exception as e:
        st.error(f"Error details: {str(e)}")
        return f"Error: {str(e)}"


# Sidebar - API settings first
st.sidebar.title("API Settings")
new_api_base = st.sidebar.text_input("API Base URL:",
                                     value=st.session_state.api_base_url)
if new_api_base != st.session_state.api_base_url:
    st.session_state.api_base_url = new_api_base
    # Rerun so the OpenAI client below is recreated with the new base URL
    st.rerun()

st.sidebar.divider()

# Sidebar - Session management
st.sidebar.title("Chat Sessions")
if st.sidebar.button("New Session"):
create_new_chat_session()
# Display all sessions in reverse chronological order
for session_id in sorted(st.session_state.sessions.keys(), reverse=True):
# Mark the active session with a pinned button
if session_id == st.session_state.active_session:
st.sidebar.button(f"📍 {session_id}",
key=session_id,
type="primary",
on_click=switch_to_chat_session,
args=(session_id, ))
else:
st.sidebar.button(f"Session {session_id}",
key=session_id,
on_click=switch_to_chat_session,
args=(session_id, ))

# Main interface
st.title("vLLM Chat Assistant")

# Initialize the OpenAI client with API settings
client = OpenAI(api_key=openai_api_key, base_url=st.session_state.api_base_url)

# Get and display the current model ID
models = client.models.list()
model = models.data[0].id
st.markdown(f"**Model**: {model}")

# Initialize the first session if none exists
if st.session_state.current_session is None:
    create_new_chat_session()
    st.session_state.active_session = st.session_state.current_session

# Display chat history for the current session
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

# Handle user input and generate the LLM response
if prompt := st.chat_input("Type your message here..."):
    # Save the user message to the session
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.session_state.sessions[
        st.session_state.current_session] = st.session_state.messages

    # Display the user message
    with st.chat_message("user"):
        st.write(prompt)

    # Prepare messages for the LLM
    messages_for_llm = [{
        "role": m["role"],
        "content": m["content"]
    } for m in st.session_state.messages]

    # Generate and display the LLM response
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""

        # Get the streaming response from the LLM
        response = get_llm_response(messages_for_llm, model)
        if isinstance(response, str):
            message_placeholder.markdown(response)
            full_response = response
        else:
            for chunk in response:
                if hasattr(chunk.choices[0].delta, "content"):
                    content = chunk.choices[0].delta.content
                    if content:
                        full_response += content
                        # Show a cursor while streaming
                        message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)

    # Save the LLM response to the session history
    st.session_state.messages.append({
        "role": "assistant",
        "content": full_response
    })