Add Open WebUI integration and enhance Streamlit app

- Add Open WebUI scripts (06-09) for server-hosted ChatGPT-like interface
  connected to the vLLM backend on port 7081
- Add context window management to chat (auto-trim, token counter, progress bar)
- Add terminal output panel to file editor for running Python/LaTeX files
- Update README with Open WebUI setup, architecture diagram, and troubleshooting
- Update STUDENT_GUIDE with step-by-step Open WebUI login instructions

Made-with: Cursor
This commit is contained in:
herzogflorian 2026-03-02 18:48:51 +01:00
parent d59285fe69
commit f4fdaab732
8 changed files with 559 additions and 89 deletions

3
.gitignore vendored
View File

@ -16,5 +16,8 @@ models/
# Streamlit workspace files
workspace/
# Open WebUI persistent data (user accounts, chats, DB)
openwebui-data/
# macOS
.DS_Store

37
06_setup_openwebui.sh Executable file
View File

@ -0,0 +1,37 @@
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 06_setup_openwebui.sh
# Fetches the Open WebUI container image and prepares the directory
# used for persistent storage (user accounts, chat history, settings).
#
# Usage:
#   bash 06_setup_openwebui.sh
# ------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
IMAGE_FILE="${SCRIPT_DIR}/open-webui.sif"
STORAGE_DIR="${SCRIPT_DIR}/openwebui-data"

# Never overwrite an existing image — the user must delete it explicitly.
if [ -f "$IMAGE_FILE" ]; then
    echo "Open WebUI container already exists at ${IMAGE_FILE}"
    echo "Delete it first if you want to rebuild:"
    echo " rm ${IMAGE_FILE}"
    exit 0
fi

echo "=== Pulling Open WebUI container image ==="
echo " Source: ghcr.io/open-webui/open-webui:main"
echo " This may take 5-10 minutes (~4 GB)..."
echo ""

# Convert the upstream Docker image into an Apptainer .sif file.
apptainer pull "$IMAGE_FILE" docker://ghcr.io/open-webui/open-webui:main

# Created only after a successful pull so a failed run leaves no trace.
mkdir -p "$STORAGE_DIR"

echo ""
echo "=== Setup complete ==="
echo "Image: ${IMAGE_FILE} ($(du -sh "$IMAGE_FILE" | cut -f1))"
echo "Data dir: ${STORAGE_DIR}"
echo ""
echo "Next: bash 07_start_openwebui.sh"

70
07_start_openwebui.sh Executable file
View File

@ -0,0 +1,70 @@
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 07_start_openwebui.sh
# Starts Open WebUI connected to the vLLM inference server.
#
# Open WebUI provides a ChatGPT-like interface with:
#   - User accounts & chat history (persisted in openwebui-data/)
#   - Model selector (auto-discovers models from vLLM)
#   - Streaming responses, markdown rendering, code highlighting
#
# The first user to sign up becomes the admin.
#
# Usage:
#   bash 07_start_openwebui.sh                    # defaults
#   PORT=7082 bash 07_start_openwebui.sh          # custom port
#   VLLM_BASE_URL=http://localhost:7080/v1 bash 07_start_openwebui.sh
#
# Environment variables:
#   PORT          — HTTP port for Open WebUI (default: 7081)
#   VLLM_BASE_URL — vLLM OpenAI-compatible URL (default: http://localhost:7080/v1)
#   VLLM_API_KEY  — API key for vLLM (default: EMPTY)
#   DATA_DIR      — Persistent storage path (default: ./openwebui-data)
# ------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SIF_FILE="${SCRIPT_DIR}/open-webui.sif"

# All settings are overridable from the environment (see header).
PORT="${PORT:-7081}"
VLLM_BASE_URL="${VLLM_BASE_URL:-http://localhost:7080/v1}"
VLLM_API_KEY="${VLLM_API_KEY:-EMPTY}"
DATA_DIR="${DATA_DIR:-${SCRIPT_DIR}/openwebui-data}"

if [ ! -f "$SIF_FILE" ]; then
    echo "ERROR: Container image not found at ${SIF_FILE}"
    echo " Run 06_setup_openwebui.sh first."
    exit 1
fi

mkdir -p "$DATA_DIR"

echo "=== Starting Open WebUI ==="
echo " Port: ${PORT}"
echo " vLLM backend: ${VLLM_BASE_URL}"
echo " Data directory: ${DATA_DIR}"
echo ""
echo " Access at: http://$(hostname -f 2>/dev/null || hostname):${PORT}"
echo " First user to sign up becomes admin."
echo ""
echo " Press Ctrl+C to stop."
echo "==========================================="
echo ""

# exec replaces this shell with the container process. Without it, the
# PID recorded by 08_start_openwebui_background.sh would belong to this
# wrapper shell, and 09_stop_openwebui.sh would kill the wrapper while
# the apptainer process kept running. exec makes the recorded PID the
# actual server; Ctrl+C in foreground use still works as before.
exec apptainer exec \
    --writable-tmpfs \
    --pwd /app/backend \
    --bind "${DATA_DIR}:/app/backend/data" \
    --env PORT="${PORT}" \
    --env ENABLE_OPENAI_API="True" \
    --env OPENAI_API_BASE_URLS="${VLLM_BASE_URL}" \
    --env OPENAI_API_KEYS="${VLLM_API_KEY}" \
    --env ENABLE_OLLAMA_API="False" \
    --env ENABLE_SIGNUP="True" \
    --env DEFAULT_USER_ROLE="user" \
    --env WEBUI_NAME="Qwen3.5 LLM Server" \
    --env OFFLINE_MODE="True" \
    --env ENABLE_VERSION_UPDATE_CHECK="False" \
    --env HF_HUB_OFFLINE="1" \
    "$SIF_FILE" \
    bash start.sh

View File

@ -0,0 +1,52 @@
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 08_start_openwebui_background.sh
# Launches Open WebUI in the background with logging.
#
# Usage:
#   bash 08_start_openwebui_background.sh
#
# Logs are written to: ./logs/openwebui_<timestamp>.log
# PID is written to:   ./logs/openwebui.pid
# ------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
LOG_DIR="${SCRIPT_DIR}/logs"
mkdir -p "$LOG_DIR"

TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
LOG_FILE="${LOG_DIR}/openwebui_${TIMESTAMP}.log"
PID_FILE="${LOG_DIR}/openwebui.pid"

# Refuse to start a second instance. If the PID file points at a dead
# process (e.g. after a reboot or crash), clean it up instead of leaving
# a stale file behind.
if [ -f "$PID_FILE" ]; then
    OLD_PID=$(cat "$PID_FILE")
    if kill -0 "$OLD_PID" 2>/dev/null; then
        echo "Open WebUI already running with PID ${OLD_PID}"
        echo "Stop it first: bash 09_stop_openwebui.sh"
        exit 1
    fi
    echo "Removing stale PID file (process ${OLD_PID} no longer running)."
    rm -f "$PID_FILE"
fi

echo "Starting Open WebUI in background..."
echo "Log file: ${LOG_FILE}"

# nohup + backgrounding detaches the server from this terminal session.
nohup bash "${SCRIPT_DIR}/07_start_openwebui.sh" > "$LOG_FILE" 2>&1 &
SERVER_PID=$!
echo "$SERVER_PID" > "$PID_FILE"

echo "Open WebUI PID: ${SERVER_PID}"
echo ""
echo "Monitor logs: tail -f ${LOG_FILE}"
echo "Stop: bash 09_stop_openwebui.sh"
echo ""

# Give the process a moment to fail fast on obvious errors (missing
# image, port already bound) so the user sees them immediately.
sleep 5
if kill -0 "$SERVER_PID" 2>/dev/null; then
    echo "Open WebUI process is running. Starting up..."
    echo "(Ready when you see 'Uvicorn running' in the logs)"
else
    echo "ERROR: Open WebUI process exited. Check logs:"
    tail -20 "$LOG_FILE"
    exit 1
fi

31
09_stop_openwebui.sh Executable file
View File

@ -0,0 +1,31 @@
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 09_stop_openwebui.sh
# Gracefully stops the background Open WebUI server: SIGTERM first,
# then SIGKILL if the process is still alive after a short grace period.
# ------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PID_FILE="${SCRIPT_DIR}/logs/openwebui.pid"

# No PID file means nothing was started via 08 — nothing to stop.
if [ ! -f "$PID_FILE" ]; then
    echo "No PID file found. Open WebUI may not be running."
    exit 0
fi

WEBUI_PID="$(cat "$PID_FILE")"

if ! kill -0 "$WEBUI_PID" 2>/dev/null; then
    # PID file exists but the process is already gone.
    echo "Open WebUI process (PID: ${WEBUI_PID}) is not running."
else
    echo "Stopping Open WebUI (PID: ${WEBUI_PID})..."
    kill "$WEBUI_PID"      # polite SIGTERM first
    sleep 2
    # Escalate only if the graceful shutdown did not finish in time.
    if kill -0 "$WEBUI_PID" 2>/dev/null; then
        echo "Process still alive, sending SIGKILL..."
        kill -9 "$WEBUI_PID"
    fi
    echo "Open WebUI stopped."
fi

# Always clear the PID file so the next start is not blocked.
rm -f "$PID_FILE"

114
README.md
View File

@ -2,14 +2,21 @@
Self-hosted LLM inference for ~15 concurrent students using **Qwen3.5-35B-A3B**
(MoE, 35B total / 3B active per token), served via **vLLM** inside an
**Apptainer** container on a GPU server. Includes a **Streamlit web app** for
chat and file editing.
**Apptainer** container on a GPU server. Two front-ends are provided:
**Open WebUI** (server-hosted ChatGPT-like UI) and a **Streamlit app**
(local chat + file editor with code execution).
## Architecture
```
Students (Streamlit App / OpenAI SDK / curl)
Students
├── Browser ──► Open WebUI (silicon.fhgr.ch:7081)
│ │ ChatGPT-like UI, user accounts, chat history
│ │
├── Streamlit ─────┤ Local app with file editor & code runner
│ │
└── SDK / curl ────┘
┌──────────────────────────────┐
│ silicon.fhgr.ch:7080 │
@ -134,14 +141,90 @@ curl http://localhost:7080/v1/chat/completions \
-d '{"model":"qwen3.5-35b-a3b","messages":[{"role":"user","content":"Hello!"}],"max_tokens":128}'
```
### Step 7: Share with Students
### Step 7: Set Up Open WebUI (ChatGPT-like Interface)
Open WebUI provides a full-featured chat interface that runs on the server.
Students access it via a browser — no local setup required.
**Pull the container:**
```bash
bash 06_setup_openwebui.sh
```
**Start (foreground with tmux):**
```bash
tmux new -s webui
bash 07_start_openwebui.sh
# Ctrl+B, then D to detach
```
**Start (background with logging):**
```bash
bash 08_start_openwebui_background.sh
tail -f logs/openwebui_*.log
```
Open WebUI is ready when you see `Uvicorn running` in the logs.
Access it at `http://silicon.fhgr.ch:7081`.
> **Important**: The first user to sign up becomes the **admin**. Sign up
> yourself first before sharing the URL with students.
### Step 8: Share with Students
Distribute `STUDENT_GUIDE.md` with connection details:
- **Base URL**: `http://silicon.fhgr.ch:7080/v1`
- **Open WebUI**: `http://silicon.fhgr.ch:7081` (recommended for most students)
- **API Base URL**: `http://silicon.fhgr.ch:7080/v1` (for SDK / programmatic use)
- **Model name**: `qwen3.5-35b-a3b`
---
## Open WebUI
A server-hosted ChatGPT-like interface backed by the vLLM inference server.
Runs as an Apptainer container on port **7081**.
### Features
- User accounts with persistent chat history (stored in `openwebui-data/`)
- Auto-discovers models from the vLLM backend
- Streaming responses, markdown rendering, code highlighting
- Admin panel for managing users, models, and settings
- No local setup needed — students just open a browser
### Configuration
| Variable | Default | Description |
|----------|---------|-------------|
| `PORT` | `7081` | HTTP port for the UI |
| `VLLM_BASE_URL` | `http://localhost:7080/v1` | vLLM API endpoint |
| `VLLM_API_KEY` | `EMPTY` | API key (if vLLM requires one) |
| `DATA_DIR` | `./openwebui-data` | Persistent storage (DB, uploads) |
### Management
```bash
# Start in background
bash 08_start_openwebui_background.sh
# View logs
tail -f logs/openwebui_*.log
# Stop
bash 09_stop_openwebui.sh
# Reconnect to tmux session
tmux attach -t webui
```
### Data Persistence
All user data (accounts, chats, settings) is stored in `openwebui-data/`.
This directory is bind-mounted into the container, so data survives
container restarts. Back it up regularly.
---
## Streamlit App
A web-based chat and file editor that connects to the inference server.
@ -241,13 +324,17 @@ tmux attach -t llm
## Files Overview
| File | Purpose |
|----------------------------------|------------------------------------------------------|
|------------------------------------|------------------------------------------------------|
| `vllm_qwen.def` | Apptainer container definition (vLLM nightly + deps) |
| `01_build_container.sh` | Builds the Apptainer `.sif` image |
| `02_download_model.sh` | Downloads model weights (runs inside container) |
| `03_start_server.sh` | Starts vLLM server (foreground) |
| `04_start_server_background.sh` | Starts server in background with logging |
| `05_stop_server.sh` | Stops the background server |
| `04_start_server_background.sh` | Starts vLLM server in background with logging |
| `05_stop_server.sh` | Stops the background vLLM server |
| `06_setup_openwebui.sh` | Pulls the Open WebUI container image |
| `07_start_openwebui.sh` | Starts Open WebUI (foreground) |
| `08_start_openwebui_background.sh` | Starts Open WebUI in background with logging |
| `09_stop_openwebui.sh` | Stops the background Open WebUI |
| `app.py` | Streamlit chat & file editor web app |
| `requirements.txt` | Python dependencies for the Streamlit app |
| `test_server.py` | Tests the running server via CLI |
@ -285,6 +372,17 @@ tmux attach -t llm
- Disable thinking mode for faster simple responses
- Monitor: `curl http://localhost:7080/metrics`
### Open WebUI won't start
- Ensure the vLLM server is running first on port 7080
- Check that port 7081 is not already in use: `ss -tlnp | grep 7081`
- Check logs: `tail -50 logs/openwebui_*.log`
- If the database is corrupted, reset: `rm openwebui-data/webui.db` and restart
### Open WebUI shows no models
- Verify vLLM is reachable: `curl http://localhost:7080/v1/models`
- The OpenAI API base URL is set on first launch; if changed later, update
it in the Open WebUI Admin Panel > Settings > Connections
### Syncing files to the server
- No `git` or `pip` on the host — use `scp` from your local machine:
```bash

View File

@ -4,30 +4,93 @@
A **Qwen3.5-35B-A3B** language model is running on our GPU server. It's a
Mixture-of-Experts model (35B total parameters, 3B active per token), providing
fast and high-quality responses. You can interact with it using the
**OpenAI-compatible API**.
fast and high-quality responses.
There are **three ways** to interact with the model:
1. **Open WebUI** — ChatGPT-like interface in your browser (easiest)
2. **Streamlit App** — Local app with chat, file editor, and code execution
3. **Python SDK / curl** — Programmatic access via the OpenAI-compatible API
> **Note**: You must be on the university network or VPN to reach the server.
## Connection Details
| Parameter | Value |
|------------- |---------------------------------------------|
| **Base URL** | `http://silicon.fhgr.ch:7080/v1` |
|------------------|---------------------------------------------|
| **Open WebUI** | `http://silicon.fhgr.ch:7081` |
| **API Base URL** | `http://silicon.fhgr.ch:7080/v1` |
| **Model** | `qwen3.5-35b-a3b` |
| **API Key** | *(ask your instructor — may be `EMPTY`)* |
> **Note**: You must be on the university network or VPN to reach the server.
---
## Option 1: Open WebUI (Recommended)
The easiest way to chat with the model — no installation required.
### Getting Started
1. Make sure you are connected to the **university network** (or VPN).
2. Open your browser and go to **http://silicon.fhgr.ch:7081**
3. Click **"Sign Up"** to create a new account:
- Enter your **name** (e.g. your first and last name)
- Enter your **email** (use your university email)
- Choose a **password**
- Click **"Create Account"**
4. After signing up you are logged in automatically.
5. Select the model **qwen3.5-35b-a3b** from the model dropdown at the top.
6. Type a message and press Enter — you're chatting with the LLM.
### Returning Later
- Go to **http://silicon.fhgr.ch:7081** and click **"Sign In"**.
- Enter the email and password you used during sign-up.
- All your previous chats are still there.
### Features
- **Chat history** — all conversations are saved on the server and persist across sessions
- **Markdown rendering** with syntax-highlighted code blocks
- **Model selector** — auto-discovers available models from the server
- **Conversation branching** — edit previous messages and explore alternative responses
- **File upload** — attach files to your messages for the model to analyze
- **Search** — search across all your past conversations
### Tips
- Your account and chat history are stored on the server. You can log in
from any device on the university network.
- If you forget your password, ask your instructor to reset it via the
Admin Panel.
- The model works best when you provide clear, specific instructions.
- For code tasks, mention the programming language explicitly (e.g.
"Write a Python function that...").
- Long conversations use more context. Start a **New Chat** (top-left
button) when switching topics to get faster, more focused responses.
---
## Quick Start with Python
## Option 2: Streamlit App (Chat + File Editor)
### 1. Install the OpenAI SDK
A local app with chat, file editing, and Python/LaTeX execution.
See the [Streamlit section below](#streamlit-chat--file-editor-app) for setup.
---
## Option 3: Python SDK / curl
For programmatic access and scripting.
### Quick Start with Python
#### 1. Install the OpenAI SDK
```bash
pip install openai
```
### 2. Simple Chat
#### 2. Simple Chat
```python
from openai import OpenAI
@ -50,7 +113,7 @@ response = client.chat.completions.create(
print(response.choices[0].message.content)
```
### 3. Streaming Responses
#### 3. Streaming Responses
```python
stream = client.chat.completions.create(
@ -70,7 +133,7 @@ print()
---
## Quick Start with curl
### Quick Start with curl
```bash
curl http://silicon.fhgr.ch:7080/v1/chat/completions \
@ -196,3 +259,5 @@ response = client.chat.completions.create(
| Slow responses | The model is shared — peak times may be slower |
| `401 Unauthorized` | Ask your instructor for the API key |
| Response cut off | Increase `max_tokens` in your request |
| Open WebUI login fails | Make sure you created an account first (Sign Up) |
| Open WebUI shows no models | The vLLM server may still be loading — wait a few minutes |

114
app.py
View File

@ -11,6 +11,7 @@ Usage:
"""
import re
import subprocess
import streamlit as st
from openai import OpenAI
from pathlib import Path
@ -51,6 +52,9 @@ LANG_MAP = {
}
MAX_CONTEXT = 32768
def extract_code(text: str, lang: str = "") -> str:
"""Extract the first fenced code block from markdown text.
Falls back to the full text if no code block is found."""
@ -61,6 +65,56 @@ def extract_code(text: str, lang: str = "") -> str:
return text.strip()
def estimate_tokens(messages: list[dict]) -> int:
    """Rough token estimate for a chat history.

    Uses the common heuristic of ~4 characters per token; only the
    ``content`` field of each message is counted.
    """
    total_chars = sum(len(message["content"]) for message in messages)
    return total_chars // 4
def trim_history(messages: list[dict], reserved: int) -> list[dict]:
    """Trim the oldest messages in place until the history fits the budget.

    The budget is ``MAX_CONTEXT - reserved`` (``reserved`` is the token
    head-room kept for the upcoming completion). The docstring contract is
    to drop *pairs*: whenever more than two messages remain, a full
    user/assistant pair is removed so the surviving history still starts
    with a user turn (the original implementation popped single messages,
    which could leave an orphan assistant message at the front). The most
    recent message is always kept, even if it alone exceeds the budget.

    Returns the same (mutated) list for caller convenience.
    """
    budget = MAX_CONTEXT - reserved
    while len(messages) > 1 and estimate_tokens(messages) > budget:
        # Remove a complete pair when possible to keep roles alternating;
        # with exactly two messages left, fall back to removing one.
        del messages[0 : 2 if len(messages) > 2 else 1]
    return messages
# File types the editor offers a "Run"/"Compile" button for.
RUNNABLE_EXTENSIONS = {".py", ".tex"}
# Hard wall-clock limit for a single execution, in seconds.
RUN_TIMEOUT = 30


def run_file(file_path: Path) -> dict:
    """Execute a .py or .tex file and return stdout, stderr, and return code.

    Returns a dict with keys ``stdout``, ``stderr`` and ``rc``. Unsupported
    extensions yield rc=1; a timeout or missing interpreter yields rc=-1.
    """
    extension = file_path.suffix
    work_dir = file_path.parent.resolve()

    if extension == ".py":
        command = ["python3", file_path.name]
    elif extension == ".tex":
        # Non-stop mode so a LaTeX error cannot hang waiting for input;
        # keep all build artifacts next to the source file.
        command = [
            "pdflatex",
            "-interaction=nonstopmode",
            f"-output-directory={work_dir}",
            file_path.name,
        ]
    else:
        return {"stdout": "", "stderr": f"Unsupported file type: {extension}", "rc": 1}

    try:
        completed = subprocess.run(
            command,
            cwd=work_dir,
            capture_output=True,
            text=True,
            timeout=RUN_TIMEOUT,
        )
    except subprocess.TimeoutExpired:
        return {"stdout": "", "stderr": f"Timed out after {RUN_TIMEOUT}s", "rc": -1}
    except FileNotFoundError as exc:
        # Interpreter/compiler not installed on this machine.
        return {"stdout": "", "stderr": str(exc), "rc": -1}

    return {
        "stdout": completed.stdout,
        "stderr": completed.stderr,
        "rc": completed.returncode,
    }
# ---------------------------------------------------------------------------
# Sidebar — File Manager
# ---------------------------------------------------------------------------
@ -100,6 +154,10 @@ with tab_chat:
with st.chat_message("user"):
st.markdown(prompt)
st.session_state.messages = trim_history(
st.session_state.messages, reserved=max_tokens
)
with st.chat_message("assistant"):
placeholder = st.empty()
full_response = ""
@ -123,6 +181,13 @@ with tab_chat:
st.session_state.messages.append({"role": "assistant", "content": full_response})
if st.session_state.messages:
used = estimate_tokens(st.session_state.messages)
pct = min(used / MAX_CONTEXT, 1.0)
label = f"Context: ~{used:,} / {MAX_CONTEXT:,} tokens"
if pct > 0.8:
label += " ⚠️ nearing limit — older messages will be trimmed"
st.progress(pct, text=label)
col_clear, col_save = st.columns([1, 3])
with col_clear:
if st.button("Clear Chat"):
@ -149,7 +214,14 @@ with tab_editor:
content = file_path.read_text() if file_path.exists() else ""
suffix = file_path.suffix
lang = LANG_MAP.get(suffix, "text")
runnable = suffix in RUNNABLE_EXTENSIONS
if runnable:
col_edit, col_term = st.columns([3, 2])
else:
col_edit = st.container()
with col_edit:
st.code(content, language=lang if lang != "text" else None, line_numbers=True)
edited = st.text_area(
@ -198,5 +270,47 @@ with tab_editor:
file_path.write_text(code)
st.success("File updated by LLM")
st.rerun()
if runnable:
with col_term:
run_label = "Compile LaTeX" if suffix == ".tex" else "Run Python"
st.subheader("Terminal Output")
if st.button(run_label, type="primary"):
file_path.write_text(edited)
with st.spinner(f"{'Compiling' if suffix == '.tex' else 'Running'}..."):
result = run_file(file_path)
st.session_state["last_run"] = result
result = st.session_state.get("last_run")
if result:
if result["rc"] == 0:
st.success(f"Exit code: {result['rc']}")
else:
st.error(f"Exit code: {result['rc']}")
if result["stdout"]:
st.text_area(
"stdout",
value=result["stdout"],
height=300,
disabled=True,
key="run_stdout",
)
if result["stderr"]:
st.text_area(
"stderr",
value=result["stderr"],
height=200,
disabled=True,
key="run_stderr",
)
if not result["stdout"] and not result["stderr"]:
st.info("No output produced.")
else:
st.caption(
f"Click **{run_label}** to execute the file "
f"(timeout: {RUN_TIMEOUT}s)."
)
else:
st.info("Create a file in the sidebar to start editing.")