Add Open WebUI integration and enhance Streamlit app
- Add Open WebUI scripts (06-09) for a server-hosted ChatGPT-like interface on port 7081, connected to the vLLM backend on port 7080 - Add context window management to chat (auto-trim, token counter, progress bar) - Add terminal output panel to file editor for running Python/LaTeX files - Update README with Open WebUI setup, architecture diagram, and troubleshooting - Update STUDENT_GUIDE with step-by-step Open WebUI login instructions Made-with: Cursor
This commit is contained in:
parent
d59285fe69
commit
f4fdaab732
3
.gitignore
vendored
3
.gitignore
vendored
@ -16,5 +16,8 @@ models/
|
|||||||
# Streamlit workspace files
|
# Streamlit workspace files
|
||||||
workspace/
|
workspace/
|
||||||
|
|
||||||
|
# Open WebUI persistent data (user accounts, chats, DB)
|
||||||
|
openwebui-data/
|
||||||
|
|
||||||
# macOS
|
# macOS
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
|||||||
37
06_setup_openwebui.sh
Executable file
37
06_setup_openwebui.sh
Executable file
@ -0,0 +1,37 @@
|
|||||||
|
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 06_setup_openwebui.sh
# Pulls the Open WebUI container image and creates the data directory
# for persistent storage (user accounts, chat history, settings).
#
# The image is first pulled to "<image>.partial" and only renamed to its
# final name after the pull succeeded, so an interrupted download can
# never be mistaken for a complete image on the next run.
#
# Usage:
#   bash 06_setup_openwebui.sh
# ------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SIF_FILE="${SCRIPT_DIR}/open-webui.sif"
DATA_DIR="${SCRIPT_DIR}/openwebui-data"
# Temporary download target; renamed to SIF_FILE on success.
TMP_SIF="${SIF_FILE}.partial"

# Idempotent and cheap, so do it before any early exit: the data
# directory then exists even when the image was pulled earlier.
mkdir -p "$DATA_DIR"

if [ -f "$SIF_FILE" ]; then
    echo "Open WebUI container already exists at ${SIF_FILE}"
    echo "Delete it first if you want to rebuild:"
    echo " rm ${SIF_FILE}"
    exit 0
fi

# Fail with a readable message instead of bash's "command not found".
if ! command -v apptainer >/dev/null 2>&1; then
    echo "ERROR: apptainer not found in PATH." >&2
    exit 1
fi

# Remove a half-written download on any exit path (error, Ctrl+C).
# After the successful rename below this is a no-op.
trap 'rm -f "$TMP_SIF"' EXIT

echo "=== Pulling Open WebUI container image ==="
echo " Source: ghcr.io/open-webui/open-webui:main"
echo " This may take 5-10 minutes (~4 GB)..."
echo ""

# --force overwrites a leftover .partial file from a run that was killed
# before the trap could clean up.
apptainer pull --force "$TMP_SIF" docker://ghcr.io/open-webui/open-webui:main
mv "$TMP_SIF" "$SIF_FILE"

echo ""
echo "=== Setup complete ==="
echo "Image: ${SIF_FILE} ($(du -sh "$SIF_FILE" | cut -f1))"
echo "Data dir: ${DATA_DIR}"
echo ""
echo "Next: bash 07_start_openwebui.sh"
|
||||||
70
07_start_openwebui.sh
Executable file
70
07_start_openwebui.sh
Executable file
@ -0,0 +1,70 @@
|
|||||||
|
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 07_start_openwebui.sh
# Starts Open WebUI connected to the vLLM inference server.
#
# Open WebUI provides a ChatGPT-like interface with:
#   - User accounts & chat history (persisted in openwebui-data/)
#   - Model selector (auto-discovers models from vLLM)
#   - Streaming responses, markdown rendering, code highlighting
#
# The first user to sign up becomes the admin.
#
# Usage:
#   bash 07_start_openwebui.sh                # defaults
#   PORT=7082 bash 07_start_openwebui.sh      # custom port
#   VLLM_BASE_URL=http://localhost:7080/v1 bash 07_start_openwebui.sh
#
# Environment variables:
#   PORT          — HTTP port for Open WebUI (default: 7081)
#   VLLM_BASE_URL — vLLM OpenAI-compatible URL (default: http://localhost:7080/v1)
#   VLLM_API_KEY  — API key for vLLM (default: EMPTY)
#   DATA_DIR      — Persistent storage path (default: ./openwebui-data)
# ------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SIF_FILE="${SCRIPT_DIR}/open-webui.sif"

# Every setting can be overridden from the caller's environment.
PORT="${PORT:-7081}"
VLLM_BASE_URL="${VLLM_BASE_URL:-http://localhost:7080/v1}"
VLLM_API_KEY="${VLLM_API_KEY:-EMPTY}"
DATA_DIR="${DATA_DIR:-${SCRIPT_DIR}/openwebui-data}"

if [ ! -f "$SIF_FILE" ]; then
    echo "ERROR: Container image not found at ${SIF_FILE}"
    echo " Run 06_setup_openwebui.sh first."
    exit 1
fi

# The bind mount below needs an existing host directory.
mkdir -p "$DATA_DIR"

echo "=== Starting Open WebUI ==="
echo " Port: ${PORT}"
echo " vLLM backend: ${VLLM_BASE_URL}"
echo " Data directory: ${DATA_DIR}"
echo ""
echo " Access at: http://$(hostname -f 2>/dev/null || hostname):${PORT}"
echo " First user to sign up becomes admin."
echo ""
echo " Press Ctrl+C to stop."
echo "==========================================="
echo ""

# `exec` replaces this shell with the apptainer process instead of
# running it as a child. That way the PID that
# 08_start_openwebui_background.sh records via `$!` belongs to the server
# itself; without it, 09_stop_openwebui.sh would only kill this wrapper
# shell and leave Open WebUI running (and holding the port).
exec apptainer exec \
    --writable-tmpfs \
    --pwd /app/backend \
    --bind "${DATA_DIR}:/app/backend/data" \
    --env PORT="${PORT}" \
    --env ENABLE_OPENAI_API="True" \
    --env OPENAI_API_BASE_URLS="${VLLM_BASE_URL}" \
    --env OPENAI_API_KEYS="${VLLM_API_KEY}" \
    --env ENABLE_OLLAMA_API="False" \
    --env ENABLE_SIGNUP="True" \
    --env DEFAULT_USER_ROLE="user" \
    --env WEBUI_NAME="Qwen3.5 LLM Server" \
    --env OFFLINE_MODE="True" \
    --env ENABLE_VERSION_UPDATE_CHECK="False" \
    --env HF_HUB_OFFLINE="1" \
    "$SIF_FILE" \
    bash start.sh
|
||||||
52
08_start_openwebui_background.sh
Executable file
52
08_start_openwebui_background.sh
Executable file
@ -0,0 +1,52 @@
|
|||||||
|
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 08_start_openwebui_background.sh
# Launches Open WebUI in the background with logging.
#
# Usage:
#   bash 08_start_openwebui_background.sh
#
# Logs are written to: ./logs/openwebui_<timestamp>.log
# PID is written to:   ./logs/openwebui.pid
# ------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
LOG_DIR="${SCRIPT_DIR}/logs"
mkdir -p "$LOG_DIR"

TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
LOG_FILE="${LOG_DIR}/openwebui_${TIMESTAMP}.log"
PID_FILE="${LOG_DIR}/openwebui.pid"

# Refuse to start a second instance while the recorded PID is alive.
# A PID file whose process is gone is stale and gets overwritten below.
if [ -f "$PID_FILE" ]; then
    OLD_PID="$(cat "$PID_FILE")"
    if [ -n "$OLD_PID" ] && kill -0 "$OLD_PID" 2>/dev/null; then
        echo "Open WebUI already running with PID ${OLD_PID}"
        echo "Stop it first: bash 09_stop_openwebui.sh"
        exit 1
    fi
fi

echo "Starting Open WebUI in background..."
echo "Log file: ${LOG_FILE}"

# nohup keeps the server alive after this terminal/SSH session closes.
nohup bash "${SCRIPT_DIR}/07_start_openwebui.sh" > "$LOG_FILE" 2>&1 &
SERVER_PID=$!
echo "$SERVER_PID" > "$PID_FILE"

echo "Open WebUI PID: ${SERVER_PID}"
echo ""
echo "Monitor logs: tail -f ${LOG_FILE}"
echo "Stop: bash 09_stop_openwebui.sh"
echo ""

# Give the process a moment, then check it did not die immediately
# (missing image, bad arguments, ...). This is not a readiness check.
sleep 5
if kill -0 "$SERVER_PID" 2>/dev/null; then
    echo "Open WebUI process is running. Starting up..."
    echo "(Ready when you see 'Uvicorn running' in the logs)"
else
    echo "ERROR: Open WebUI process exited. Check logs:" >&2
    tail -20 "$LOG_FILE" >&2
    # Do not leave a PID file that points at a dead process.
    rm -f "$PID_FILE"
    exit 1
fi
|
||||||
31
09_stop_openwebui.sh
Executable file
31
09_stop_openwebui.sh
Executable file
@ -0,0 +1,31 @@
|
|||||||
|
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 09_stop_openwebui.sh
# Gracefully stops the background Open WebUI server.
#
# Environment variables:
#   STOP_TIMEOUT — seconds to wait after SIGTERM before sending SIGKILL
#                  (default: 10)
# ------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PID_FILE="${SCRIPT_DIR}/logs/openwebui.pid"
STOP_TIMEOUT="${STOP_TIMEOUT:-10}"

if [ ! -f "$PID_FILE" ]; then
    echo "No PID file found. Open WebUI may not be running."
    exit 0
fi

SERVER_PID="$(cat "$PID_FILE")"

if [ -n "$SERVER_PID" ] && kill -0 "$SERVER_PID" 2>/dev/null; then
    echo "Stopping Open WebUI (PID: ${SERVER_PID})..."

    # If the recorded PID is a wrapper shell, the server is one of its
    # children; signal those first so they are not orphaned.
    # NOTE(review): relies on pkill (procps); skipped when unavailable.
    if command -v pkill >/dev/null 2>&1; then
        pkill -TERM -P "$SERVER_PID" 2>/dev/null || true
    fi

    # `|| true`: the process may exit between the liveness check and the
    # kill; under `set -e` that would abort before the PID file cleanup.
    kill "$SERVER_PID" 2>/dev/null || true

    # Poll once per second so a fast shutdown returns quickly while a
    # slow one still gets the full grace period.
    for ((waited = 0; waited < STOP_TIMEOUT; waited++)); do
        kill -0 "$SERVER_PID" 2>/dev/null || break
        sleep 1
    done

    if kill -0 "$SERVER_PID" 2>/dev/null; then
        echo "Process still alive, sending SIGKILL..."
        kill -9 "$SERVER_PID" 2>/dev/null || true
    fi
    echo "Open WebUI stopped."
else
    echo "Open WebUI process (PID: ${SERVER_PID}) is not running."
fi

rm -f "$PID_FILE"
|
||||||
162
README.md
162
README.md
@ -2,28 +2,35 @@
|
|||||||
|
|
||||||
Self-hosted LLM inference for ~15 concurrent students using **Qwen3.5-35B-A3B**
|
Self-hosted LLM inference for ~15 concurrent students using **Qwen3.5-35B-A3B**
|
||||||
(MoE, 35B total / 3B active per token), served via **vLLM** inside an
|
(MoE, 35B total / 3B active per token), served via **vLLM** inside an
|
||||||
**Apptainer** container on a GPU server. Includes a **Streamlit web app** for
|
**Apptainer** container on a GPU server. Two front-ends are provided:
|
||||||
chat and file editing.
|
**Open WebUI** (server-hosted ChatGPT-like UI) and a **Streamlit app**
|
||||||
|
(local chat + file editor with code execution).
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
```
|
```
|
||||||
Students (Streamlit App / OpenAI SDK / curl)
|
Students
|
||||||
│
|
│
|
||||||
▼
|
├── Browser ──► Open WebUI (silicon.fhgr.ch:7081)
|
||||||
┌──────────────────────────────┐
|
│ │ ChatGPT-like UI, user accounts, chat history
|
||||||
│ silicon.fhgr.ch:7080 │
|
│ │
|
||||||
│ OpenAI-compatible API │
|
├── Streamlit ─────┤ Local app with file editor & code runner
|
||||||
├──────────────────────────────┤
|
│ │
|
||||||
│ vLLM Server (nightly) │
|
└── SDK / curl ────┘
|
||||||
│ Apptainer container (.sif) │
|
▼
|
||||||
├──────────────────────────────┤
|
┌──────────────────────────────┐
|
||||||
│ Qwen3.5-35B-A3B weights │
|
│ silicon.fhgr.ch:7080 │
|
||||||
│ (bind-mounted from host) │
|
│ OpenAI-compatible API │
|
||||||
├──────────────────────────────┤
|
├──────────────────────────────┤
|
||||||
│ 2× NVIDIA L40S (46 GB ea.) │
|
│ vLLM Server (nightly) │
|
||||||
│ Tensor Parallel = 2 │
|
│ Apptainer container (.sif) │
|
||||||
└──────────────────────────────┘
|
├──────────────────────────────┤
|
||||||
|
│ Qwen3.5-35B-A3B weights │
|
||||||
|
│ (bind-mounted from host) │
|
||||||
|
├──────────────────────────────┤
|
||||||
|
│ 2× NVIDIA L40S (46 GB ea.) │
|
||||||
|
│ Tensor Parallel = 2 │
|
||||||
|
└──────────────────────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
## Hardware
|
## Hardware
|
||||||
@ -134,14 +141,90 @@ curl http://localhost:7080/v1/chat/completions \
|
|||||||
-d '{"model":"qwen3.5-35b-a3b","messages":[{"role":"user","content":"Hello!"}],"max_tokens":128}'
|
-d '{"model":"qwen3.5-35b-a3b","messages":[{"role":"user","content":"Hello!"}],"max_tokens":128}'
|
||||||
```
|
```
|
||||||
|
|
||||||
### Step 7: Share with Students
|
### Step 7: Set Up Open WebUI (ChatGPT-like Interface)
|
||||||
|
|
||||||
|
Open WebUI provides a full-featured chat interface that runs on the server.
|
||||||
|
Students access it via a browser — no local setup required.
|
||||||
|
|
||||||
|
**Pull the container:**
|
||||||
|
```bash
|
||||||
|
bash 06_setup_openwebui.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
**Start (foreground with tmux):**
|
||||||
|
```bash
|
||||||
|
tmux new -s webui
|
||||||
|
bash 07_start_openwebui.sh
|
||||||
|
# Ctrl+B, then D to detach
|
||||||
|
```
|
||||||
|
|
||||||
|
**Start (background with logging):**
|
||||||
|
```bash
|
||||||
|
bash 08_start_openwebui_background.sh
|
||||||
|
tail -f logs/openwebui_*.log
|
||||||
|
```
|
||||||
|
|
||||||
|
Open WebUI is ready when you see `Uvicorn running` in the logs.
|
||||||
|
Access it at `http://silicon.fhgr.ch:7081`.
|
||||||
|
|
||||||
|
> **Important**: The first user to sign up becomes the **admin**. Sign up
|
||||||
|
> yourself first before sharing the URL with students.
|
||||||
|
|
||||||
|
### Step 8: Share with Students
|
||||||
|
|
||||||
Distribute `STUDENT_GUIDE.md` with connection details:
|
Distribute `STUDENT_GUIDE.md` with connection details:
|
||||||
- **Base URL**: `http://silicon.fhgr.ch:7080/v1`
|
- **Open WebUI**: `http://silicon.fhgr.ch:7081` (recommended for most students)
|
||||||
|
- **API Base URL**: `http://silicon.fhgr.ch:7080/v1` (for SDK / programmatic use)
|
||||||
- **Model name**: `qwen3.5-35b-a3b`
|
- **Model name**: `qwen3.5-35b-a3b`
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Open WebUI
|
||||||
|
|
||||||
|
A server-hosted ChatGPT-like interface backed by the vLLM inference server.
|
||||||
|
Runs as an Apptainer container on port **7081**.
|
||||||
|
|
||||||
|
### Features
|
||||||
|
|
||||||
|
- User accounts with persistent chat history (stored in `openwebui-data/`)
|
||||||
|
- Auto-discovers models from the vLLM backend
|
||||||
|
- Streaming responses, markdown rendering, code highlighting
|
||||||
|
- Admin panel for managing users, models, and settings
|
||||||
|
- No local setup needed — students just open a browser
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
|
||||||
|
| Variable | Default | Description |
|
||||||
|
|----------|---------|-------------|
|
||||||
|
| `PORT` | `7081` | HTTP port for the UI |
|
||||||
|
| `VLLM_BASE_URL` | `http://localhost:7080/v1` | vLLM API endpoint |
|
||||||
|
| `VLLM_API_KEY` | `EMPTY` | API key (if vLLM requires one) |
|
||||||
|
| `DATA_DIR` | `./openwebui-data` | Persistent storage (DB, uploads) |
|
||||||
|
|
||||||
|
### Management
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start in background
|
||||||
|
bash 08_start_openwebui_background.sh
|
||||||
|
|
||||||
|
# View logs
|
||||||
|
tail -f logs/openwebui_*.log
|
||||||
|
|
||||||
|
# Stop
|
||||||
|
bash 09_stop_openwebui.sh
|
||||||
|
|
||||||
|
# Reconnect to tmux session
|
||||||
|
tmux attach -t webui
|
||||||
|
```
|
||||||
|
|
||||||
|
### Data Persistence
|
||||||
|
|
||||||
|
All user data (accounts, chats, settings) is stored in `openwebui-data/`.
|
||||||
|
This directory is bind-mounted into the container, so data survives
|
||||||
|
container restarts. Back it up regularly.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Streamlit App
|
## Streamlit App
|
||||||
|
|
||||||
A web-based chat and file editor that connects to the inference server.
|
A web-based chat and file editor that connects to the inference server.
|
||||||
@ -240,18 +323,22 @@ tmux attach -t llm
|
|||||||
|
|
||||||
## Files Overview
|
## Files Overview
|
||||||
|
|
||||||
| File | Purpose |
|
| File | Purpose |
|
||||||
|----------------------------------|------------------------------------------------------|
|
|------------------------------------|------------------------------------------------------|
|
||||||
| `vllm_qwen.def` | Apptainer container definition (vLLM nightly + deps) |
|
| `vllm_qwen.def` | Apptainer container definition (vLLM nightly + deps) |
|
||||||
| `01_build_container.sh` | Builds the Apptainer `.sif` image |
|
| `01_build_container.sh` | Builds the Apptainer `.sif` image |
|
||||||
| `02_download_model.sh` | Downloads model weights (runs inside container) |
|
| `02_download_model.sh` | Downloads model weights (runs inside container) |
|
||||||
| `03_start_server.sh` | Starts vLLM server (foreground) |
|
| `03_start_server.sh` | Starts vLLM server (foreground) |
|
||||||
| `04_start_server_background.sh` | Starts server in background with logging |
|
| `04_start_server_background.sh` | Starts vLLM server in background with logging |
|
||||||
| `05_stop_server.sh` | Stops the background server |
|
| `05_stop_server.sh` | Stops the background vLLM server |
|
||||||
| `app.py` | Streamlit chat & file editor web app |
|
| `06_setup_openwebui.sh` | Pulls the Open WebUI container image |
|
||||||
| `requirements.txt` | Python dependencies for the Streamlit app |
|
| `07_start_openwebui.sh` | Starts Open WebUI (foreground) |
|
||||||
| `test_server.py` | Tests the running server via CLI |
|
| `08_start_openwebui_background.sh` | Starts Open WebUI in background with logging |
|
||||||
| `STUDENT_GUIDE.md` | Instructions for students |
|
| `09_stop_openwebui.sh` | Stops the background Open WebUI |
|
||||||
|
| `app.py` | Streamlit chat & file editor web app |
|
||||||
|
| `requirements.txt` | Python dependencies for the Streamlit app |
|
||||||
|
| `test_server.py` | Tests the running server via CLI |
|
||||||
|
| `STUDENT_GUIDE.md` | Instructions for students |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@ -285,6 +372,17 @@ tmux attach -t llm
|
|||||||
- Disable thinking mode for faster simple responses
|
- Disable thinking mode for faster simple responses
|
||||||
- Monitor: `curl http://localhost:7080/metrics`
|
- Monitor: `curl http://localhost:7080/metrics`
|
||||||
|
|
||||||
|
### Open WebUI won't start
|
||||||
|
- Ensure the vLLM server is running first on port 7080
|
||||||
|
- Check that port 7081 is not already in use: `ss -tlnp | grep 7081`
|
||||||
|
- Check logs: `tail -50 logs/openwebui_*.log`
|
||||||
|
- If the database is corrupted, move it aside and restart (this resets **all** accounts and chats): `mv openwebui-data/webui.db openwebui-data/webui.db.bak`
|
||||||
|
|
||||||
|
### Open WebUI shows no models
|
||||||
|
- Verify vLLM is reachable: `curl http://localhost:7080/v1/models`
|
||||||
|
- The OpenAI API base URL is set on first launch; if changed later, update
|
||||||
|
it in the Open WebUI Admin Panel > Settings > Connections
|
||||||
|
|
||||||
### Syncing files to the server
|
### Syncing files to the server
|
||||||
- No `git` or `pip` on the host — use `scp` from your local machine:
|
- No `git` or `pip` on the host — use `scp` from your local machine:
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@ -4,30 +4,93 @@
|
|||||||
|
|
||||||
A **Qwen3.5-35B-A3B** language model is running on our GPU server. It's a
|
A **Qwen3.5-35B-A3B** language model is running on our GPU server. It's a
|
||||||
Mixture-of-Experts model (35B total parameters, 3B active per token), providing
|
Mixture-of-Experts model (35B total parameters, 3B active per token), providing
|
||||||
fast and high-quality responses. You can interact with it using the
|
fast and high-quality responses.
|
||||||
**OpenAI-compatible API**.
|
|
||||||
|
|
||||||
## Connection Details
|
There are **three ways** to interact with the model:
|
||||||
|
|
||||||
| Parameter | Value |
|
1. **Open WebUI** — ChatGPT-like interface in your browser (easiest)
|
||||||
|------------- |---------------------------------------------|
|
2. **Streamlit App** — Local app with chat, file editor, and code execution
|
||||||
| **Base URL** | `http://silicon.fhgr.ch:7080/v1` |
|
3. **Python SDK / curl** — Programmatic access via the OpenAI-compatible API
|
||||||
| **Model** | `qwen3.5-35b-a3b` |
|
|
||||||
| **API Key** | *(ask your instructor — may be `EMPTY`)* |
|
|
||||||
|
|
||||||
> **Note**: You must be on the university network or VPN to reach the server.
|
> **Note**: You must be on the university network or VPN to reach the server.
|
||||||
|
|
||||||
|
## Connection Details
|
||||||
|
|
||||||
|
| Parameter | Value |
|
||||||
|
|------------------|---------------------------------------------|
|
||||||
|
| **Open WebUI** | `http://silicon.fhgr.ch:7081` |
|
||||||
|
| **API Base URL** | `http://silicon.fhgr.ch:7080/v1` |
|
||||||
|
| **Model** | `qwen3.5-35b-a3b` |
|
||||||
|
| **API Key** | *(ask your instructor — may be `EMPTY`)* |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Quick Start with Python
|
## Option 1: Open WebUI (Recommended)
|
||||||
|
|
||||||
### 1. Install the OpenAI SDK
|
The easiest way to chat with the model — no installation required.
|
||||||
|
|
||||||
|
### Getting Started
|
||||||
|
|
||||||
|
1. Make sure you are connected to the **university network** (or VPN).
|
||||||
|
2. Open your browser and go to **http://silicon.fhgr.ch:7081**
|
||||||
|
3. Click **"Sign Up"** to create a new account:
|
||||||
|
- Enter your **name** (e.g. your first and last name)
|
||||||
|
- Enter your **email** (use your university email)
|
||||||
|
- Choose a **password**
|
||||||
|
- Click **"Create Account"**
|
||||||
|
4. After signing up you are logged in automatically.
|
||||||
|
5. Select the model **qwen3.5-35b-a3b** from the model dropdown at the top.
|
||||||
|
6. Type a message and press Enter — you're chatting with the LLM.
|
||||||
|
|
||||||
|
### Returning Later
|
||||||
|
|
||||||
|
- Go to **http://silicon.fhgr.ch:7081** and click **"Sign In"**.
|
||||||
|
- Enter the email and password you used during sign-up.
|
||||||
|
- All your previous chats are still there.
|
||||||
|
|
||||||
|
### Features
|
||||||
|
|
||||||
|
- **Chat history** — all conversations are saved on the server and persist across sessions
|
||||||
|
- **Markdown rendering** with syntax-highlighted code blocks
|
||||||
|
- **Model selector** — auto-discovers available models from the server
|
||||||
|
- **Conversation branching** — edit previous messages and explore alternative responses
|
||||||
|
- **File upload** — attach files to your messages for the model to analyze
|
||||||
|
- **Search** — search across all your past conversations
|
||||||
|
|
||||||
|
### Tips
|
||||||
|
|
||||||
|
- Your account and chat history are stored on the server. You can log in
|
||||||
|
from any device on the university network.
|
||||||
|
- If you forget your password, ask your instructor to reset it via the
|
||||||
|
Admin Panel.
|
||||||
|
- The model works best when you provide clear, specific instructions.
|
||||||
|
- For code tasks, mention the programming language explicitly (e.g.
|
||||||
|
"Write a Python function that...").
|
||||||
|
- Long conversations use more context. Start a **New Chat** (top-left
|
||||||
|
button) when switching topics to get faster, more focused responses.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Option 2: Streamlit App (Chat + File Editor)
|
||||||
|
|
||||||
|
A local app with chat, file editing, and Python/LaTeX execution.
|
||||||
|
See the [Streamlit section below](#streamlit-chat--file-editor-app) for setup.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Option 3: Python SDK / curl
|
||||||
|
|
||||||
|
For programmatic access and scripting.
|
||||||
|
|
||||||
|
### Quick Start with Python
|
||||||
|
|
||||||
|
#### 1. Install the OpenAI SDK
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install openai
|
pip install openai
|
||||||
```
|
```
|
||||||
|
|
||||||
### 2. Simple Chat
|
#### 2. Simple Chat
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
@ -50,7 +113,7 @@ response = client.chat.completions.create(
|
|||||||
print(response.choices[0].message.content)
|
print(response.choices[0].message.content)
|
||||||
```
|
```
|
||||||
|
|
||||||
### 3. Streaming Responses
|
#### 3. Streaming Responses
|
||||||
|
|
||||||
```python
|
```python
|
||||||
stream = client.chat.completions.create(
|
stream = client.chat.completions.create(
|
||||||
@ -70,7 +133,7 @@ print()
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Quick Start with curl
|
### Quick Start with curl
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl http://silicon.fhgr.ch:7080/v1/chat/completions \
|
curl http://silicon.fhgr.ch:7080/v1/chat/completions \
|
||||||
@ -196,3 +259,5 @@ response = client.chat.completions.create(
|
|||||||
| Slow responses | The model is shared — peak times may be slower |
|
| Slow responses | The model is shared — peak times may be slower |
|
||||||
| `401 Unauthorized` | Ask your instructor for the API key |
|
| `401 Unauthorized` | Ask your instructor for the API key |
|
||||||
| Response cut off | Increase `max_tokens` in your request |
|
| Response cut off | Increase `max_tokens` in your request |
|
||||||
|
| Open WebUI login fails | Make sure you created an account first (Sign Up) |
|
||||||
|
| Open WebUI shows no models | The vLLM server may still be loading — wait a few minutes |
|
||||||
|
|||||||
202
app.py
202
app.py
@ -11,6 +11,7 @@ Usage:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import subprocess
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -51,6 +52,9 @@ LANG_MAP = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
MAX_CONTEXT = 32768
|
||||||
|
|
||||||
|
|
||||||
def extract_code(text: str, lang: str = "") -> str:
|
def extract_code(text: str, lang: str = "") -> str:
|
||||||
"""Extract the first fenced code block from markdown text.
|
"""Extract the first fenced code block from markdown text.
|
||||||
Falls back to the full text if no code block is found."""
|
Falls back to the full text if no code block is found."""
|
||||||
@ -61,6 +65,56 @@ def extract_code(text: str, lang: str = "") -> str:
|
|||||||
return text.strip()
|
return text.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def estimate_tokens(messages: list[dict]) -> int:
    """Approximate the token count of a chat history.

    Uses the rule of thumb of roughly four characters per token: the
    lengths of all ``"content"`` values are added up and integer-divided
    by four. Every message must have a ``"content"`` entry supporting
    ``len()``.
    """
    total_chars = 0
    for message in messages:
        total_chars += len(message["content"])
    return total_chars // 4
|
||||||
|
|
||||||
|
|
||||||
|
def trim_history(messages: list[dict], reserved: int) -> list[dict]:
    """Discard the oldest messages until the history fits the context budget.

    The budget is ``MAX_CONTEXT - reserved`` estimated tokens. Messages are
    removed one at a time from the front, and the final message is never
    removed, even if it alone exceeds the budget.

    The list is modified in place and the same list object is returned.
    """
    budget = MAX_CONTEXT - reserved
    last_index = len(messages) - 1

    # Find how many leading messages must go before the rest fits.
    drop = 0
    while drop < last_index and estimate_tokens(messages[drop:]) > budget:
        drop += 1

    del messages[:drop]
    return messages
|
||||||
|
|
||||||
|
|
||||||
|
# File suffixes for which the editor offers a run/compile action.
RUNNABLE_EXTENSIONS = {".py", ".tex"}
# Wall-clock limit, in seconds, for a single run.
RUN_TIMEOUT = 30


def _as_text(data) -> str:
    """Return captured process output as ``str``.

    The output attached to ``subprocess.TimeoutExpired`` may be ``None``
    (nothing captured) or still ``bytes`` even though ``text=True`` was
    requested, so both are normalised here.
    """
    if data is None:
        return ""
    if isinstance(data, bytes):
        return data.decode(errors="replace")
    return data


def run_file(file_path: Path) -> dict:
    """Execute a .py or .tex file and return stdout, stderr, and return code.

    ``.py`` files are run with ``python3``; ``.tex`` files are compiled
    with ``pdflatex`` in non-stop mode. The command runs in the file's own
    directory, so relative paths inside the file resolve next to it.

    Returns:
        A dict with keys ``"stdout"``, ``"stderr"`` and ``"rc"``. ``rc`` is
        the process exit code, ``1`` for an unsupported suffix, and ``-1``
        when the run timed out or the executable could not be started.
        On timeout, whatever output was captured before the process was
        killed is returned, with the timeout notice as the last line of
        ``"stderr"``.
    """
    suffix = file_path.suffix
    cwd = file_path.parent.resolve()

    if suffix == ".py":
        cmd = ["python3", file_path.name]
    elif suffix == ".tex":
        cmd = [
            "pdflatex",
            "-interaction=nonstopmode",
            f"-output-directory={cwd}",
            file_path.name,
        ]
    else:
        return {"stdout": "", "stderr": f"Unsupported file type: {suffix}", "rc": 1}

    try:
        proc = subprocess.run(
            cmd,
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=RUN_TIMEOUT,
        )
    except subprocess.TimeoutExpired as exc:
        # Keep the partial output: it is usually what explains the hang
        # (e.g. a loop printing progress, or a LaTeX error prompt).
        notice = f"Timed out after {RUN_TIMEOUT}s"
        partial_err = _as_text(exc.stderr).rstrip()
        stderr = f"{partial_err}\n{notice}" if partial_err else notice
        return {"stdout": _as_text(exc.stdout), "stderr": stderr, "rc": -1}
    except FileNotFoundError as e:
        # Interpreter/compiler not installed, or the directory vanished.
        return {"stdout": "", "stderr": str(e), "rc": -1}

    return {"stdout": proc.stdout, "stderr": proc.stderr, "rc": proc.returncode}
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Sidebar — File Manager
|
# Sidebar — File Manager
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@ -100,6 +154,10 @@ with tab_chat:
|
|||||||
with st.chat_message("user"):
|
with st.chat_message("user"):
|
||||||
st.markdown(prompt)
|
st.markdown(prompt)
|
||||||
|
|
||||||
|
st.session_state.messages = trim_history(
|
||||||
|
st.session_state.messages, reserved=max_tokens
|
||||||
|
)
|
||||||
|
|
||||||
with st.chat_message("assistant"):
|
with st.chat_message("assistant"):
|
||||||
placeholder = st.empty()
|
placeholder = st.empty()
|
||||||
full_response = ""
|
full_response = ""
|
||||||
@ -123,6 +181,13 @@ with tab_chat:
|
|||||||
st.session_state.messages.append({"role": "assistant", "content": full_response})
|
st.session_state.messages.append({"role": "assistant", "content": full_response})
|
||||||
|
|
||||||
if st.session_state.messages:
|
if st.session_state.messages:
|
||||||
|
used = estimate_tokens(st.session_state.messages)
|
||||||
|
pct = min(used / MAX_CONTEXT, 1.0)
|
||||||
|
label = f"Context: ~{used:,} / {MAX_CONTEXT:,} tokens"
|
||||||
|
if pct > 0.8:
|
||||||
|
label += " ⚠️ nearing limit — older messages will be trimmed"
|
||||||
|
st.progress(pct, text=label)
|
||||||
|
|
||||||
col_clear, col_save = st.columns([1, 3])
|
col_clear, col_save = st.columns([1, 3])
|
||||||
with col_clear:
|
with col_clear:
|
||||||
if st.button("Clear Chat"):
|
if st.button("Clear Chat"):
|
||||||
@ -149,54 +214,103 @@ with tab_editor:
|
|||||||
content = file_path.read_text() if file_path.exists() else ""
|
content = file_path.read_text() if file_path.exists() else ""
|
||||||
suffix = file_path.suffix
|
suffix = file_path.suffix
|
||||||
lang = LANG_MAP.get(suffix, "text")
|
lang = LANG_MAP.get(suffix, "text")
|
||||||
|
runnable = suffix in RUNNABLE_EXTENSIONS
|
||||||
|
|
||||||
st.code(content, language=lang if lang != "text" else None, line_numbers=True)
|
if runnable:
|
||||||
|
col_edit, col_term = st.columns([3, 2])
|
||||||
|
else:
|
||||||
|
col_edit = st.container()
|
||||||
|
|
||||||
edited = st.text_area(
|
with col_edit:
|
||||||
"Edit below:",
|
st.code(content, language=lang if lang != "text" else None, line_numbers=True)
|
||||||
value=content,
|
|
||||||
height=400,
|
|
||||||
key=f"editor_{selected_file}_{hash(content)}",
|
|
||||||
)
|
|
||||||
|
|
||||||
col_save, col_gen = st.columns(2)
|
edited = st.text_area(
|
||||||
|
"Edit below:",
|
||||||
with col_save:
|
value=content,
|
||||||
if st.button("Save File"):
|
height=400,
|
||||||
file_path.write_text(edited)
|
key=f"editor_{selected_file}_{hash(content)}",
|
||||||
st.success(f"Saved {selected_file}")
|
|
||||||
st.rerun()
|
|
||||||
|
|
||||||
with col_gen:
|
|
||||||
gen_prompt = st.text_input(
|
|
||||||
"Generation instruction",
|
|
||||||
placeholder="e.g. Add error handling / Fix the LaTeX formatting",
|
|
||||||
key="gen_prompt",
|
|
||||||
)
|
)
|
||||||
if st.button("Generate with LLM") and gen_prompt:
|
|
||||||
with st.spinner("Generating..."):
|
col_save, col_gen = st.columns(2)
|
||||||
response = client.chat.completions.create(
|
|
||||||
model=MODEL,
|
with col_save:
|
||||||
messages=[
|
if st.button("Save File"):
|
||||||
{"role": "system", "content": (
|
file_path.write_text(edited)
|
||||||
f"You are a coding assistant. The user has a {lang} file. "
|
st.success(f"Saved {selected_file}")
|
||||||
"Return ONLY the raw file content inside a single code block. "
|
|
||||||
"No explanations, no comments about changes."
|
|
||||||
)},
|
|
||||||
{"role": "user", "content": (
|
|
||||||
f"Here is my {lang} file:\n\n```\n{edited}\n```\n\n"
|
|
||||||
f"Instruction: {gen_prompt}"
|
|
||||||
)},
|
|
||||||
],
|
|
||||||
max_tokens=max_tokens,
|
|
||||||
temperature=temperature,
|
|
||||||
top_p=top_p,
|
|
||||||
extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
|
|
||||||
)
|
|
||||||
result = response.choices[0].message.content
|
|
||||||
code = extract_code(result, lang)
|
|
||||||
file_path.write_text(code)
|
|
||||||
st.success("File updated by LLM")
|
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
|
with col_gen:
|
||||||
|
gen_prompt = st.text_input(
|
||||||
|
"Generation instruction",
|
||||||
|
placeholder="e.g. Add error handling / Fix the LaTeX formatting",
|
||||||
|
key="gen_prompt",
|
||||||
|
)
|
||||||
|
if st.button("Generate with LLM") and gen_prompt:
|
||||||
|
with st.spinner("Generating..."):
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model=MODEL,
|
||||||
|
messages=[
|
||||||
|
{"role": "system", "content": (
|
||||||
|
f"You are a coding assistant. The user has a {lang} file. "
|
||||||
|
"Return ONLY the raw file content inside a single code block. "
|
||||||
|
"No explanations, no comments about changes."
|
||||||
|
)},
|
||||||
|
{"role": "user", "content": (
|
||||||
|
f"Here is my {lang} file:\n\n```\n{edited}\n```\n\n"
|
||||||
|
f"Instruction: {gen_prompt}"
|
||||||
|
)},
|
||||||
|
],
|
||||||
|
max_tokens=max_tokens,
|
||||||
|
temperature=temperature,
|
||||||
|
top_p=top_p,
|
||||||
|
extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
|
||||||
|
)
|
||||||
|
result = response.choices[0].message.content
|
||||||
|
code = extract_code(result, lang)
|
||||||
|
file_path.write_text(code)
|
||||||
|
st.success("File updated by LLM")
|
||||||
|
st.rerun()
|
||||||
|
|
||||||
|
if runnable:
|
||||||
|
with col_term:
|
||||||
|
run_label = "Compile LaTeX" if suffix == ".tex" else "Run Python"
|
||||||
|
st.subheader("Terminal Output")
|
||||||
|
|
||||||
|
if st.button(run_label, type="primary"):
|
||||||
|
file_path.write_text(edited)
|
||||||
|
with st.spinner(f"{'Compiling' if suffix == '.tex' else 'Running'}..."):
|
||||||
|
result = run_file(file_path)
|
||||||
|
st.session_state["last_run"] = result
|
||||||
|
|
||||||
|
result = st.session_state.get("last_run")
|
||||||
|
if result:
|
||||||
|
if result["rc"] == 0:
|
||||||
|
st.success(f"Exit code: {result['rc']}")
|
||||||
|
else:
|
||||||
|
st.error(f"Exit code: {result['rc']}")
|
||||||
|
|
||||||
|
if result["stdout"]:
|
||||||
|
st.text_area(
|
||||||
|
"stdout",
|
||||||
|
value=result["stdout"],
|
||||||
|
height=300,
|
||||||
|
disabled=True,
|
||||||
|
key="run_stdout",
|
||||||
|
)
|
||||||
|
if result["stderr"]:
|
||||||
|
st.text_area(
|
||||||
|
"stderr",
|
||||||
|
value=result["stderr"],
|
||||||
|
height=200,
|
||||||
|
disabled=True,
|
||||||
|
key="run_stderr",
|
||||||
|
)
|
||||||
|
if not result["stdout"] and not result["stderr"]:
|
||||||
|
st.info("No output produced.")
|
||||||
|
else:
|
||||||
|
st.caption(
|
||||||
|
f"Click **{run_label}** to execute the file "
|
||||||
|
f"(timeout: {RUN_TIMEOUT}s)."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
st.info("Create a file in the sidebar to start editing.")
|
st.info("Create a file in the sidebar to start editing.")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user