Add Open WebUI integration and enhance Streamlit app
- Add Open WebUI scripts (06-09) for a server-hosted ChatGPT-like interface connected to the vLLM backend on port 7081
- Add context window management to chat (auto-trim, token counter, progress bar)
- Add a terminal output panel to the file editor for running Python/LaTeX files
- Update README with Open WebUI setup, architecture diagram, and troubleshooting
- Update STUDENT_GUIDE with step-by-step Open WebUI login instructions

Made-with: Cursor
This commit is contained in:
parent
d59285fe69
commit
f4fdaab732
3
.gitignore
vendored
3
.gitignore
vendored
@ -16,5 +16,8 @@ models/
|
||||
# Streamlit workspace files
|
||||
workspace/
|
||||
|
||||
# Open WebUI persistent data (user accounts, chats, DB)
|
||||
openwebui-data/
|
||||
|
||||
# macOS
|
||||
.DS_Store
|
||||
|
||||
37
06_setup_openwebui.sh
Executable file
37
06_setup_openwebui.sh
Executable file
@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 06_setup_openwebui.sh
#
# Fetches the Open WebUI container image as an Apptainer .sif and
# prepares the directory used for persistent state (user accounts,
# chat history, settings).
#
# Usage:
#   bash 06_setup_openwebui.sh
#
# Idempotent: exits successfully without re-pulling if the image
# already exists.
# ------------------------------------------------------------------
set -euo pipefail

script_dir="$(cd "$(dirname "$0")" && pwd)"
image_file="${script_dir}/open-webui.sif"
data_dir="${script_dir}/openwebui-data"

# Skip the (slow, ~4 GB) pull when the image is already present.
if [ -f "$image_file" ]; then
  cat <<EOF
Open WebUI container already exists at ${image_file}
Delete it first if you want to rebuild:
  rm ${image_file}
EOF
  exit 0
fi

cat <<EOF
=== Pulling Open WebUI container image ===
  Source: ghcr.io/open-webui/open-webui:main
  This may take 5-10 minutes (~4 GB)...

EOF

apptainer pull "$image_file" docker://ghcr.io/open-webui/open-webui:main

# Created only after a successful pull, mirroring the original flow.
mkdir -p "$data_dir"

cat <<EOF

=== Setup complete ===
Image: ${image_file} ($(du -sh "$image_file" | cut -f1))
Data dir: ${data_dir}

Next: bash 07_start_openwebui.sh
EOF
|
||||
70
07_start_openwebui.sh
Executable file
70
07_start_openwebui.sh
Executable file
@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 07_start_openwebui.sh
#
# Runs Open WebUI (ChatGPT-like web frontend) against the local vLLM
# inference server:
#   - User accounts & chat history (persisted in openwebui-data/)
#   - Model selector (auto-discovers models from vLLM)
#   - Streaming responses, markdown rendering, code highlighting
#
# The first user to sign up becomes the admin.
#
# Usage:
#   bash 07_start_openwebui.sh                 # defaults
#   PORT=7082 bash 07_start_openwebui.sh       # custom port
#   VLLM_BASE_URL=http://localhost:7080/v1 bash 07_start_openwebui.sh
#
# Environment variables:
#   PORT           — HTTP port for Open WebUI (default: 7081)
#   VLLM_BASE_URL  — vLLM OpenAI-compatible URL (default: http://localhost:7080/v1)
#   VLLM_API_KEY   — API key for vLLM (default: EMPTY)
#   DATA_DIR       — Persistent storage path (default: ./openwebui-data)
# ------------------------------------------------------------------
set -euo pipefail

script_dir="$(cd "$(dirname "$0")" && pwd)"
image_file="${script_dir}/open-webui.sif"

# External configuration knobs (names are part of the interface — keep them).
PORT="${PORT:-7081}"
VLLM_BASE_URL="${VLLM_BASE_URL:-http://localhost:7080/v1}"
VLLM_API_KEY="${VLLM_API_KEY:-EMPTY}"
DATA_DIR="${DATA_DIR:-${script_dir}/openwebui-data}"

# Fail fast with a hint when the image has not been pulled yet.
if [ ! -f "$image_file" ]; then
  echo "ERROR: Container image not found at ${image_file}"
  echo " Run 06_setup_openwebui.sh first."
  exit 1
fi

mkdir -p "$DATA_DIR"

cat <<EOF
=== Starting Open WebUI ===
 Port: ${PORT}
 vLLM backend: ${VLLM_BASE_URL}
 Data directory: ${DATA_DIR}

 Access at: http://$(hostname -f 2>/dev/null || hostname):${PORT}
 First user to sign up becomes admin.

 Press Ctrl+C to stop.
===========================================

EOF

# All Open WebUI configuration is injected via container env vars.
# Offline/update-check switches keep the air-gapped host from phoning home.
env_flags=(
  --env PORT="${PORT}"
  --env ENABLE_OPENAI_API="True"
  --env OPENAI_API_BASE_URLS="${VLLM_BASE_URL}"
  --env OPENAI_API_KEYS="${VLLM_API_KEY}"
  --env ENABLE_OLLAMA_API="False"
  --env ENABLE_SIGNUP="True"
  --env DEFAULT_USER_ROLE="user"
  --env WEBUI_NAME="Qwen3.5 LLM Server"
  --env OFFLINE_MODE="True"
  --env ENABLE_VERSION_UPDATE_CHECK="False"
  --env HF_HUB_OFFLINE="1"
)

apptainer exec \
  --writable-tmpfs \
  --pwd /app/backend \
  --bind "${DATA_DIR}:/app/backend/data" \
  "${env_flags[@]}" \
  "$image_file" \
  bash start.sh
|
||||
52
08_start_openwebui_background.sh
Executable file
52
08_start_openwebui_background.sh
Executable file
@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 08_start_openwebui_background.sh
#
# Detached launcher for Open WebUI: runs 07_start_openwebui.sh under
# nohup, records its PID, and verifies it survived startup.
#
# Usage:
#   bash 08_start_openwebui_background.sh
#
# Logs are written to: ./logs/openwebui_<timestamp>.log
# PID is written to: ./logs/openwebui.pid
# ------------------------------------------------------------------
set -euo pipefail

script_dir="$(cd "$(dirname "$0")" && pwd)"
log_dir="${script_dir}/logs"
mkdir -p "$log_dir"

stamp="$(date +%Y%m%d_%H%M%S)"
log_file="${log_dir}/openwebui_${stamp}.log"
pid_file="${log_dir}/openwebui.pid"

# Refuse to start a second instance while a previous one is still alive.
# A stale PID file (dead process) is simply overwritten below.
if [ -f "$pid_file" ]; then
  previous_pid=$(cat "$pid_file")
  if kill -0 "$previous_pid" 2>/dev/null; then
    echo "Open WebUI already running with PID ${previous_pid}"
    echo "Stop it first: bash 09_stop_openwebui.sh"
    exit 1
  fi
fi

echo "Starting Open WebUI in background..."
echo "Log file: ${log_file}"

nohup bash "${script_dir}/07_start_openwebui.sh" > "$log_file" 2>&1 &
webui_pid=$!
echo "$webui_pid" > "$pid_file"

echo "Open WebUI PID: ${webui_pid}"
echo ""
echo "Monitor logs: tail -f ${log_file}"
echo "Stop: bash 09_stop_openwebui.sh"
echo ""

# Give the process a few seconds, then confirm it did not die immediately
# (e.g. missing image, port already bound).
sleep 5
if ! kill -0 "$webui_pid" 2>/dev/null; then
  echo "ERROR: Open WebUI process exited. Check logs:"
  tail -20 "$log_file"
  exit 1
fi
echo "Open WebUI process is running. Starting up..."
echo "(Ready when you see 'Uvicorn running' in the logs)"
|
||||
31
09_stop_openwebui.sh
Executable file
31
09_stop_openwebui.sh
Executable file
@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 09_stop_openwebui.sh
# Gracefully stops the background Open WebUI server.
#
# The PID recorded by 08_start_openwebui_background.sh belongs to the
# nohup'd wrapper (bash 07_start_openwebui.sh); the actual apptainer
# container runs as its child. Killing only the recorded PID would
# orphan the container, so child processes are terminated as well.
# Kills are guarded with "|| true" so set -e cannot abort mid-cleanup.
# ------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PID_FILE="${SCRIPT_DIR}/logs/openwebui.pid"

if [ ! -f "$PID_FILE" ]; then
  echo "No PID file found. Open WebUI may not be running."
  exit 0
fi

SERVER_PID=$(cat "$PID_FILE")

if kill -0 "$SERVER_PID" 2>/dev/null; then
  echo "Stopping Open WebUI (PID: ${SERVER_PID})..."
  # Capture children before killing the parent — after the parent dies
  # they are re-parented and pgrep -P can no longer find them.
  CHILD_PIDS=$(pgrep -P "$SERVER_PID" 2>/dev/null || true)
  kill "$SERVER_PID" 2>/dev/null || true
  for child in $CHILD_PIDS; do
    kill "$child" 2>/dev/null || true
  done
  sleep 2
  # Escalate to SIGKILL for anything that ignored SIGTERM.
  if kill -0 "$SERVER_PID" 2>/dev/null; then
    echo "Process still alive, sending SIGKILL..."
    kill -9 "$SERVER_PID" 2>/dev/null || true
  fi
  for child in $CHILD_PIDS; do
    if kill -0 "$child" 2>/dev/null; then
      kill -9 "$child" 2>/dev/null || true
    fi
  done
  echo "Open WebUI stopped."
else
  echo "Open WebUI process (PID: ${SERVER_PID}) is not running."
fi

rm -f "$PID_FILE"
|
||||
162
README.md
162
README.md
@ -2,28 +2,35 @@
|
||||
|
||||
Self-hosted LLM inference for ~15 concurrent students using **Qwen3.5-35B-A3B**
|
||||
(MoE, 35B total / 3B active per token), served via **vLLM** inside an
|
||||
**Apptainer** container on a GPU server. Includes a **Streamlit web app** for
|
||||
chat and file editing.
|
||||
**Apptainer** container on a GPU server. Two front-ends are provided:
|
||||
**Open WebUI** (server-hosted ChatGPT-like UI) and a **Streamlit app**
|
||||
(local chat + file editor with code execution).
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
Students (Streamlit App / OpenAI SDK / curl)
|
||||
│
|
||||
▼
|
||||
┌──────────────────────────────┐
|
||||
│ silicon.fhgr.ch:7080 │
|
||||
│ OpenAI-compatible API │
|
||||
├──────────────────────────────┤
|
||||
│ vLLM Server (nightly) │
|
||||
│ Apptainer container (.sif) │
|
||||
├──────────────────────────────┤
|
||||
│ Qwen3.5-35B-A3B weights │
|
||||
│ (bind-mounted from host) │
|
||||
├──────────────────────────────┤
|
||||
│ 2× NVIDIA L40S (46 GB ea.) │
|
||||
│ Tensor Parallel = 2 │
|
||||
└──────────────────────────────┘
|
||||
Students
|
||||
│
|
||||
├── Browser ──► Open WebUI (silicon.fhgr.ch:7081)
|
||||
│ │ ChatGPT-like UI, user accounts, chat history
|
||||
│ │
|
||||
├── Streamlit ─────┤ Local app with file editor & code runner
|
||||
│ │
|
||||
└── SDK / curl ────┘
|
||||
▼
|
||||
┌──────────────────────────────┐
|
||||
│ silicon.fhgr.ch:7080 │
|
||||
│ OpenAI-compatible API │
|
||||
├──────────────────────────────┤
|
||||
│ vLLM Server (nightly) │
|
||||
│ Apptainer container (.sif) │
|
||||
├──────────────────────────────┤
|
||||
│ Qwen3.5-35B-A3B weights │
|
||||
│ (bind-mounted from host) │
|
||||
├──────────────────────────────┤
|
||||
│ 2× NVIDIA L40S (46 GB ea.) │
|
||||
│ Tensor Parallel = 2 │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
## Hardware
|
||||
@ -134,14 +141,90 @@ curl http://localhost:7080/v1/chat/completions \
|
||||
-d '{"model":"qwen3.5-35b-a3b","messages":[{"role":"user","content":"Hello!"}],"max_tokens":128}'
|
||||
```
|
||||
|
||||
### Step 7: Share with Students
|
||||
### Step 7: Set Up Open WebUI (ChatGPT-like Interface)
|
||||
|
||||
Open WebUI provides a full-featured chat interface that runs on the server.
|
||||
Students access it via a browser — no local setup required.
|
||||
|
||||
**Pull the container:**
|
||||
```bash
|
||||
bash 06_setup_openwebui.sh
|
||||
```
|
||||
|
||||
**Start (foreground with tmux):**
|
||||
```bash
|
||||
tmux new -s webui
|
||||
bash 07_start_openwebui.sh
|
||||
# Ctrl+B, then D to detach
|
||||
```
|
||||
|
||||
**Start (background with logging):**
|
||||
```bash
|
||||
bash 08_start_openwebui_background.sh
|
||||
tail -f logs/openwebui_*.log
|
||||
```
|
||||
|
||||
Open WebUI is ready when you see `Uvicorn running` in the logs.
|
||||
Access it at `http://silicon.fhgr.ch:7081`.
|
||||
|
||||
> **Important**: The first user to sign up becomes the **admin**. Sign up
|
||||
> yourself first before sharing the URL with students.
|
||||
|
||||
### Step 8: Share with Students
|
||||
|
||||
Distribute `STUDENT_GUIDE.md` with connection details:
|
||||
- **Base URL**: `http://silicon.fhgr.ch:7080/v1`
|
||||
- **Open WebUI**: `http://silicon.fhgr.ch:7081` (recommended for most students)
|
||||
- **API Base URL**: `http://silicon.fhgr.ch:7080/v1` (for SDK / programmatic use)
|
||||
- **Model name**: `qwen3.5-35b-a3b`
|
||||
|
||||
---
|
||||
|
||||
## Open WebUI
|
||||
|
||||
A server-hosted ChatGPT-like interface backed by the vLLM inference server.
|
||||
Runs as an Apptainer container on port **7081**.
|
||||
|
||||
### Features
|
||||
|
||||
- User accounts with persistent chat history (stored in `openwebui-data/`)
|
||||
- Auto-discovers models from the vLLM backend
|
||||
- Streaming responses, markdown rendering, code highlighting
|
||||
- Admin panel for managing users, models, and settings
|
||||
- No local setup needed — students just open a browser
|
||||
|
||||
### Configuration
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `PORT` | `7081` | HTTP port for the UI |
|
||||
| `VLLM_BASE_URL` | `http://localhost:7080/v1` | vLLM API endpoint |
|
||||
| `VLLM_API_KEY` | `EMPTY` | API key (if vLLM requires one) |
|
||||
| `DATA_DIR` | `./openwebui-data` | Persistent storage (DB, uploads) |
|
||||
|
||||
### Management
|
||||
|
||||
```bash
|
||||
# Start in background
|
||||
bash 08_start_openwebui_background.sh
|
||||
|
||||
# View logs
|
||||
tail -f logs/openwebui_*.log
|
||||
|
||||
# Stop
|
||||
bash 09_stop_openwebui.sh
|
||||
|
||||
# Reconnect to tmux session
|
||||
tmux attach -t webui
|
||||
```
|
||||
|
||||
### Data Persistence
|
||||
|
||||
All user data (accounts, chats, settings) is stored in `openwebui-data/`.
|
||||
This directory is bind-mounted into the container, so data survives
|
||||
container restarts. Back it up regularly.
|
||||
|
||||
---
|
||||
|
||||
## Streamlit App
|
||||
|
||||
A web-based chat and file editor that connects to the inference server.
|
||||
@ -240,18 +323,22 @@ tmux attach -t llm
|
||||
|
||||
## Files Overview
|
||||
|
||||
| File | Purpose |
|
||||
|----------------------------------|------------------------------------------------------|
|
||||
| `vllm_qwen.def` | Apptainer container definition (vLLM nightly + deps) |
|
||||
| `01_build_container.sh` | Builds the Apptainer `.sif` image |
|
||||
| `02_download_model.sh` | Downloads model weights (runs inside container) |
|
||||
| `03_start_server.sh` | Starts vLLM server (foreground) |
|
||||
| `04_start_server_background.sh` | Starts server in background with logging |
|
||||
| `05_stop_server.sh` | Stops the background server |
|
||||
| `app.py` | Streamlit chat & file editor web app |
|
||||
| `requirements.txt` | Python dependencies for the Streamlit app |
|
||||
| `test_server.py` | Tests the running server via CLI |
|
||||
| `STUDENT_GUIDE.md` | Instructions for students |
|
||||
| File | Purpose |
|
||||
|------------------------------------|------------------------------------------------------|
|
||||
| `vllm_qwen.def` | Apptainer container definition (vLLM nightly + deps) |
|
||||
| `01_build_container.sh` | Builds the Apptainer `.sif` image |
|
||||
| `02_download_model.sh` | Downloads model weights (runs inside container) |
|
||||
| `03_start_server.sh` | Starts vLLM server (foreground) |
|
||||
| `04_start_server_background.sh` | Starts vLLM server in background with logging |
|
||||
| `05_stop_server.sh` | Stops the background vLLM server |
|
||||
| `06_setup_openwebui.sh` | Pulls the Open WebUI container image |
|
||||
| `07_start_openwebui.sh` | Starts Open WebUI (foreground) |
|
||||
| `08_start_openwebui_background.sh` | Starts Open WebUI in background with logging |
|
||||
| `09_stop_openwebui.sh` | Stops the background Open WebUI |
|
||||
| `app.py` | Streamlit chat & file editor web app |
|
||||
| `requirements.txt` | Python dependencies for the Streamlit app |
|
||||
| `test_server.py` | Tests the running server via CLI |
|
||||
| `STUDENT_GUIDE.md` | Instructions for students |
|
||||
|
||||
---
|
||||
|
||||
@ -285,6 +372,17 @@ tmux attach -t llm
|
||||
- Disable thinking mode for faster simple responses
|
||||
- Monitor: `curl http://localhost:7080/metrics`
|
||||
|
||||
### Open WebUI won't start
|
||||
- Ensure the vLLM server is running first on port 7080
|
||||
- Check that port 7081 is not already in use: `ss -tlnp | grep 7081`
|
||||
- Check logs: `tail -50 logs/openwebui_*.log`
|
||||
- If the database is corrupted, reset: `rm openwebui-data/webui.db` and restart
|
||||
|
||||
### Open WebUI shows no models
|
||||
- Verify vLLM is reachable: `curl http://localhost:7080/v1/models`
|
||||
- The OpenAI API base URL is set on first launch; if changed later, update
|
||||
it in the Open WebUI Admin Panel > Settings > Connections
|
||||
|
||||
### Syncing files to the server
|
||||
- No `git` or `pip` on the host — use `scp` from your local machine:
|
||||
```bash
|
||||
|
||||
@ -4,30 +4,93 @@
|
||||
|
||||
A **Qwen3.5-35B-A3B** language model is running on our GPU server. It's a
|
||||
Mixture-of-Experts model (35B total parameters, 3B active per token), providing
|
||||
fast and high-quality responses. You can interact with it using the
|
||||
**OpenAI-compatible API**.
|
||||
fast and high-quality responses.
|
||||
|
||||
## Connection Details
|
||||
There are **three ways** to interact with the model:
|
||||
|
||||
| Parameter | Value |
|
||||
|------------- |---------------------------------------------|
|
||||
| **Base URL** | `http://silicon.fhgr.ch:7080/v1` |
|
||||
| **Model** | `qwen3.5-35b-a3b` |
|
||||
| **API Key** | *(ask your instructor — may be `EMPTY`)* |
|
||||
1. **Open WebUI** — ChatGPT-like interface in your browser (easiest)
|
||||
2. **Streamlit App** — Local app with chat, file editor, and code execution
|
||||
3. **Python SDK / curl** — Programmatic access via the OpenAI-compatible API
|
||||
|
||||
> **Note**: You must be on the university network or VPN to reach the server.
|
||||
|
||||
## Connection Details
|
||||
|
||||
| Parameter | Value |
|
||||
|------------------|---------------------------------------------|
|
||||
| **Open WebUI** | `http://silicon.fhgr.ch:7081` |
|
||||
| **API Base URL** | `http://silicon.fhgr.ch:7080/v1` |
|
||||
| **Model** | `qwen3.5-35b-a3b` |
|
||||
| **API Key** | *(ask your instructor — may be `EMPTY`)* |
|
||||
|
||||
---
|
||||
|
||||
## Quick Start with Python
|
||||
## Option 1: Open WebUI (Recommended)
|
||||
|
||||
### 1. Install the OpenAI SDK
|
||||
The easiest way to chat with the model — no installation required.
|
||||
|
||||
### Getting Started
|
||||
|
||||
1. Make sure you are connected to the **university network** (or VPN).
|
||||
2. Open your browser and go to **http://silicon.fhgr.ch:7081**
|
||||
3. Click **"Sign Up"** to create a new account:
|
||||
- Enter your **name** (e.g. your first and last name)
|
||||
- Enter your **email** (use your university email)
|
||||
- Choose a **password**
|
||||
- Click **"Create Account"**
|
||||
4. After signing up you are logged in automatically.
|
||||
5. Select the model **qwen3.5-35b-a3b** from the model dropdown at the top.
|
||||
6. Type a message and press Enter — you're chatting with the LLM.
|
||||
|
||||
### Returning Later
|
||||
|
||||
- Go to **http://silicon.fhgr.ch:7081** and click **"Sign In"**.
|
||||
- Enter the email and password you used during sign-up.
|
||||
- All your previous chats are still there.
|
||||
|
||||
### Features
|
||||
|
||||
- **Chat history** — all conversations are saved on the server and persist across sessions
|
||||
- **Markdown rendering** with syntax-highlighted code blocks
|
||||
- **Model selector** — auto-discovers available models from the server
|
||||
- **Conversation branching** — edit previous messages and explore alternative responses
|
||||
- **File upload** — attach files to your messages for the model to analyze
|
||||
- **Search** — search across all your past conversations
|
||||
|
||||
### Tips
|
||||
|
||||
- Your account and chat history are stored on the server. You can log in
|
||||
from any device on the university network.
|
||||
- If you forget your password, ask your instructor to reset it via the
|
||||
Admin Panel.
|
||||
- The model works best when you provide clear, specific instructions.
|
||||
- For code tasks, mention the programming language explicitly (e.g.
|
||||
"Write a Python function that...").
|
||||
- Long conversations use more context. Start a **New Chat** (top-left
|
||||
button) when switching topics to get faster, more focused responses.
|
||||
|
||||
---
|
||||
|
||||
## Option 2: Streamlit App (Chat + File Editor)
|
||||
|
||||
A local app with chat, file editing, and Python/LaTeX execution.
|
||||
See the [Streamlit section below](#streamlit-chat--file-editor-app) for setup.
|
||||
|
||||
---
|
||||
|
||||
## Option 3: Python SDK / curl
|
||||
|
||||
For programmatic access and scripting.
|
||||
|
||||
### Quick Start with Python
|
||||
|
||||
#### 1. Install the OpenAI SDK
|
||||
|
||||
```bash
|
||||
pip install openai
|
||||
```
|
||||
|
||||
### 2. Simple Chat
|
||||
#### 2. Simple Chat
|
||||
|
||||
```python
|
||||
from openai import OpenAI
|
||||
@ -50,7 +113,7 @@ response = client.chat.completions.create(
|
||||
print(response.choices[0].message.content)
|
||||
```
|
||||
|
||||
### 3. Streaming Responses
|
||||
#### 3. Streaming Responses
|
||||
|
||||
```python
|
||||
stream = client.chat.completions.create(
|
||||
@ -70,7 +133,7 @@ print()
|
||||
|
||||
---
|
||||
|
||||
## Quick Start with curl
|
||||
### Quick Start with curl
|
||||
|
||||
```bash
|
||||
curl http://silicon.fhgr.ch:7080/v1/chat/completions \
|
||||
@ -196,3 +259,5 @@ response = client.chat.completions.create(
|
||||
| Slow responses | The model is shared — peak times may be slower |
|
||||
| `401 Unauthorized` | Ask your instructor for the API key |
|
||||
| Response cut off | Increase `max_tokens` in your request |
|
||||
| Open WebUI login fails | Make sure you created an account first (Sign Up) |
|
||||
| Open WebUI shows no models | The vLLM server may still be loading — wait a few minutes |
|
||||
|
||||
202
app.py
202
app.py
@ -11,6 +11,7 @@ Usage:
|
||||
"""
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
import streamlit as st
|
||||
from openai import OpenAI
|
||||
from pathlib import Path
|
||||
@ -51,6 +52,9 @@ LANG_MAP = {
|
||||
}
|
||||
|
||||
|
||||
# Context budget in estimated tokens for the chat transcript; trim_history()
# drops old messages once estimate_tokens() would exceed this minus the
# tokens reserved for the reply.
MAX_CONTEXT = 32768
|
||||
|
||||
|
||||
def extract_code(text: str, lang: str = "") -> str:
|
||||
"""Extract the first fenced code block from markdown text.
|
||||
Falls back to the full text if no code block is found."""
|
||||
@ -61,6 +65,56 @@ def extract_code(text: str, lang: str = "") -> str:
|
||||
return text.strip()
|
||||
|
||||
|
||||
def estimate_tokens(messages: list[dict]) -> int:
    """Crude token count for a chat transcript.

    Uses the common ~4-characters-per-token heuristic: total characters
    across all message contents, integer-divided by 4.
    """
    total_chars = 0
    for message in messages:
        total_chars += len(message["content"])
    return total_chars // 4
|
||||
|
||||
|
||||
def trim_history(messages: list[dict], reserved: int) -> list[dict]:
|
||||
"""Drop oldest message pairs to fit within context budget.
|
||||
Always keeps the latest user message."""
|
||||
budget = MAX_CONTEXT - reserved
|
||||
while len(messages) > 1 and estimate_tokens(messages) > budget:
|
||||
messages.pop(0)
|
||||
return messages
|
||||
|
||||
|
||||
# File types the editor can execute via run_file(), and the per-run timeout.
RUNNABLE_EXTENSIONS = {".py", ".tex"}
RUN_TIMEOUT = 30  # seconds; passed to subprocess.run(timeout=...)
|
||||
|
||||
|
||||
def run_file(file_path: Path) -> dict:
    """Run a Python script or compile a LaTeX file, capturing its output.

    Returns a dict with keys ``"stdout"``, ``"stderr"``, and ``"rc"``:
    ``rc`` is 1 for unsupported file types, -1 on timeout or when the
    interpreter/compiler binary is missing, otherwise the subprocess's
    exit status.
    """
    workdir = file_path.parent.resolve()
    extension = file_path.suffix

    if extension == ".py":
        command = ["python3", file_path.name]
    elif extension == ".tex":
        # nonstopmode keeps pdflatex from blocking on errors; build
        # artifacts land next to the source file.
        command = [
            "pdflatex",
            "-interaction=nonstopmode",
            f"-output-directory={workdir}",
            file_path.name,
        ]
    else:
        return {"stdout": "", "stderr": f"Unsupported file type: {extension}", "rc": 1}

    try:
        completed = subprocess.run(
            command,
            cwd=workdir,
            capture_output=True,
            text=True,
            timeout=RUN_TIMEOUT,
        )
    except subprocess.TimeoutExpired:
        return {"stdout": "", "stderr": f"Timed out after {RUN_TIMEOUT}s", "rc": -1}
    except FileNotFoundError as exc:
        return {"stdout": "", "stderr": str(exc), "rc": -1}
    return {"stdout": completed.stdout, "stderr": completed.stderr, "rc": completed.returncode}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sidebar — File Manager
|
||||
# ---------------------------------------------------------------------------
|
||||
@ -100,6 +154,10 @@ with tab_chat:
|
||||
with st.chat_message("user"):
|
||||
st.markdown(prompt)
|
||||
|
||||
st.session_state.messages = trim_history(
|
||||
st.session_state.messages, reserved=max_tokens
|
||||
)
|
||||
|
||||
with st.chat_message("assistant"):
|
||||
placeholder = st.empty()
|
||||
full_response = ""
|
||||
@ -123,6 +181,13 @@ with tab_chat:
|
||||
st.session_state.messages.append({"role": "assistant", "content": full_response})
|
||||
|
||||
if st.session_state.messages:
|
||||
used = estimate_tokens(st.session_state.messages)
|
||||
pct = min(used / MAX_CONTEXT, 1.0)
|
||||
label = f"Context: ~{used:,} / {MAX_CONTEXT:,} tokens"
|
||||
if pct > 0.8:
|
||||
label += " ⚠️ nearing limit — older messages will be trimmed"
|
||||
st.progress(pct, text=label)
|
||||
|
||||
col_clear, col_save = st.columns([1, 3])
|
||||
with col_clear:
|
||||
if st.button("Clear Chat"):
|
||||
@ -149,54 +214,103 @@ with tab_editor:
|
||||
content = file_path.read_text() if file_path.exists() else ""
|
||||
suffix = file_path.suffix
|
||||
lang = LANG_MAP.get(suffix, "text")
|
||||
runnable = suffix in RUNNABLE_EXTENSIONS
|
||||
|
||||
st.code(content, language=lang if lang != "text" else None, line_numbers=True)
|
||||
if runnable:
|
||||
col_edit, col_term = st.columns([3, 2])
|
||||
else:
|
||||
col_edit = st.container()
|
||||
|
||||
edited = st.text_area(
|
||||
"Edit below:",
|
||||
value=content,
|
||||
height=400,
|
||||
key=f"editor_{selected_file}_{hash(content)}",
|
||||
)
|
||||
with col_edit:
|
||||
st.code(content, language=lang if lang != "text" else None, line_numbers=True)
|
||||
|
||||
col_save, col_gen = st.columns(2)
|
||||
|
||||
with col_save:
|
||||
if st.button("Save File"):
|
||||
file_path.write_text(edited)
|
||||
st.success(f"Saved {selected_file}")
|
||||
st.rerun()
|
||||
|
||||
with col_gen:
|
||||
gen_prompt = st.text_input(
|
||||
"Generation instruction",
|
||||
placeholder="e.g. Add error handling / Fix the LaTeX formatting",
|
||||
key="gen_prompt",
|
||||
edited = st.text_area(
|
||||
"Edit below:",
|
||||
value=content,
|
||||
height=400,
|
||||
key=f"editor_{selected_file}_{hash(content)}",
|
||||
)
|
||||
if st.button("Generate with LLM") and gen_prompt:
|
||||
with st.spinner("Generating..."):
|
||||
response = client.chat.completions.create(
|
||||
model=MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": (
|
||||
f"You are a coding assistant. The user has a {lang} file. "
|
||||
"Return ONLY the raw file content inside a single code block. "
|
||||
"No explanations, no comments about changes."
|
||||
)},
|
||||
{"role": "user", "content": (
|
||||
f"Here is my {lang} file:\n\n```\n{edited}\n```\n\n"
|
||||
f"Instruction: {gen_prompt}"
|
||||
)},
|
||||
],
|
||||
max_tokens=max_tokens,
|
||||
temperature=temperature,
|
||||
top_p=top_p,
|
||||
extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
|
||||
)
|
||||
result = response.choices[0].message.content
|
||||
code = extract_code(result, lang)
|
||||
file_path.write_text(code)
|
||||
st.success("File updated by LLM")
|
||||
|
||||
col_save, col_gen = st.columns(2)
|
||||
|
||||
with col_save:
|
||||
if st.button("Save File"):
|
||||
file_path.write_text(edited)
|
||||
st.success(f"Saved {selected_file}")
|
||||
st.rerun()
|
||||
|
||||
with col_gen:
|
||||
gen_prompt = st.text_input(
|
||||
"Generation instruction",
|
||||
placeholder="e.g. Add error handling / Fix the LaTeX formatting",
|
||||
key="gen_prompt",
|
||||
)
|
||||
if st.button("Generate with LLM") and gen_prompt:
|
||||
with st.spinner("Generating..."):
|
||||
response = client.chat.completions.create(
|
||||
model=MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": (
|
||||
f"You are a coding assistant. The user has a {lang} file. "
|
||||
"Return ONLY the raw file content inside a single code block. "
|
||||
"No explanations, no comments about changes."
|
||||
)},
|
||||
{"role": "user", "content": (
|
||||
f"Here is my {lang} file:\n\n```\n{edited}\n```\n\n"
|
||||
f"Instruction: {gen_prompt}"
|
||||
)},
|
||||
],
|
||||
max_tokens=max_tokens,
|
||||
temperature=temperature,
|
||||
top_p=top_p,
|
||||
extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
|
||||
)
|
||||
result = response.choices[0].message.content
|
||||
code = extract_code(result, lang)
|
||||
file_path.write_text(code)
|
||||
st.success("File updated by LLM")
|
||||
st.rerun()
|
||||
|
||||
if runnable:
|
||||
with col_term:
|
||||
run_label = "Compile LaTeX" if suffix == ".tex" else "Run Python"
|
||||
st.subheader("Terminal Output")
|
||||
|
||||
if st.button(run_label, type="primary"):
|
||||
file_path.write_text(edited)
|
||||
with st.spinner(f"{'Compiling' if suffix == '.tex' else 'Running'}..."):
|
||||
result = run_file(file_path)
|
||||
st.session_state["last_run"] = result
|
||||
|
||||
result = st.session_state.get("last_run")
|
||||
if result:
|
||||
if result["rc"] == 0:
|
||||
st.success(f"Exit code: {result['rc']}")
|
||||
else:
|
||||
st.error(f"Exit code: {result['rc']}")
|
||||
|
||||
if result["stdout"]:
|
||||
st.text_area(
|
||||
"stdout",
|
||||
value=result["stdout"],
|
||||
height=300,
|
||||
disabled=True,
|
||||
key="run_stdout",
|
||||
)
|
||||
if result["stderr"]:
|
||||
st.text_area(
|
||||
"stderr",
|
||||
value=result["stderr"],
|
||||
height=200,
|
||||
disabled=True,
|
||||
key="run_stderr",
|
||||
)
|
||||
if not result["stdout"] and not result["stderr"]:
|
||||
st.info("No output produced.")
|
||||
else:
|
||||
st.caption(
|
||||
f"Click **{run_label}** to execute the file "
|
||||
f"(timeout: {RUN_TIMEOUT}s)."
|
||||
)
|
||||
else:
|
||||
st.info("Create a file in the sidebar to start editing.")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user