From f4fdaab73278f70a10bda7657983f1f514cbb27b Mon Sep 17 00:00:00 2001
From: herzogflorian <herzogflorian@users.noreply.github.com>
Date: Mon, 2 Mar 2026 18:48:51 +0100
Subject: [PATCH] Add Open WebUI integration and enhance Streamlit app

- Add Open WebUI scripts (06-09) for a server-hosted ChatGPT-like interface
  on port 7081, connected to the vLLM backend on port 7080
- Add context window management to chat (auto-trim, token counter, progress bar)
- Add terminal output panel to file editor for running Python/LaTeX files
- Update README with Open WebUI setup, architecture diagram, and troubleshooting
- Update STUDENT_GUIDE with step-by-step Open WebUI login instructions

Made-with: Cursor
---
 .gitignore                       |   3 +
 06_setup_openwebui.sh            |  37 ++++++
 07_start_openwebui.sh            |  70 +++++++++++
 08_start_openwebui_background.sh |  52 ++++++++
 09_stop_openwebui.sh             |  31 +++++
 README.md                        | 162 ++++++++++++++++++++-----
 STUDENT_GUIDE.md                 |  91 ++++++++++++--
 app.py                           | 202 ++++++++++++++++++++++++-------
 8 files changed, 559 insertions(+), 89 deletions(-)
 create mode 100755 06_setup_openwebui.sh
 create mode 100755 07_start_openwebui.sh
 create mode 100755 08_start_openwebui_background.sh
 create mode 100755 09_stop_openwebui.sh

diff --git a/.gitignore b/.gitignore
index 1cab20f..cdb2d3d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,5 +16,8 @@ models/
 # Streamlit workspace files
 workspace/
 
+# Open WebUI persistent data (user accounts, chats, DB)
+openwebui-data/
+
 # macOS
 .DS_Store
diff --git a/06_setup_openwebui.sh b/06_setup_openwebui.sh
new file mode 100755
index 0000000..b1891d1
--- /dev/null
+++ b/06_setup_openwebui.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# ------------------------------------------------------------------
+# 06_setup_openwebui.sh
+# Pulls the Open WebUI container image and creates the data directory
+# for persistent storage (user accounts, chat history, settings).
+#
+# Usage:
+#   bash 06_setup_openwebui.sh
+# ------------------------------------------------------------------
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+SIF_FILE="${SCRIPT_DIR}/open-webui.sif"
+DATA_DIR="${SCRIPT_DIR}/openwebui-data"
+
+mkdir -p "$DATA_DIR"  # done before the early exit so re-runs still create it
+
+if [ -f "$SIF_FILE" ]; then
+    echo "Open WebUI container already exists at ${SIF_FILE}"
+    echo "Delete it first if you want to rebuild:"
+    echo "  rm ${SIF_FILE}"
+    exit 0
+fi
+
+echo "=== Pulling Open WebUI container image ==="
+echo "    Source: ghcr.io/open-webui/open-webui:main"
+echo "    This may take 5-10 minutes (~4 GB)..."
+echo ""
+
+apptainer pull "$SIF_FILE" docker://ghcr.io/open-webui/open-webui:main
+
+echo ""
+echo "=== Setup complete ==="
+echo "Image:     ${SIF_FILE} ($(du -sh "$SIF_FILE" | cut -f1))"
+echo "Data dir:  ${DATA_DIR}"
+echo ""
+echo "Next: bash 07_start_openwebui.sh"
diff --git a/07_start_openwebui.sh b/07_start_openwebui.sh
new file mode 100755
index 0000000..95798d0
--- /dev/null
+++ b/07_start_openwebui.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# ------------------------------------------------------------------
+# 07_start_openwebui.sh
+# Starts Open WebUI connected to the vLLM inference server.
+#
+# Open WebUI provides a ChatGPT-like interface with:
+#   - User accounts & chat history (persisted in openwebui-data/)
+#   - Model selector (auto-discovers models from vLLM)
+#   - Streaming responses, markdown rendering, code highlighting
+#
+# The first user to sign up becomes the admin.
+#
+# Usage:
+#   bash 07_start_openwebui.sh                    # defaults
+#   PORT=7082 bash 07_start_openwebui.sh          # custom port
+#   VLLM_BASE_URL=http://localhost:7080/v1 bash 07_start_openwebui.sh
+#
+# Environment variables:
+#   PORT            — HTTP port for Open WebUI    (default: 7081)
+#   VLLM_BASE_URL   — vLLM OpenAI-compatible URL  (default: http://localhost:7080/v1)
+#   VLLM_API_KEY    — API key for vLLM            (default: EMPTY)
+#   DATA_DIR        — Persistent storage path      (default: ./openwebui-data)
+# ------------------------------------------------------------------
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+SIF_FILE="${SCRIPT_DIR}/open-webui.sif"
+
+PORT="${PORT:-7081}"
+VLLM_BASE_URL="${VLLM_BASE_URL:-http://localhost:7080/v1}"
+VLLM_API_KEY="${VLLM_API_KEY:-EMPTY}"
+DATA_DIR="${DATA_DIR:-${SCRIPT_DIR}/openwebui-data}"
+
+if [ ! -f "$SIF_FILE" ]; then
+    echo "ERROR: Container image not found at ${SIF_FILE}"
+    echo "       Run 06_setup_openwebui.sh first."
+    exit 1
+fi
+
+mkdir -p "$DATA_DIR"
+
+echo "=== Starting Open WebUI ==="
+echo "    Port:           ${PORT}"
+echo "    vLLM backend:   ${VLLM_BASE_URL}"
+echo "    Data directory: ${DATA_DIR}"
+echo ""
+echo "    Access at:      http://$(hostname -f 2>/dev/null || hostname):${PORT}"
+echo "    First user to sign up becomes admin."
+echo ""
+echo "    Press Ctrl+C to stop."
+echo "==========================================="
+echo ""
+# exec: the PID saved by 08_* must be apptainer itself, or 09_* only kills this shell.
+exec apptainer exec \
+    --writable-tmpfs \
+    --pwd /app/backend \
+    --bind "${DATA_DIR}:/app/backend/data" \
+    --env PORT="${PORT}" \
+    --env ENABLE_OPENAI_API="True" \
+    --env OPENAI_API_BASE_URLS="${VLLM_BASE_URL}" \
+    --env OPENAI_API_KEYS="${VLLM_API_KEY}" \
+    --env ENABLE_OLLAMA_API="False" \
+    --env ENABLE_SIGNUP="True" \
+    --env DEFAULT_USER_ROLE="user" \
+    --env WEBUI_NAME="Qwen3.5 LLM Server" \
+    --env OFFLINE_MODE="True" \
+    --env ENABLE_VERSION_UPDATE_CHECK="False" \
+    --env HF_HUB_OFFLINE="1" \
+    "$SIF_FILE" \
+    bash start.sh
diff --git a/08_start_openwebui_background.sh b/08_start_openwebui_background.sh
new file mode 100755
index 0000000..8eba5ff
--- /dev/null
+++ b/08_start_openwebui_background.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# ------------------------------------------------------------------
+# 08_start_openwebui_background.sh
+# Starts 07_start_openwebui.sh under nohup and logs its output to a file.
+#
+# Usage:
+#   bash 08_start_openwebui_background.sh
+#
+# Log file: ./logs/openwebui_<timestamp>.log
+# PID file: ./logs/openwebui.pid
+# ------------------------------------------------------------------
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+LOG_DIR="${SCRIPT_DIR}/logs"
+PID_FILE="${LOG_DIR}/openwebui.pid"
+LOG_FILE="${LOG_DIR}/openwebui_$(date +%Y%m%d_%H%M%S).log"
+
+mkdir -p "$LOG_DIR"
+
+# Refuse to start a second instance while the recorded PID is still alive.
+if [[ -f "$PID_FILE" ]]; then
+    previous_pid="$(<"$PID_FILE")"
+    if kill -0 "$previous_pid" 2>/dev/null; then
+        echo "Open WebUI already running with PID ${previous_pid}"
+        echo "Stop it first:  bash 09_stop_openwebui.sh"
+        exit 1
+    fi
+fi
+
+echo "Starting Open WebUI in background..."
+echo "Log file: ${LOG_FILE}"
+
+nohup bash "${SCRIPT_DIR}/07_start_openwebui.sh" >"$LOG_FILE" 2>&1 &
+webui_pid=$!
+printf '%s\n' "$webui_pid" >"$PID_FILE"
+
+echo "Open WebUI PID: ${webui_pid}"
+echo
+echo "Monitor logs:    tail -f ${LOG_FILE}"
+echo "Stop:            bash 09_stop_openwebui.sh"
+echo
+
+# Give the process a few seconds, then report whether it survived startup.
+sleep 5
+if ! kill -0 "$webui_pid" 2>/dev/null; then
+    echo "ERROR: Open WebUI process exited. Check logs:"
+    tail -20 "$LOG_FILE"
+    exit 1
+fi
+echo "Open WebUI process is running. Starting up..."
+echo "(Ready when you see 'Uvicorn running' in the logs)"
diff --git a/09_stop_openwebui.sh b/09_stop_openwebui.sh
new file mode 100755
index 0000000..f31de2e
--- /dev/null
+++ b/09_stop_openwebui.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# ------------------------------------------------------------------
+# 09_stop_openwebui.sh
+# Stops the background Open WebUI server: SIGTERM first, SIGKILL after 2 s.
+# ------------------------------------------------------------------
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PID_FILE="${SCRIPT_DIR}/logs/openwebui.pid"
+
+if [ ! -f "$PID_FILE" ]; then
+    echo "No PID file found. Open WebUI may not be running."
+    exit 0
+fi
+
+SERVER_PID=$(cat "$PID_FILE")
+# "|| true" on kill: if the process exits between check and kill, set -e must not skip cleanup.
+if kill -0 "$SERVER_PID" 2>/dev/null; then
+    echo "Stopping Open WebUI (PID: ${SERVER_PID})..."
+    kill "$SERVER_PID" 2>/dev/null || true
+    sleep 2
+    if kill -0 "$SERVER_PID" 2>/dev/null; then
+        echo "Process still alive, sending SIGKILL..."
+        kill -9 "$SERVER_PID" 2>/dev/null || true
+    fi
+    echo "Open WebUI stopped."
+else
+    echo "Open WebUI process (PID: ${SERVER_PID}) is not running."
+fi
+
+rm -f "$PID_FILE"
diff --git a/README.md b/README.md
index 7c6dd8d..04414dd 100644
--- a/README.md
+++ b/README.md
@@ -2,28 +2,35 @@
 
 Self-hosted LLM inference for ~15 concurrent students using **Qwen3.5-35B-A3B**
 (MoE, 35B total / 3B active per token), served via **vLLM** inside an
-**Apptainer** container on a GPU server. Includes a **Streamlit web app** for
-chat and file editing.
+**Apptainer** container on a GPU server. Two front-ends are provided:
+**Open WebUI** (server-hosted ChatGPT-like UI) and a **Streamlit app**
+(local chat + file editor with code execution).
 
 ## Architecture
 
 ```
-Students (Streamlit App / OpenAI SDK / curl)
-        │
-        ▼
-  ┌──────────────────────────────┐
-  │  silicon.fhgr.ch:7080       │
-  │  OpenAI-compatible API      │
-  ├──────────────────────────────┤
-  │  vLLM Server (nightly)      │
-  │  Apptainer container (.sif) │
-  ├──────────────────────────────┤
-  │  Qwen3.5-35B-A3B weights    │
-  │  (bind-mounted from host)   │
-  ├──────────────────────────────┤
-  │  2× NVIDIA L40S (46 GB ea.) │
-  │  Tensor Parallel = 2        │
-  └──────────────────────────────┘
+Students
+  │
+  ├── Browser ──► Open WebUI (silicon.fhgr.ch:7081)
+  │                  │  ChatGPT-like UI, user accounts, chat history
+  │                  │
+  ├── Streamlit ─────┤  Local app with file editor & code runner
+  │                  │
+  └── SDK / curl ────┘
+                     ▼
+          ┌──────────────────────────────┐
+          │  silicon.fhgr.ch:7080       │
+          │  OpenAI-compatible API      │
+          ├──────────────────────────────┤
+          │  vLLM Server (nightly)      │
+          │  Apptainer container (.sif) │
+          ├──────────────────────────────┤
+          │  Qwen3.5-35B-A3B weights    │
+          │  (bind-mounted from host)   │
+          ├──────────────────────────────┤
+          │  2× NVIDIA L40S (46 GB ea.) │
+          │  Tensor Parallel = 2        │
+          └──────────────────────────────┘
 ```
 
 ## Hardware
@@ -134,14 +141,90 @@ curl http://localhost:7080/v1/chat/completions \
   -d '{"model":"qwen3.5-35b-a3b","messages":[{"role":"user","content":"Hello!"}],"max_tokens":128}'
 ```
 
-### Step 7: Share with Students
+### Step 7: Set Up Open WebUI (ChatGPT-like Interface)
+
+Open WebUI provides a full-featured chat interface that runs on the server.
+Students access it via a browser — no local setup required.
+
+**Pull the container:**
+```bash
+bash 06_setup_openwebui.sh
+```
+
+**Start (foreground with tmux):**
+```bash
+tmux new -s webui
+bash 07_start_openwebui.sh
+# Ctrl+B, then D to detach
+```
+
+**Start (background with logging):**
+```bash
+bash 08_start_openwebui_background.sh
+tail -f logs/openwebui_*.log
+```
+
+Open WebUI is ready when you see `Uvicorn running` in the logs.
+Access it at `http://silicon.fhgr.ch:7081`.
+
+> **Important**: The first user to sign up becomes the **admin**. Sign up
+> yourself first before sharing the URL with students.
+
+### Step 8: Share with Students
 
 Distribute `STUDENT_GUIDE.md` with connection details:
-- **Base URL**: `http://silicon.fhgr.ch:7080/v1`
+- **Open WebUI**: `http://silicon.fhgr.ch:7081` (recommended for most students)
+- **API Base URL**: `http://silicon.fhgr.ch:7080/v1` (for SDK / programmatic use)
 - **Model name**: `qwen3.5-35b-a3b`
 
 ---
 
+## Open WebUI
+
+A server-hosted ChatGPT-like interface backed by the vLLM inference server.
+Runs as an Apptainer container on port **7081**.
+
+### Features
+
+- User accounts with persistent chat history (stored in `openwebui-data/`)
+- Auto-discovers models from the vLLM backend
+- Streaming responses, markdown rendering, code highlighting
+- Admin panel for managing users, models, and settings
+- No local setup needed — students just open a browser
+
+### Configuration
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `PORT` | `7081` | HTTP port for the UI |
+| `VLLM_BASE_URL` | `http://localhost:7080/v1` | vLLM API endpoint |
+| `VLLM_API_KEY` | `EMPTY` | API key (if vLLM requires one) |
+| `DATA_DIR` | `./openwebui-data` | Persistent storage (DB, uploads) |
+
+### Management
+
+```bash
+# Start in background
+bash 08_start_openwebui_background.sh
+
+# View logs
+tail -f logs/openwebui_*.log
+
+# Stop
+bash 09_stop_openwebui.sh
+
+# Reconnect to tmux session
+tmux attach -t webui
+```
+
+### Data Persistence
+
+All user data (accounts, chats, settings) is stored in `openwebui-data/`.
+This directory is bind-mounted into the container, so data survives
+container restarts. Back it up regularly.
+
+---
+
 ## Streamlit App
 
 A web-based chat and file editor that connects to the inference server.
@@ -240,18 +323,22 @@ tmux attach -t llm
 
 ## Files Overview
 
-| File                             | Purpose                                              |
-|----------------------------------|------------------------------------------------------|
-| `vllm_qwen.def`                 | Apptainer container definition (vLLM nightly + deps) |
-| `01_build_container.sh`          | Builds the Apptainer `.sif` image                    |
-| `02_download_model.sh`           | Downloads model weights (runs inside container)      |
-| `03_start_server.sh`             | Starts vLLM server (foreground)                      |
-| `04_start_server_background.sh`  | Starts server in background with logging             |
-| `05_stop_server.sh`              | Stops the background server                          |
-| `app.py`                         | Streamlit chat & file editor web app                 |
-| `requirements.txt`               | Python dependencies for the Streamlit app            |
-| `test_server.py`                 | Tests the running server via CLI                     |
-| `STUDENT_GUIDE.md`               | Instructions for students                            |
+| File                               | Purpose                                              |
+|------------------------------------|------------------------------------------------------|
+| `vllm_qwen.def`                   | Apptainer container definition (vLLM nightly + deps) |
+| `01_build_container.sh`            | Builds the Apptainer `.sif` image                    |
+| `02_download_model.sh`             | Downloads model weights (runs inside container)      |
+| `03_start_server.sh`               | Starts vLLM server (foreground)                      |
+| `04_start_server_background.sh`    | Starts vLLM server in background with logging        |
+| `05_stop_server.sh`                | Stops the background vLLM server                     |
+| `06_setup_openwebui.sh`            | Pulls the Open WebUI container image                 |
+| `07_start_openwebui.sh`            | Starts Open WebUI (foreground)                       |
+| `08_start_openwebui_background.sh` | Starts Open WebUI in background with logging         |
+| `09_stop_openwebui.sh`             | Stops the background Open WebUI                      |
+| `app.py`                           | Streamlit chat & file editor web app                 |
+| `requirements.txt`                 | Python dependencies for the Streamlit app            |
+| `test_server.py`                   | Tests the running server via CLI                     |
+| `STUDENT_GUIDE.md`                 | Instructions for students                            |
 
 ---
 
@@ -285,6 +372,17 @@ tmux attach -t llm
 - Disable thinking mode for faster simple responses
 - Monitor: `curl http://localhost:7080/metrics`
 
+### Open WebUI won't start
+- Ensure the vLLM server is running first on port 7080
+- Check that port 7081 is not already in use: `ss -tlnp | grep 7081`
+- Check logs: `tail -50 logs/openwebui_*.log`
+- If the database is corrupted, reset: `rm openwebui-data/webui.db` and restart (this deletes all accounts and chat history — back up the file first)
+
+### Open WebUI shows no models
+- Verify vLLM is reachable: `curl http://localhost:7080/v1/models`
+- The OpenAI API base URL is set on first launch; if changed later, update
+  it in the Open WebUI Admin Panel > Settings > Connections
+
 ### Syncing files to the server
 - No `git` or `pip` on the host — use `scp` from your local machine:
 ```bash
diff --git a/STUDENT_GUIDE.md b/STUDENT_GUIDE.md
index 7a0601c..7009a09 100644
--- a/STUDENT_GUIDE.md
+++ b/STUDENT_GUIDE.md
@@ -4,30 +4,93 @@
 
 A **Qwen3.5-35B-A3B** language model is running on our GPU server. It's a
 Mixture-of-Experts model (35B total parameters, 3B active per token), providing
-fast and high-quality responses. You can interact with it using the
-**OpenAI-compatible API**.
+fast and high-quality responses.
 
-## Connection Details
+There are **three ways** to interact with the model:
 
-| Parameter    | Value                                       |
-|------------- |---------------------------------------------|
-| **Base URL** | `http://silicon.fhgr.ch:7080/v1`            |
-| **Model**    | `qwen3.5-35b-a3b`                           |
-| **API Key**  | *(ask your instructor — may be `EMPTY`)*    |
+1. **Open WebUI** — ChatGPT-like interface in your browser (easiest)
+2. **Streamlit App** — Local app with chat, file editor, and code execution
+3. **Python SDK / curl** — Programmatic access via the OpenAI-compatible API
 
 > **Note**: You must be on the university network or VPN to reach the server.
 
+## Connection Details
+
+| Parameter        | Value                                       |
+|------------------|---------------------------------------------|
+| **Open WebUI**   | `http://silicon.fhgr.ch:7081`               |
+| **API Base URL** | `http://silicon.fhgr.ch:7080/v1`            |
+| **Model**        | `qwen3.5-35b-a3b`                           |
+| **API Key**      | *(ask your instructor — may be `EMPTY`)*    |
+
 ---
 
-## Quick Start with Python
+## Option 1: Open WebUI (Recommended)
 
-### 1. Install the OpenAI SDK
+The easiest way to chat with the model — no installation required.
+
+### Getting Started
+
+1. Make sure you are connected to the **university network** (or VPN).
+2. Open your browser and go to **http://silicon.fhgr.ch:7081**
+3. Click **"Sign Up"** to create a new account:
+   - Enter your **name** (e.g. your first and last name)
+   - Enter your **email** (use your university email)
+   - Choose a **password**
+   - Click **"Create Account"**
+4. After signing up you are logged in automatically.
+5. Select the model **qwen3.5-35b-a3b** from the model dropdown at the top.
+6. Type a message and press Enter — you're chatting with the LLM.
+
+### Returning Later
+
+- Go to **http://silicon.fhgr.ch:7081** and click **"Sign In"**.
+- Enter the email and password you used during sign-up.
+- All your previous chats are still there.
+
+### Features
+
+- **Chat history** — all conversations are saved on the server and persist across sessions
+- **Markdown rendering** with syntax-highlighted code blocks
+- **Model selector** — auto-discovers available models from the server
+- **Conversation branching** — edit previous messages and explore alternative responses
+- **File upload** — attach files to your messages for the model to analyze
+- **Search** — search across all your past conversations
+
+### Tips
+
+- Your account and chat history are stored on the server. You can log in
+  from any device on the university network.
+- If you forget your password, ask your instructor to reset it via the
+  Admin Panel.
+- The model works best when you provide clear, specific instructions.
+- For code tasks, mention the programming language explicitly (e.g.
+  "Write a Python function that...").
+- Long conversations use more context. Start a **New Chat** (top-left
+  button) when switching topics to get faster, more focused responses.
+
+---
+
+## Option 2: Streamlit App (Chat + File Editor)
+
+A local app with chat, file editing, and Python/LaTeX execution.
+See the [Streamlit section below](#streamlit-chat--file-editor-app) for setup.
+
+---
+
+## Option 3: Python SDK / curl
+
+For programmatic access and scripting.
+
+### Quick Start with Python
+
+#### 1. Install the OpenAI SDK
 
 ```bash
 pip install openai
 ```
 
-### 2. Simple Chat
+#### 2. Simple Chat
 
 ```python
 from openai import OpenAI
@@ -50,7 +113,7 @@ response = client.chat.completions.create(
 print(response.choices[0].message.content)
 ```
 
-### 3. Streaming Responses
+#### 3. Streaming Responses
 
 ```python
 stream = client.chat.completions.create(
@@ -70,7 +133,7 @@ print()
 
 ---
 
-## Quick Start with curl
+### Quick Start with curl
 
 ```bash
 curl http://silicon.fhgr.ch:7080/v1/chat/completions \
@@ -196,3 +259,5 @@ response = client.chat.completions.create(
 | Slow responses              | The model is shared — peak times may be slower      |
 | `401 Unauthorized`          | Ask your instructor for the API key                 |
 | Response cut off            | Increase `max_tokens` in your request               |
+| Open WebUI login fails      | Make sure you created an account first (Sign Up)    |
+| Open WebUI shows no models  | The vLLM server may still be loading — wait a few minutes |
diff --git a/app.py b/app.py
index faf9cdd..0654aa0 100644
--- a/app.py
+++ b/app.py
@@ -11,6 +11,7 @@ Usage:
 """
 
 import re
+import subprocess
 import streamlit as st
 from openai import OpenAI
 from pathlib import Path
@@ -51,6 +52,9 @@ LANG_MAP = {
 }
 
 
+MAX_CONTEXT = 32768
+
+
 def extract_code(text: str, lang: str = "") -> str:
     """Extract the first fenced code block from markdown text.
     Falls back to the full text if no code block is found."""
@@ -61,6 +65,56 @@ def extract_code(text: str, lang: str = "") -> str:
     return text.strip()
 
 
+def estimate_tokens(messages: list[dict]) -> int:
+    """Approximate the token count of a chat history at ~4 characters per token."""
+    return sum(map(len, (m["content"] for m in messages))) // 4
+
+
+def trim_history(messages: list[dict], reserved: int) -> list[dict]:
+    """Remove the oldest messages, one at a time and in place, until the
+    estimate fits in MAX_CONTEXT - reserved; the last message is never removed."""
+    limit = MAX_CONTEXT - reserved
+    while len(messages) > 1 and limit < estimate_tokens(messages):
+        del messages[0]
+    return messages
+
+
+RUNNABLE_EXTENSIONS = {".py", ".tex"}
+RUN_TIMEOUT = 30
+
+
+def run_file(file_path: Path) -> dict:
+    """Execute a .py or .tex file in its own directory; return a dict with
+    "stdout", "stderr" and "rc" (exit code; 1 for an unsupported suffix,
+    -1 on timeout or FileNotFoundError)."""
+    suffix = file_path.suffix
+    cwd = file_path.parent.resolve()
+
+    if suffix == ".py":
+        cmd = ["python3", file_path.name]
+    elif suffix == ".tex":
+        cmd = ["pdflatex", "-interaction=nonstopmode",
+               f"-output-directory={cwd}", file_path.name]
+    else:
+        return {"stdout": "", "stderr": f"Unsupported file type: {suffix}", "rc": 1}
+
+    try:
+        proc = subprocess.run(
+            cmd,
+            cwd=cwd,
+            capture_output=True,
+            text=True,
+            # Output may not be valid UTF-8 (e.g. pdflatex logs): don't raise on it.
+            errors="replace",
+            timeout=RUN_TIMEOUT,
+        )
+        return {"stdout": proc.stdout, "stderr": proc.stderr, "rc": proc.returncode}
+    except subprocess.TimeoutExpired:
+        return {"stdout": "", "stderr": f"Timed out after {RUN_TIMEOUT}s", "rc": -1}
+    except FileNotFoundError as e:
+        return {"stdout": "", "stderr": str(e), "rc": -1}
+
+
 # ---------------------------------------------------------------------------
 # Sidebar — File Manager
 # ---------------------------------------------------------------------------
@@ -100,6 +154,10 @@ with tab_chat:
         with st.chat_message("user"):
             st.markdown(prompt)
 
+        st.session_state.messages = trim_history(
+            st.session_state.messages, reserved=max_tokens
+        )
+
         with st.chat_message("assistant"):
             placeholder = st.empty()
             full_response = ""
@@ -123,6 +181,13 @@ with tab_chat:
         st.session_state.messages.append({"role": "assistant", "content": full_response})
 
     if st.session_state.messages:
+        used = estimate_tokens(st.session_state.messages)
+        pct = min(used / MAX_CONTEXT, 1.0)
+        label = f"Context: ~{used:,} / {MAX_CONTEXT:,} tokens"
+        if pct > 0.8:
+            label += " ⚠️ nearing limit — older messages will be trimmed"
+        st.progress(pct, text=label)
+
         col_clear, col_save = st.columns([1, 3])
         with col_clear:
             if st.button("Clear Chat"):
@@ -149,54 +214,103 @@ with tab_editor:
         content = file_path.read_text() if file_path.exists() else ""
         suffix = file_path.suffix
         lang = LANG_MAP.get(suffix, "text")
+        runnable = suffix in RUNNABLE_EXTENSIONS
 
-        st.code(content, language=lang if lang != "text" else None, line_numbers=True)
+        if runnable:
+            col_edit, col_term = st.columns([3, 2])
+        else:
+            col_edit = st.container()
 
-        edited = st.text_area(
-            "Edit below:",
-            value=content,
-            height=400,
-            key=f"editor_{selected_file}_{hash(content)}",
-        )
+        with col_edit:
+            st.code(content, language=lang if lang != "text" else None, line_numbers=True)
 
-        col_save, col_gen = st.columns(2)
-
-        with col_save:
-            if st.button("Save File"):
-                file_path.write_text(edited)
-                st.success(f"Saved {selected_file}")
-                st.rerun()
-
-        with col_gen:
-            gen_prompt = st.text_input(
-                "Generation instruction",
-                placeholder="e.g. Add error handling / Fix the LaTeX formatting",
-                key="gen_prompt",
+            edited = st.text_area(
+                "Edit below:",
+                value=content,
+                height=400,
+                key=f"editor_{selected_file}_{hash(content)}",
             )
-            if st.button("Generate with LLM") and gen_prompt:
-                with st.spinner("Generating..."):
-                    response = client.chat.completions.create(
-                        model=MODEL,
-                        messages=[
-                            {"role": "system", "content": (
-                                f"You are a coding assistant. The user has a {lang} file. "
-                                "Return ONLY the raw file content inside a single code block. "
-                                "No explanations, no comments about changes."
-                            )},
-                            {"role": "user", "content": (
-                                f"Here is my {lang} file:\n\n```\n{edited}\n```\n\n"
-                                f"Instruction: {gen_prompt}"
-                            )},
-                        ],
-                        max_tokens=max_tokens,
-                        temperature=temperature,
-                        top_p=top_p,
-                        extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
-                    )
-                    result = response.choices[0].message.content
-                    code = extract_code(result, lang)
-                    file_path.write_text(code)
-                    st.success("File updated by LLM")
+
+            col_save, col_gen = st.columns(2)
+
+            with col_save:
+                if st.button("Save File"):
+                    file_path.write_text(edited)
+                    st.success(f"Saved {selected_file}")
                     st.rerun()
+
+            with col_gen:
+                gen_prompt = st.text_input(
+                    "Generation instruction",
+                    placeholder="e.g. Add error handling / Fix the LaTeX formatting",
+                    key="gen_prompt",
+                )
+                if st.button("Generate with LLM") and gen_prompt:
+                    with st.spinner("Generating..."):
+                        response = client.chat.completions.create(
+                            model=MODEL,
+                            messages=[
+                                {"role": "system", "content": (
+                                    f"You are a coding assistant. The user has a {lang} file. "
+                                    "Return ONLY the raw file content inside a single code block. "
+                                    "No explanations, no comments about changes."
+                                )},
+                                {"role": "user", "content": (
+                                    f"Here is my {lang} file:\n\n```\n{edited}\n```\n\n"
+                                    f"Instruction: {gen_prompt}"
+                                )},
+                            ],
+                            max_tokens=max_tokens,
+                            temperature=temperature,
+                            top_p=top_p,
+                            extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
+                        )
+                        result = response.choices[0].message.content
+                        code = extract_code(result, lang)
+                        file_path.write_text(code)
+                        st.success("File updated by LLM")
+                        st.rerun()
+
+        if runnable:
+            with col_term:
+                run_label = "Compile LaTeX" if suffix == ".tex" else "Run Python"
+                st.subheader("Terminal Output")
+
+                if st.button(run_label, type="primary"):
+                    file_path.write_text(edited)
+                    with st.spinner(f"{'Compiling' if suffix == '.tex' else 'Running'}..."):
+                        result = run_file(file_path)
+                    st.session_state["last_run"] = result
+
+                result = st.session_state.get("last_run")
+                if result:
+                    if result["rc"] == 0:
+                        st.success(f"Exit code: {result['rc']}")
+                    else:
+                        st.error(f"Exit code: {result['rc']}")
+
+                    if result["stdout"]:
+                        st.text_area(
+                            "stdout",
+                            value=result["stdout"],
+                            height=300,
+                            disabled=True,
+                            key="run_stdout",
+                        )
+                    if result["stderr"]:
+                        st.text_area(
+                            "stderr",
+                            value=result["stderr"],
+                            height=200,
+                            disabled=True,
+                            key="run_stderr",
+                        )
+                    if not result["stdout"] and not result["stderr"]:
+                        st.info("No output produced.")
+                else:
+                    st.caption(
+                        f"Click **{run_label}** to execute the file "
+                        f"(timeout: {RUN_TIMEOUT}s)."
+                    )
     else:
         st.info("Create a file in the sidebar to start editing.")