Add Open WebUI integration and enhance Streamlit app

- Add Open WebUI scripts (06-09) for server-hosted ChatGPT-like interface connected to the vLLM backend on port 7081 - Add context window management to chat (auto-trim, token counter, progress bar) - Add terminal output panel to file editor for running Python/LaTeX files - Update README with Open WebUI setup, architecture diagram, and troubleshooting - Update STUDENT_GUIDE with step-by-step Open WebUI login instructions Made-with: Cursor
2026-03-02 18:48:51 +01:00 · 2026-03-02 18:48:51 +01:00 · f4fdaab732
commit f4fdaab732
parent d59285fe69
8 changed files with 559 additions and 89 deletions
--- a/.gitignore
+++ b/.gitignore
@ -16,5 +16,8 @@ models/
 # Streamlit workspace files
 workspace/
 # Open WebUI persistent data (user accounts, chats, DB)
 openwebui-data/
 # macOS
 .DS_Store
--- a/06_setup_openwebui.sh
+++ b/06_setup_openwebui.sh
@ -0,0 +1,37 @@
 #!/usr/bin/env bash
 # ------------------------------------------------------------------
 # 06_setup_openwebui.sh
 # Pulls the Open WebUI container image and creates the data directory
 # for persistent storage (user accounts, chat history, settings).
 #
 # The data directory is created on every run, even when the image is
 # already present; the image itself is only pulled when it is missing.
 #
 # Usage:
 #   bash 06_setup_openwebui.sh
 # ------------------------------------------------------------------
 set -euo pipefail
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 SIF_FILE="${SCRIPT_DIR}/open-webui.sif"
 DATA_DIR="${SCRIPT_DIR}/openwebui-data"
 # Create the data directory before the image check so that the early exit
 # below cannot skip it.
 mkdir -p "$DATA_DIR"
 if [ -f "$SIF_FILE" ]; then
    echo "Open WebUI container already exists at ${SIF_FILE}"
    echo "Delete it first if you want to rebuild:"
    echo "  rm ${SIF_FILE}"
    exit 0
 fi
 # Fail with a readable message instead of a bare "command not found".
 if ! command -v apptainer >/dev/null 2>&1; then
    echo "ERROR: 'apptainer' was not found in PATH." >&2
    exit 1
 fi
 echo "=== Pulling Open WebUI container image ==="
 echo "    Source: ghcr.io/open-webui/open-webui:main"
 echo "    This may take 5-10 minutes (~4 GB)..."
 echo ""
 apptainer pull "$SIF_FILE" docker://ghcr.io/open-webui/open-webui:main
 echo ""
 echo "=== Setup complete ==="
 echo "Image:     ${SIF_FILE} ($(du -sh "$SIF_FILE" | cut -f1))"
 echo "Data dir:  ${DATA_DIR}"
 echo ""
 echo "Next: bash 07_start_openwebui.sh"
--- a/07_start_openwebui.sh
+++ b/07_start_openwebui.sh
@ -0,0 +1,70 @@
 #!/usr/bin/env bash
 # ------------------------------------------------------------------
 # 07_start_openwebui.sh
 # Starts Open WebUI connected to the vLLM inference server.
 #
 # Open WebUI provides a ChatGPT-like interface with:
 #   - User accounts & chat history (persisted in openwebui-data/)
 #   - Model selector (auto-discovers models from vLLM)
 #   - Streaming responses, markdown rendering, code highlighting
 #
 # The first user to sign up becomes the admin.
 #
 # Usage:
 #   bash 07_start_openwebui.sh                    # defaults
 #   PORT=7082 bash 07_start_openwebui.sh          # custom port
 #   VLLM_BASE_URL=http://localhost:7080/v1 bash 07_start_openwebui.sh
 #
 # Environment variables:
 #   PORT            — HTTP port for Open WebUI    (default: 7081)
 #   VLLM_BASE_URL   — vLLM OpenAI-compatible URL  (default: http://localhost:7080/v1)
 #   VLLM_API_KEY    — API key for vLLM            (default: EMPTY)
 #   DATA_DIR        — Persistent storage path      (default: ./openwebui-data)
 # ------------------------------------------------------------------
 set -euo pipefail
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 SIF_FILE="${SCRIPT_DIR}/open-webui.sif"
 PORT="${PORT:-7081}"
 VLLM_BASE_URL="${VLLM_BASE_URL:-http://localhost:7080/v1}"
 VLLM_API_KEY="${VLLM_API_KEY:-EMPTY}"
 DATA_DIR="${DATA_DIR:-${SCRIPT_DIR}/openwebui-data}"
 if [ ! -f "$SIF_FILE" ]; then
    echo "ERROR: Container image not found at ${SIF_FILE}" >&2
    echo "       Run 06_setup_openwebui.sh first." >&2
    exit 1
 fi
 mkdir -p "$DATA_DIR"
 echo "=== Starting Open WebUI ==="
 echo "    Port:           ${PORT}"
 echo "    vLLM backend:   ${VLLM_BASE_URL}"
 echo "    Data directory: ${DATA_DIR}"
 echo ""
 echo "    Access at:      http://$(hostname -f 2>/dev/null || hostname):${PORT}"
 echo "    First user to sign up becomes admin."
 echo ""
 echo "    Press Ctrl+C to stop."
 echo "==========================================="
 echo ""
 # Replace this shell with the container process via `exec`. The PID of this
 # script then IS the apptainer PID, so a signal sent to that PID (e.g. the
 # one recorded by 08_start_openwebui_background.sh) reaches the server
 # instead of only killing a wrapper shell and orphaning the container.
 exec apptainer exec \
    --writable-tmpfs \
    --pwd /app/backend \
    --bind "${DATA_DIR}:/app/backend/data" \
    --env PORT="${PORT}" \
    --env ENABLE_OPENAI_API="True" \
    --env OPENAI_API_BASE_URLS="${VLLM_BASE_URL}" \
    --env OPENAI_API_KEYS="${VLLM_API_KEY}" \
    --env ENABLE_OLLAMA_API="False" \
    --env ENABLE_SIGNUP="True" \
    --env DEFAULT_USER_ROLE="user" \
    --env WEBUI_NAME="Qwen3.5 LLM Server" \
    --env OFFLINE_MODE="True" \
    --env ENABLE_VERSION_UPDATE_CHECK="False" \
    --env HF_HUB_OFFLINE="1" \
    "$SIF_FILE" \
    bash start.sh
--- a/08_start_openwebui_background.sh
+++ b/08_start_openwebui_background.sh
@ -0,0 +1,52 @@
 #!/usr/bin/env bash
 # ------------------------------------------------------------------
 # 08_start_openwebui_background.sh
 # Launches Open WebUI in the background with logging.
 #
 # Refuses to start when the PID recorded in the PID file is still
 # alive. A stale PID file (process gone) is removed, and the PID file
 # is also removed again if the new process dies during startup.
 #
 # Usage:
 #   bash 08_start_openwebui_background.sh
 #
 # Logs are written to: ./logs/openwebui_<timestamp>.log
 # PID is written to:   ./logs/openwebui.pid
 # ------------------------------------------------------------------
 set -euo pipefail
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 LOG_DIR="${SCRIPT_DIR}/logs"
 mkdir -p "$LOG_DIR"
 TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
 LOG_FILE="${LOG_DIR}/openwebui_${TIMESTAMP}.log"
 PID_FILE="${LOG_DIR}/openwebui.pid"
 if [ -f "$PID_FILE" ]; then
    OLD_PID="$(cat "$PID_FILE")"
    if [ -n "$OLD_PID" ] && kill -0 "$OLD_PID" 2>/dev/null; then
        echo "Open WebUI already running with PID ${OLD_PID}"
        echo "Stop it first:  bash 09_stop_openwebui.sh"
        exit 1
    fi
    # The recorded process is gone: drop the stale file so a later PID reuse
    # cannot be mistaken for a running server.
    rm -f "$PID_FILE"
 fi
 echo "Starting Open WebUI in background..."
 echo "Log file: ${LOG_FILE}"
 nohup bash "${SCRIPT_DIR}/07_start_openwebui.sh" > "$LOG_FILE" 2>&1 &
 SERVER_PID=$!
 echo "$SERVER_PID" > "$PID_FILE"
 echo "Open WebUI PID: ${SERVER_PID}"
 echo ""
 echo "Monitor logs:    tail -f ${LOG_FILE}"
 echo "Stop:            bash 09_stop_openwebui.sh"
 echo ""
 # Give the process a moment; an immediate exit (missing image, bad options)
 # shows up within a few seconds.
 sleep 5
 if kill -0 "$SERVER_PID" 2>/dev/null; then
    echo "Open WebUI process is running. Starting up..."
    echo "(Ready when you see 'Uvicorn running' in the logs)"
 else
    echo "ERROR: Open WebUI process exited. Check logs:" >&2
    tail -20 "$LOG_FILE" >&2
    # Do not leave a PID file pointing at a process that no longer exists.
    rm -f "$PID_FILE"
    exit 1
 fi
--- a/09_stop_openwebui.sh
+++ b/09_stop_openwebui.sh
@ -0,0 +1,31 @@
 #!/usr/bin/env bash
 # ------------------------------------------------------------------
 # 09_stop_openwebui.sh
 # Gracefully stops the background Open WebUI server.
 #
 # Sends SIGTERM to the PID recorded in logs/openwebui.pid and to its
 # direct child processes, waits up to STOP_TIMEOUT seconds for them to
 # exit, and only then falls back to SIGKILL.
 #
 # Usage:
 #   bash 09_stop_openwebui.sh
 #   STOP_TIMEOUT=30 bash 09_stop_openwebui.sh
 #
 # Environment variables:
 #   STOP_TIMEOUT    — Seconds to wait before SIGKILL (default: 10)
 # ------------------------------------------------------------------
 set -euo pipefail
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 PID_FILE="${SCRIPT_DIR}/logs/openwebui.pid"
 STOP_TIMEOUT="${STOP_TIMEOUT:-10}"
 case "$STOP_TIMEOUT" in
    ''|*[!0-9]*)
        echo "ERROR: STOP_TIMEOUT must be a non-negative integer." >&2
        exit 1
        ;;
 esac
 if [ ! -f "$PID_FILE" ]; then
    echo "No PID file found. Open WebUI may not be running."
    exit 0
 fi
 SERVER_PID="$(cat "$PID_FILE")"
 # An empty or non-numeric value must never be handed to `kill`.
 case "$SERVER_PID" in
    ''|*[!0-9]*)
        echo "PID file ${PID_FILE} does not contain a valid PID; removing it."
        rm -f "$PID_FILE"
        exit 0
        ;;
 esac
 # Returns 0 while at least one of the given PIDs is still alive.
 any_alive() {
    local pid
    for pid in "$@"; do
        if kill -0 "$pid" 2>/dev/null; then
            return 0
        fi
    done
    return 1
 }
 if kill -0 "$SERVER_PID" 2>/dev/null; then
    echo "Stopping Open WebUI (PID: ${SERVER_PID})..."
    # The recorded PID may belong to a wrapper shell. Collect its direct
    # children BEFORE signalling, because they are re-parented once the
    # wrapper dies and could otherwise keep running unnoticed.
    TARGETS=("$SERVER_PID")
    if command -v pgrep >/dev/null 2>&1; then
        while IFS= read -r CHILD_PID; do
            if [ -n "$CHILD_PID" ]; then
                TARGETS+=("$CHILD_PID")
            fi
        done < <(pgrep -P "$SERVER_PID" || true)
    fi
    # `|| true`: a process may exit between the liveness check and the
    # signal; under `set -e` that must not abort before the PID file is removed.
    kill "${TARGETS[@]}" 2>/dev/null || true
    WAITED=0
    while any_alive "${TARGETS[@]}" && [ "$WAITED" -lt "$STOP_TIMEOUT" ]; do
        sleep 1
        WAITED=$((WAITED + 1))
    done
    if any_alive "${TARGETS[@]}"; then
        echo "Process still alive, sending SIGKILL..."
        kill -9 "${TARGETS[@]}" 2>/dev/null || true
    fi
    echo "Open WebUI stopped."
 else
    echo "Open WebUI process (PID: ${SERVER_PID}) is not running."
 fi
 rm -f "$PID_FILE"
--- a/README.md
+++ b/README.md
@ -2,28 +2,35 @@
 Self-hosted LLM inference for ~15 concurrent students using **Qwen3.5-35B-A3B**
 (MoE, 35B total / 3B active per token), served via **vLLM** inside an
-**Apptainer** container on a GPU server. Includes a **Streamlit web app** for
+**Apptainer** container on a GPU server. Two front-ends are provided:
-chat and file editing.
+**Open WebUI** (server-hosted ChatGPT-like UI) and a **Streamlit app**
 (local chat + file editor with code execution).
 ## Architecture
 ```
-Students (Streamlit App / OpenAI SDK / curl)
+Students
-        │
+  │
-        ▼
+  ├── Browser ──► Open WebUI (silicon.fhgr.ch:7081)
-  ┌──────────────────────────────┐
+  │                  │  ChatGPT-like UI, user accounts, chat history
-  │  silicon.fhgr.ch:7080       │
+  │                  │
-  │  OpenAI-compatible API      │
+  ├── Streamlit ─────┤  Local app with file editor & code runner
-  ├──────────────────────────────┤
+  │                  │
-  │  vLLM Server (nightly)      │
+  └── SDK / curl ────┘
-  │  Apptainer container (.sif) │
+                     ▼
-  ├──────────────────────────────┤
+          ┌──────────────────────────────┐
-  │  Qwen3.5-35B-A3B weights    │
+          │  silicon.fhgr.ch:7080       │
-  │  (bind-mounted from host)   │
+          │  OpenAI-compatible API      │
-  ├──────────────────────────────┤
+          ├──────────────────────────────┤
-  │  2× NVIDIA L40S (46 GB ea.) │
+          │  vLLM Server (nightly)      │
-  │  Tensor Parallel = 2        │
+          │  Apptainer container (.sif) │
-  └──────────────────────────────┘
+          ├──────────────────────────────┤
          │  Qwen3.5-35B-A3B weights    │
          │  (bind-mounted from host)   │
          ├──────────────────────────────┤
          │  2× NVIDIA L40S (46 GB ea.) │
          │  Tensor Parallel = 2        │
          └──────────────────────────────┘
 ```
 ## Hardware
@ -134,14 +141,90 @@ curl http://localhost:7080/v1/chat/completions \
  -d '{"model":"qwen3.5-35b-a3b","messages":[{"role":"user","content":"Hello!"}],"max_tokens":128}'
 ```
-### Step 7: Share with Students
+### Step 7: Set Up Open WebUI (ChatGPT-like Interface)
 Open WebUI provides a full-featured chat interface that runs on the server.
 Students access it via a browser — no local setup required.
 **Pull the container:**
 ```bash
 bash 06_setup_openwebui.sh
 ```
 **Start (foreground with tmux):**
 ```bash
 tmux new -s webui
 bash 07_start_openwebui.sh
 # Ctrl+B, then D to detach
 ```
 **Start (background with logging):**
 ```bash
 bash 08_start_openwebui_background.sh
 tail -f logs/openwebui_*.log
 ```
 Open WebUI is ready when you see `Uvicorn running` in the logs.
 Access it at `http://silicon.fhgr.ch:7081`.
 > **Important**: The first user to sign up becomes the **admin**. Sign up
 > yourself first before sharing the URL with students.
 ### Step 8: Share with Students
 Distribute `STUDENT_GUIDE.md` with connection details:
- **Base URL**: `http://silicon.fhgr.ch:7080/v1`
+- **Open WebUI**: `http://silicon.fhgr.ch:7081` (recommended for most students)
 - **API Base URL**: `http://silicon.fhgr.ch:7080/v1` (for SDK / programmatic use)
 - **Model name**: `qwen3.5-35b-a3b`
 ---
 ## Open WebUI
 A server-hosted ChatGPT-like interface backed by the vLLM inference server.
 Runs as an Apptainer container on port **7081**.
 ### Features
 - User accounts with persistent chat history (stored in `openwebui-data/`)
 - Auto-discovers models from the vLLM backend
 - Streaming responses, markdown rendering, code highlighting
 - Admin panel for managing users, models, and settings
 - No local setup needed — students just open a browser
 ### Configuration
 | Variable | Default | Description |
 |----------|---------|-------------|
 | `PORT` | `7081` | HTTP port for the UI |
 | `VLLM_BASE_URL` | `http://localhost:7080/v1` | vLLM API endpoint |
 | `VLLM_API_KEY` | `EMPTY` | API key (if vLLM requires one) |
 | `DATA_DIR` | `./openwebui-data` | Persistent storage (DB, uploads) |
 ### Management
 ```bash
 # Start in background
 bash 08_start_openwebui_background.sh
 # View logs
 tail -f logs/openwebui_*.log
 # Stop
 bash 09_stop_openwebui.sh
 # Reconnect to tmux session
 tmux attach -t webui
 ```
 ### Data Persistence
 All user data (accounts, chats, settings) is stored in `openwebui-data/`.
 This directory is bind-mounted into the container, so data survives
 container restarts. Back it up regularly.
 ---
 ## Streamlit App
 A web-based chat and file editor that connects to the inference server.
@ -240,18 +323,22 @@ tmux attach -t llm
 ## Files Overview
-| File                             | Purpose                                              |
+| File                               | Purpose                                              |
-|----------------------------------|------------------------------------------------------|
+|------------------------------------|------------------------------------------------------|
-| `vllm_qwen.def`                 | Apptainer container definition (vLLM nightly + deps) |
+| `vllm_qwen.def`                   | Apptainer container definition (vLLM nightly + deps) |
-| `01_build_container.sh`          | Builds the Apptainer `.sif` image                    |
+| `01_build_container.sh`            | Builds the Apptainer `.sif` image                    |
-| `02_download_model.sh`           | Downloads model weights (runs inside container)      |
+| `02_download_model.sh`             | Downloads model weights (runs inside container)      |
-| `03_start_server.sh`             | Starts vLLM server (foreground)                      |
+| `03_start_server.sh`               | Starts vLLM server (foreground)                      |
-| `04_start_server_background.sh`  | Starts server in background with logging             |
+| `04_start_server_background.sh`    | Starts vLLM server in background with logging        |
-| `05_stop_server.sh`              | Stops the background server                          |
+| `05_stop_server.sh`                | Stops the background vLLM server                     |
-| `app.py`                         | Streamlit chat & file editor web app                 |
+| `06_setup_openwebui.sh`            | Pulls the Open WebUI container image                 |
-| `requirements.txt`               | Python dependencies for the Streamlit app            |
+| `07_start_openwebui.sh`            | Starts Open WebUI (foreground)                       |
-| `test_server.py`                 | Tests the running server via CLI                     |
+| `08_start_openwebui_background.sh` | Starts Open WebUI in background with logging         |
-| `STUDENT_GUIDE.md`               | Instructions for students                            |
+| `09_stop_openwebui.sh`             | Stops the background Open WebUI                      |
 | `app.py`                           | Streamlit chat & file editor web app                 |
 | `requirements.txt`                 | Python dependencies for the Streamlit app            |
 | `test_server.py`                   | Tests the running server via CLI                     |
 | `STUDENT_GUIDE.md`                 | Instructions for students                            |
 ---
@ -285,6 +372,17 @@ tmux attach -t llm
 - Disable thinking mode for faster simple responses
 - Monitor: `curl http://localhost:7080/metrics`
 ### Open WebUI won't start
 - Ensure the vLLM server is running first on port 7080
 - Check that port 7081 is not already in use: `ss -tlnp | grep 7081`
 - Check logs: `tail -50 logs/openwebui_*.log`
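 - If the database is corrupted, reset it with `rm openwebui-data/webui.db` and restart. **Warning:** this deletes all user accounts, chats, and settings, so back up the file first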
 ### Open WebUI shows no models
 - Verify vLLM is reachable: `curl http://localhost:7080/v1/models`
 - The OpenAI API base URL is set on first launch; if changed later, update
  it in the Open WebUI Admin Panel > Settings > Connections
 ### Syncing files to the server
 - No `git` or `pip` on the host — use `scp` from your local machine:
 ```bash
--- a/STUDENT_GUIDE.md
+++ b/STUDENT_GUIDE.md
@ -4,30 +4,93 @@
 A **Qwen3.5-35B-A3B** language model is running on our GPU server. It's a
 Mixture-of-Experts model (35B total parameters, 3B active per token), providing
-fast and high-quality responses. You can interact with it using the
+fast and high-quality responses.
 **OpenAI-compatible API**.
-## Connection Details
+There are **three ways** to interact with the model:
-| Parameter    | Value                                       |
+1. **Open WebUI** — ChatGPT-like interface in your browser (easiest)
-|------------- |---------------------------------------------|
+2. **Streamlit App** — Local app with chat, file editor, and code execution
-| **Base URL** | `http://silicon.fhgr.ch:7080/v1`            |
+3. **Python SDK / curl** — Programmatic access via the OpenAI-compatible API
 | **Model**    | `qwen3.5-35b-a3b`                           |
 | **API Key**  | *(ask your instructor — may be `EMPTY`)*    |
 > **Note**: You must be on the university network or VPN to reach the server.
 ## Connection Details
 | Parameter        | Value                                       |
 |------------------|---------------------------------------------|
 | **Open WebUI**   | `http://silicon.fhgr.ch:7081`               |
 | **API Base URL** | `http://silicon.fhgr.ch:7080/v1`            |
 | **Model**        | `qwen3.5-35b-a3b`                           |
 | **API Key**      | *(ask your instructor — may be `EMPTY`)*    |
 ---
-## Quick Start with Python
+## Option 1: Open WebUI (Recommended)
-### 1. Install the OpenAI SDK
+The easiest way to chat with the model — no installation required.
 ### Getting Started
 1. Make sure you are connected to the **university network** (or VPN).
 2. Open your browser and go to **http://silicon.fhgr.ch:7081**
 3. Click **"Sign Up"** to create a new account:
   - Enter your **name** (e.g. your first and last name)
   - Enter your **email** (use your university email)
   - Choose a **password**
   - Click **"Create Account"**
 4. After signing up you are logged in automatically.
 5. Select the model **qwen3.5-35b-a3b** from the model dropdown at the top.
 6. Type a message and press Enter — you're chatting with the LLM.
 ### Returning Later
 - Go to **http://silicon.fhgr.ch:7081** and click **"Sign In"**.
 - Enter the email and password you used during sign-up.
 - All your previous chats are still there.
 ### Features
 - **Chat history** — all conversations are saved on the server and persist across sessions
 - **Markdown rendering** with syntax-highlighted code blocks
 - **Model selector** — auto-discovers available models from the server
 - **Conversation branching** — edit previous messages and explore alternative responses
 - **File upload** — attach files to your messages for the model to analyze
 - **Search** — search across all your past conversations
 ### Tips
 - Your account and chat history are stored on the server. You can log in
  from any device on the university network.
 - If you forget your password, ask your instructor to reset it via the
  Admin Panel.
 - The model works best when you provide clear, specific instructions.
 - For code tasks, mention the programming language explicitly (e.g.
  "Write a Python function that...").
 - Long conversations use more context. Start a **New Chat** (top-left
  button) when switching topics to get faster, more focused responses.
 ---
 ## Option 2: Streamlit App (Chat + File Editor)
 A local app with chat, file editing, and Python/LaTeX execution.
 See the [Streamlit section below](#streamlit-chat--file-editor-app) for setup.
 ---
 ## Option 3: Python SDK / curl
 For programmatic access and scripting.
 ### Quick Start with Python
 #### 1. Install the OpenAI SDK
 ```bash
 pip install openai
 ```
-### 2. Simple Chat
+#### 2. Simple Chat
 ```python
 from openai import OpenAI
@ -50,7 +113,7 @@ response = client.chat.completions.create(
 print(response.choices[0].message.content)
 ```
-### 3. Streaming Responses
+#### 3. Streaming Responses
 ```python
 stream = client.chat.completions.create(
@ -70,7 +133,7 @@ print()
 ---
-## Quick Start with curl
+### Quick Start with curl
 ```bash
 curl http://silicon.fhgr.ch:7080/v1/chat/completions \
@ -196,3 +259,5 @@ response = client.chat.completions.create(
 | Slow responses              | The model is shared — peak times may be slower      |
 | `401 Unauthorized`          | Ask your instructor for the API key                 |
 | Response cut off            | Increase `max_tokens` in your request               |
 | Open WebUI login fails      | Make sure you created an account first (Sign Up)    |
 | Open WebUI shows no models  | The vLLM server may still be loading — wait a few minutes |
--- a/app.py
+++ b/app.py
@ -11,6 +11,7 @@ Usage:
 """
 import re
 import subprocess
 import streamlit as st
 from openai import OpenAI
 from pathlib import Path
@ -51,6 +52,9 @@ LANG_MAP = {
 }
 MAX_CONTEXT = 32768
 def extract_code(text: str, lang: str = "") -> str:
    """Extract the first fenced code block from markdown text.
    Falls back to the full text if no code block is found."""
@ -61,6 +65,56 @@ def extract_code(text: str, lang: str = "") -> str:
    return text.strip()
 def estimate_tokens(messages: list[dict]) -> int:
    """Approximate the token count of a chat history.
    Heuristic: about four characters per token, applied to the combined
    length of every message's ``content`` value."""
    total_chars = 0
    for message in messages:
        total_chars += len(message["content"])
    return total_chars // 4
 def trim_history(messages: list[dict], reserved: int) -> list[dict]:
    """Drop the oldest messages until the history fits the context budget.
    The budget is ``MAX_CONTEXT - reserved`` estimated tokens, where
    ``reserved`` is the room kept free for the model's reply. The list is
    trimmed in place and also returned. The final message is never removed.
    If trimming leaves an assistant reply at the head of the history (its
    user prompt was dropped), that reply is dropped as well so the history
    does not start with an answer to a question that is no longer present."""
    budget = MAX_CONTEXT - reserved
    trimmed = False
    while len(messages) > 1 and estimate_tokens(messages) > budget:
        messages.pop(0)
        trimmed = True
    # Only clean up the head when something was actually removed, so an
    # untouched history is returned exactly as it was passed in.
    while trimmed and len(messages) > 1 and messages[0].get("role") == "assistant":
        messages.pop(0)
    return messages
 # File suffixes for which the editor offers a run/compile action.
 RUNNABLE_EXTENSIONS = {".py", ".tex"}
 # Maximum wall-clock seconds a run may take before it is aborted.
 RUN_TIMEOUT = 30
 def _as_text(data) -> str:
    """Normalize captured process output to ``str``.
    Output attached to ``subprocess.TimeoutExpired`` may be ``None`` or
    ``bytes`` even when the process was started in text mode."""
    if data is None:
        return ""
    if isinstance(data, bytes):
        return data.decode(errors="replace")
    return data
 def run_file(file_path: Path) -> dict:
    """Execute a .py or .tex file and return stdout, stderr, and return code.
    Python files are run with the interpreter that is running this app
    (falling back to ``python3``); LaTeX files are compiled with
    ``pdflatex`` in non-stop mode. The command runs in the file's own
    directory and is aborted after ``RUN_TIMEOUT`` seconds.
    Returns a dict with keys ``stdout``, ``stderr`` and ``rc``. ``rc`` is
    the process exit code, ``1`` for an unsupported suffix, and ``-1`` when
    the run timed out or the executable could not be found. On timeout,
    any output captured so far is kept and a notice is added to ``stderr``."""
    import sys  # local import: only needed to locate the running interpreter
    suffix = file_path.suffix
    cwd = file_path.parent.resolve()
    if suffix == ".py":
        # sys.executable avoids relying on a `python3` command being on PATH
        # and keeps the script in the same environment as the app.
        cmd = [sys.executable or "python3", file_path.name]
    elif suffix == ".tex":
        cmd = [
            "pdflatex",
            "-interaction=nonstopmode",
            f"-output-directory={cwd}",
            file_path.name,
        ]
    else:
        return {"stdout": "", "stderr": f"Unsupported file type: {suffix}", "rc": 1}
    try:
        proc = subprocess.run(
            cmd,
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=RUN_TIMEOUT,
        )
        return {"stdout": proc.stdout, "stderr": proc.stderr, "rc": proc.returncode}
    except subprocess.TimeoutExpired as exc:
        notice = f"Timed out after {RUN_TIMEOUT}s"
        partial_err = _as_text(exc.stderr)
        return {
            "stdout": _as_text(exc.stdout),
            "stderr": f"{partial_err}\n{notice}" if partial_err else notice,
            "rc": -1,
        }
    except FileNotFoundError as e:
        return {"stdout": "", "stderr": str(e), "rc": -1}
 # ---------------------------------------------------------------------------
 # Sidebar — File Manager
 # ---------------------------------------------------------------------------
@ -100,6 +154,10 @@ with tab_chat:
        with st.chat_message("user"):
            st.markdown(prompt)
        st.session_state.messages = trim_history(
            st.session_state.messages, reserved=max_tokens
        )
        with st.chat_message("assistant"):
            placeholder = st.empty()
            full_response = ""
@ -123,6 +181,13 @@ with tab_chat:
        st.session_state.messages.append({"role": "assistant", "content": full_response})
    if st.session_state.messages:
        used = estimate_tokens(st.session_state.messages)
        pct = min(used / MAX_CONTEXT, 1.0)
        label = f"Context: ~{used:,} / {MAX_CONTEXT:,} tokens"
        if pct > 0.8:
            label += " ⚠️ nearing limit — older messages will be trimmed"
        st.progress(pct, text=label)
        col_clear, col_save = st.columns([1, 3])
        with col_clear:
            if st.button("Clear Chat"):
@ -149,54 +214,103 @@ with tab_editor:
        content = file_path.read_text() if file_path.exists() else ""
        suffix = file_path.suffix
        lang = LANG_MAP.get(suffix, "text")
        runnable = suffix in RUNNABLE_EXTENSIONS
-        st.code(content, language=lang if lang != "text" else None, line_numbers=True)
+        if runnable:
            col_edit, col_term = st.columns([3, 2])
        else:
            col_edit = st.container()
-        edited = st.text_area(
+        with col_edit:
-            "Edit below:",
+            st.code(content, language=lang if lang != "text" else None, line_numbers=True)
            value=content,
            height=400,
            key=f"editor_{selected_file}_{hash(content)}",
        )
-        col_save, col_gen = st.columns(2)
+            edited = st.text_area(
-
+                "Edit below:",
-        with col_save:
+                value=content,
-            if st.button("Save File"):
+                height=400,
-                file_path.write_text(edited)
+                key=f"editor_{selected_file}_{hash(content)}",
                st.success(f"Saved {selected_file}")
                st.rerun()
        with col_gen:
            gen_prompt = st.text_input(
                "Generation instruction",
                placeholder="e.g. Add error handling / Fix the LaTeX formatting",
                key="gen_prompt",
            )
-            if st.button("Generate with LLM") and gen_prompt:
+
-                with st.spinner("Generating..."):
+            col_save, col_gen = st.columns(2)
-                    response = client.chat.completions.create(
+
-                        model=MODEL,
+            with col_save:
-                        messages=[
+                if st.button("Save File"):
-                            {"role": "system", "content": (
+                    file_path.write_text(edited)
-                                f"You are a coding assistant. The user has a {lang} file. "
+                    st.success(f"Saved {selected_file}")
                                "Return ONLY the raw file content inside a single code block. "
                                "No explanations, no comments about changes."
                            )},
                            {"role": "user", "content": (
                                f"Here is my {lang} file:\n\n```\n{edited}\n```\n\n"
                                f"Instruction: {gen_prompt}"
                            )},
                        ],
                        max_tokens=max_tokens,
                        temperature=temperature,
                        top_p=top_p,
                        extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
                    )
                    result = response.choices[0].message.content
                    code = extract_code(result, lang)
                    file_path.write_text(code)
                    st.success("File updated by LLM")
                    st.rerun()
            with col_gen:
                gen_prompt = st.text_input(
                    "Generation instruction",
                    placeholder="e.g. Add error handling / Fix the LaTeX formatting",
                    key="gen_prompt",
                )
                if st.button("Generate with LLM") and gen_prompt:
                    with st.spinner("Generating..."):
                        response = client.chat.completions.create(
                            model=MODEL,
                            messages=[
                                {"role": "system", "content": (
                                    f"You are a coding assistant. The user has a {lang} file. "
                                    "Return ONLY the raw file content inside a single code block. "
                                    "No explanations, no comments about changes."
                                )},
                                {"role": "user", "content": (
                                    f"Here is my {lang} file:\n\n```\n{edited}\n```\n\n"
                                    f"Instruction: {gen_prompt}"
                                )},
                            ],
                            max_tokens=max_tokens,
                            temperature=temperature,
                            top_p=top_p,
                            extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
                        )
                        result = response.choices[0].message.content
                        code = extract_code(result, lang)
                        file_path.write_text(code)
                        st.success("File updated by LLM")
                        st.rerun()
        if runnable:
            with col_term:
                run_label = "Compile LaTeX" if suffix == ".tex" else "Run Python"
                st.subheader("Terminal Output")
                if st.button(run_label, type="primary"):
                    file_path.write_text(edited)
                    with st.spinner(f"{'Compiling' if suffix == '.tex' else 'Running'}..."):
                        result = run_file(file_path)
                    st.session_state["last_run"] = result
                result = st.session_state.get("last_run")
                if result:
                    if result["rc"] == 0:
                        st.success(f"Exit code: {result['rc']}")
                    else:
                        st.error(f"Exit code: {result['rc']}")
                    if result["stdout"]:
                        st.text_area(
                            "stdout",
                            value=result["stdout"],
                            height=300,
                            disabled=True,
                            key="run_stdout",
                        )
                    if result["stderr"]:
                        st.text_area(
                            "stderr",
                            value=result["stderr"],
                            height=200,
                            disabled=True,
                            key="run_stderr",
                        )
                    if not result["stdout"] and not result["stderr"]:
                        st.info("No output produced.")
                else:
                    st.caption(
                        f"Click **{run_label}** to execute the file "
                        f"(timeout: {RUN_TIMEOUT}s)."
                    )
    else:
        st.info("Create a file in the sidebar to start editing.")