From f4fdaab73278f70a10bda7657983f1f514cbb27b Mon Sep 17 00:00:00 2001 From: herzogflorian Date: Mon, 2 Mar 2026 18:48:51 +0100 Subject: [PATCH] Add Open WebUI integration and enhance Streamlit app - Add Open WebUI scripts (06-09) for server-hosted ChatGPT-like interface connected to the vLLM backend on port 7081 - Add context window management to chat (auto-trim, token counter, progress bar) - Add terminal output panel to file editor for running Python/LaTeX files - Update README with Open WebUI setup, architecture diagram, and troubleshooting - Update STUDENT_GUIDE with step-by-step Open WebUI login instructions Made-with: Cursor --- .gitignore | 3 + 06_setup_openwebui.sh | 37 ++++++ 07_start_openwebui.sh | 70 +++++++++++ 08_start_openwebui_background.sh | 52 ++++++++ 09_stop_openwebui.sh | 31 +++++ README.md | 162 ++++++++++++++++++++----- STUDENT_GUIDE.md | 91 ++++++++++++-- app.py | 202 ++++++++++++++++++++++++------- 8 files changed, 559 insertions(+), 89 deletions(-) create mode 100755 06_setup_openwebui.sh create mode 100755 07_start_openwebui.sh create mode 100755 08_start_openwebui_background.sh create mode 100755 09_stop_openwebui.sh diff --git a/.gitignore b/.gitignore index 1cab20f..cdb2d3d 100644 --- a/.gitignore +++ b/.gitignore @@ -16,5 +16,8 @@ models/ # Streamlit workspace files workspace/ +# Open WebUI persistent data (user accounts, chats, DB) +openwebui-data/ + # macOS .DS_Store diff --git a/06_setup_openwebui.sh b/06_setup_openwebui.sh new file mode 100755 index 0000000..b1891d1 --- /dev/null +++ b/06_setup_openwebui.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# ------------------------------------------------------------------ +# 06_setup_openwebui.sh +# Pulls the Open WebUI container image and creates the data directory +# for persistent storage (user accounts, chat history, settings). 
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 07_start_openwebui.sh
# Starts Open WebUI connected to the vLLM inference server.
#
# Open WebUI provides a ChatGPT-like interface with:
#   - User accounts & chat history (persisted in openwebui-data/)
#   - Model selector (auto-discovers models from vLLM)
#   - Streaming responses, markdown rendering, code highlighting
#
# The first user to sign up becomes the admin.
#
# Usage:
#   bash 07_start_openwebui.sh                    # defaults
#   PORT=7082 bash 07_start_openwebui.sh          # custom port
#   VLLM_BASE_URL=http://localhost:7080/v1 bash 07_start_openwebui.sh
#
# Environment variables:
#   PORT           — HTTP port for Open WebUI (default: 7081)
#   VLLM_BASE_URL  — vLLM OpenAI-compatible URL (default: http://localhost:7080/v1)
#   VLLM_API_KEY   — API key for vLLM (default: EMPTY)
#   DATA_DIR       — Persistent storage path (default: ./openwebui-data)
# ------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SIF_FILE="${SCRIPT_DIR}/open-webui.sif"

PORT="${PORT:-7081}"
VLLM_BASE_URL="${VLLM_BASE_URL:-http://localhost:7080/v1}"
VLLM_API_KEY="${VLLM_API_KEY:-EMPTY}"
DATA_DIR="${DATA_DIR:-${SCRIPT_DIR}/openwebui-data}"

# Fail with a readable message instead of a bare "command not found".
if ! command -v apptainer >/dev/null 2>&1; then
    echo "ERROR: 'apptainer' was not found on PATH." >&2
    exit 1
fi

if [ ! -f "$SIF_FILE" ]; then
    echo "ERROR: Container image not found at ${SIF_FILE}"
    echo "       Run 06_setup_openwebui.sh first."
    exit 1
fi

# The container stores its database and uploads here via the bind mount below.
mkdir -p "$DATA_DIR"

echo "=== Starting Open WebUI ==="
echo "  Port:           ${PORT}"
echo "  vLLM backend:   ${VLLM_BASE_URL}"
echo "  Data directory: ${DATA_DIR}"
echo ""
echo "  Access at: http://$(hostname -f 2>/dev/null || hostname):${PORT}"
echo "  First user to sign up becomes admin."
echo ""
echo "  Press Ctrl+C to stop."
echo "==========================================="
echo ""

# `exec` replaces this shell with the apptainer process instead of running it
# as a child. When the script is launched with `nohup bash 07_start_openwebui.sh &`
# (as 08_start_openwebui_background.sh does), the PID captured via `$!` is then
# the server itself, so a plain `kill <pid>` actually stops it rather than
# killing only the wrapper shell and leaving the server running.
exec apptainer exec \
    --writable-tmpfs \
    --pwd /app/backend \
    --bind "${DATA_DIR}:/app/backend/data" \
    --env PORT="${PORT}" \
    --env ENABLE_OPENAI_API="True" \
    --env OPENAI_API_BASE_URLS="${VLLM_BASE_URL}" \
    --env OPENAI_API_KEYS="${VLLM_API_KEY}" \
    --env ENABLE_OLLAMA_API="False" \
    --env ENABLE_SIGNUP="True" \
    --env DEFAULT_USER_ROLE="user" \
    --env WEBUI_NAME="Qwen3.5 LLM Server" \
    --env OFFLINE_MODE="True" \
    --env ENABLE_VERSION_UPDATE_CHECK="False" \
    --env HF_HUB_OFFLINE="1" \
    "$SIF_FILE" \
    bash start.sh
#!/usr/bin/env bash
# ------------------------------------------------------------------
# 09_stop_openwebui.sh
# Gracefully stops the background Open WebUI server.
#
# The PID stored in logs/openwebui.pid is the `$!` of
# `nohup bash 07_start_openwebui.sh &`, i.e. it may be a wrapper shell
# whose child is the actual server. Signalling only that PID can leave
# the server running, so this script signals the recorded process
# together with all of its descendants.
#
# Environment variables:
#   GRACE_SECONDS — seconds to wait after SIGTERM before SIGKILL (default: 10)
# ------------------------------------------------------------------
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PID_FILE="${SCRIPT_DIR}/logs/openwebui.pid"
GRACE_SECONDS="${GRACE_SECONDS:-10}"

if [ ! -f "$PID_FILE" ]; then
    echo "No PID file found. Open WebUI may not be running."
    exit 0
fi

SERVER_PID="$(tr -d '[:space:]' < "$PID_FILE")"

# An empty or corrupted PID file must never be passed on to `kill`.
if ! [[ "$SERVER_PID" =~ ^[0-9]+$ ]]; then
    echo "ERROR: ${PID_FILE} does not contain a valid PID; removing it." >&2
    rm -f "$PID_FILE"
    exit 1
fi

# Print PID $1 and every descendant, deepest first.
# Falls back to just $1 when pgrep is unavailable or finds no children.
process_tree() {
    local child
    for child in $(pgrep -P "$1" 2>/dev/null || true); do
        process_tree "$child"
    done
    echo "$1"
}

# Succeed if at least one of the given PIDs still exists.
any_alive() {
    local pid
    for pid in "$@"; do
        if kill -0 "$pid" 2>/dev/null; then
            return 0
        fi
    done
    return 1
}

if kill -0 "$SERVER_PID" 2>/dev/null; then
    echo "Stopping Open WebUI (PID: ${SERVER_PID})..."

    # Snapshot the tree BEFORE signalling: once the parent exits, its children
    # are re-parented and can no longer be discovered through its PID.
    # Word splitting is intentional; the output is whitespace-separated PIDs.
    # shellcheck disable=SC2207
    TARGETS=( $(process_tree "$SERVER_PID") )

    # `|| true`: a process may exit between the liveness check and the signal;
    # under `set -e` that race must not abort before the PID file is removed.
    kill "${TARGETS[@]}" 2>/dev/null || true

    # Poll instead of a fixed sleep so a fast shutdown returns immediately
    # and a slow one gets the full grace period.
    waited=0
    while [ "$waited" -lt "$GRACE_SECONDS" ] && any_alive "${TARGETS[@]}"; do
        sleep 1
        waited=$((waited + 1))
    done

    if any_alive "${TARGETS[@]}"; then
        echo "Process still alive, sending SIGKILL..."
        kill -9 "${TARGETS[@]}" 2>/dev/null || true
    fi
    echo "Open WebUI stopped."
else
    echo "Open WebUI process (PID: ${SERVER_PID}) is not running."
fi

rm -f "$PID_FILE"
Two front-ends are provided: +**Open WebUI** (server-hosted ChatGPT-like UI) and a **Streamlit app** +(local chat + file editor with code execution). ## Architecture ``` -Students (Streamlit App / OpenAI SDK / curl) - │ - ▼ - ┌──────────────────────────────┐ - │ silicon.fhgr.ch:7080 │ - │ OpenAI-compatible API │ - ├──────────────────────────────┤ - │ vLLM Server (nightly) │ - │ Apptainer container (.sif) │ - ├──────────────────────────────┤ - │ Qwen3.5-35B-A3B weights │ - │ (bind-mounted from host) │ - ├──────────────────────────────┤ - │ 2× NVIDIA L40S (46 GB ea.) │ - │ Tensor Parallel = 2 │ - └──────────────────────────────┘ +Students + │ + ├── Browser ──► Open WebUI (silicon.fhgr.ch:7081) + │ │ ChatGPT-like UI, user accounts, chat history + │ │ + ├── Streamlit ─────┤ Local app with file editor & code runner + │ │ + └── SDK / curl ────┘ + ▼ + ┌──────────────────────────────┐ + │ silicon.fhgr.ch:7080 │ + │ OpenAI-compatible API │ + ├──────────────────────────────┤ + │ vLLM Server (nightly) │ + │ Apptainer container (.sif) │ + ├──────────────────────────────┤ + │ Qwen3.5-35B-A3B weights │ + │ (bind-mounted from host) │ + ├──────────────────────────────┤ + │ 2× NVIDIA L40S (46 GB ea.) │ + │ Tensor Parallel = 2 │ + └──────────────────────────────┘ ``` ## Hardware @@ -134,14 +141,90 @@ curl http://localhost:7080/v1/chat/completions \ -d '{"model":"qwen3.5-35b-a3b","messages":[{"role":"user","content":"Hello!"}],"max_tokens":128}' ``` -### Step 7: Share with Students +### Step 7: Set Up Open WebUI (ChatGPT-like Interface) + +Open WebUI provides a full-featured chat interface that runs on the server. +Students access it via a browser — no local setup required. 
+ +**Pull the container:** +```bash +bash 06_setup_openwebui.sh +``` + +**Start (foreground with tmux):** +```bash +tmux new -s webui +bash 07_start_openwebui.sh +# Ctrl+B, then D to detach +``` + +**Start (background with logging):** +```bash +bash 08_start_openwebui_background.sh +tail -f logs/openwebui_*.log +``` + +Open WebUI is ready when you see `Uvicorn running` in the logs. +Access it at `http://silicon.fhgr.ch:7081`. + +> **Important**: The first user to sign up becomes the **admin**. Sign up +> yourself first before sharing the URL with students. + +### Step 8: Share with Students Distribute `STUDENT_GUIDE.md` with connection details: -- **Base URL**: `http://silicon.fhgr.ch:7080/v1` +- **Open WebUI**: `http://silicon.fhgr.ch:7081` (recommended for most students) +- **API Base URL**: `http://silicon.fhgr.ch:7080/v1` (for SDK / programmatic use) - **Model name**: `qwen3.5-35b-a3b` --- +## Open WebUI + +A server-hosted ChatGPT-like interface backed by the vLLM inference server. +Runs as an Apptainer container on port **7081**. 
+ +### Features + +- User accounts with persistent chat history (stored in `openwebui-data/`) +- Auto-discovers models from the vLLM backend +- Streaming responses, markdown rendering, code highlighting +- Admin panel for managing users, models, and settings +- No local setup needed — students just open a browser + +### Configuration + +| Variable | Default | Description | +|----------|---------|-------------| +| `PORT` | `7081` | HTTP port for the UI | +| `VLLM_BASE_URL` | `http://localhost:7080/v1` | vLLM API endpoint | +| `VLLM_API_KEY` | `EMPTY` | API key (if vLLM requires one) | +| `DATA_DIR` | `./openwebui-data` | Persistent storage (DB, uploads) | + +### Management + +```bash +# Start in background +bash 08_start_openwebui_background.sh + +# View logs +tail -f logs/openwebui_*.log + +# Stop +bash 09_stop_openwebui.sh + +# Reconnect to tmux session +tmux attach -t webui +``` + +### Data Persistence + +All user data (accounts, chats, settings) is stored in `openwebui-data/`. +This directory is bind-mounted into the container, so data survives +container restarts. Back it up regularly. + +--- + ## Streamlit App A web-based chat and file editor that connects to the inference server. 
@@ -240,18 +323,22 @@ tmux attach -t llm ## Files Overview -| File | Purpose | -|----------------------------------|------------------------------------------------------| -| `vllm_qwen.def` | Apptainer container definition (vLLM nightly + deps) | -| `01_build_container.sh` | Builds the Apptainer `.sif` image | -| `02_download_model.sh` | Downloads model weights (runs inside container) | -| `03_start_server.sh` | Starts vLLM server (foreground) | -| `04_start_server_background.sh` | Starts server in background with logging | -| `05_stop_server.sh` | Stops the background server | -| `app.py` | Streamlit chat & file editor web app | -| `requirements.txt` | Python dependencies for the Streamlit app | -| `test_server.py` | Tests the running server via CLI | -| `STUDENT_GUIDE.md` | Instructions for students | +| File | Purpose | +|------------------------------------|------------------------------------------------------| +| `vllm_qwen.def` | Apptainer container definition (vLLM nightly + deps) | +| `01_build_container.sh` | Builds the Apptainer `.sif` image | +| `02_download_model.sh` | Downloads model weights (runs inside container) | +| `03_start_server.sh` | Starts vLLM server (foreground) | +| `04_start_server_background.sh` | Starts vLLM server in background with logging | +| `05_stop_server.sh` | Stops the background vLLM server | +| `06_setup_openwebui.sh` | Pulls the Open WebUI container image | +| `07_start_openwebui.sh` | Starts Open WebUI (foreground) | +| `08_start_openwebui_background.sh` | Starts Open WebUI in background with logging | +| `09_stop_openwebui.sh` | Stops the background Open WebUI | +| `app.py` | Streamlit chat & file editor web app | +| `requirements.txt` | Python dependencies for the Streamlit app | +| `test_server.py` | Tests the running server via CLI | +| `STUDENT_GUIDE.md` | Instructions for students | --- @@ -285,6 +372,17 @@ tmux attach -t llm - Disable thinking mode for faster simple responses - Monitor: `curl 
http://localhost:7080/metrics` +### Open WebUI won't start +- Ensure the vLLM server is running first on port 7080 +- Check that port 7081 is not already in use: `ss -tlnp | grep 7081` +- Check logs: `tail -50 logs/openwebui_*.log` +- If the database is corrupted, reset: `rm openwebui-data/webui.db` and restart + +### Open WebUI shows no models +- Verify vLLM is reachable: `curl http://localhost:7080/v1/models` +- The OpenAI API base URL is set on first launch; if changed later, update + it in the Open WebUI Admin Panel > Settings > Connections + ### Syncing files to the server - No `git` or `pip` on the host — use `scp` from your local machine: ```bash diff --git a/STUDENT_GUIDE.md b/STUDENT_GUIDE.md index 7a0601c..7009a09 100644 --- a/STUDENT_GUIDE.md +++ b/STUDENT_GUIDE.md @@ -4,30 +4,93 @@ A **Qwen3.5-35B-A3B** language model is running on our GPU server. It's a Mixture-of-Experts model (35B total parameters, 3B active per token), providing -fast and high-quality responses. You can interact with it using the -**OpenAI-compatible API**. +fast and high-quality responses. -## Connection Details +There are **three ways** to interact with the model: -| Parameter | Value | -|------------- |---------------------------------------------| -| **Base URL** | `http://silicon.fhgr.ch:7080/v1` | -| **Model** | `qwen3.5-35b-a3b` | -| **API Key** | *(ask your instructor — may be `EMPTY`)* | +1. **Open WebUI** — ChatGPT-like interface in your browser (easiest) +2. **Streamlit App** — Local app with chat, file editor, and code execution +3. **Python SDK / curl** — Programmatic access via the OpenAI-compatible API > **Note**: You must be on the university network or VPN to reach the server. 
+## Connection Details + +| Parameter | Value | +|------------------|---------------------------------------------| +| **Open WebUI** | `http://silicon.fhgr.ch:7081` | +| **API Base URL** | `http://silicon.fhgr.ch:7080/v1` | +| **Model** | `qwen3.5-35b-a3b` | +| **API Key** | *(ask your instructor — may be `EMPTY`)* | + --- -## Quick Start with Python +## Option 1: Open WebUI (Recommended) -### 1. Install the OpenAI SDK +The easiest way to chat with the model — no installation required. + +### Getting Started + +1. Make sure you are connected to the **university network** (or VPN). +2. Open your browser and go to **http://silicon.fhgr.ch:7081** +3. Click **"Sign Up"** to create a new account: + - Enter your **name** (e.g. your first and last name) + - Enter your **email** (use your university email) + - Choose a **password** + - Click **"Create Account"** +4. After signing up you are logged in automatically. +5. Select the model **qwen3.5-35b-a3b** from the model dropdown at the top. +6. Type a message and press Enter — you're chatting with the LLM. + +### Returning Later + +- Go to **http://silicon.fhgr.ch:7081** and click **"Sign In"**. +- Enter the email and password you used during sign-up. +- All your previous chats are still there. + +### Features + +- **Chat history** — all conversations are saved on the server and persist across sessions +- **Markdown rendering** with syntax-highlighted code blocks +- **Model selector** — auto-discovers available models from the server +- **Conversation branching** — edit previous messages and explore alternative responses +- **File upload** — attach files to your messages for the model to analyze +- **Search** — search across all your past conversations + +### Tips + +- Your account and chat history are stored on the server. You can log in + from any device on the university network. +- If you forget your password, ask your instructor to reset it via the + Admin Panel. 
+- The model works best when you provide clear, specific instructions. +- For code tasks, mention the programming language explicitly (e.g. + "Write a Python function that..."). +- Long conversations use more context. Start a **New Chat** (top-left + button) when switching topics to get faster, more focused responses. + +--- + +## Option 2: Streamlit App (Chat + File Editor) + +A local app with chat, file editing, and Python/LaTeX execution. +See the [Streamlit section below](#streamlit-chat--file-editor-app) for setup. + +--- + +## Option 3: Python SDK / curl + +For programmatic access and scripting. + +### Quick Start with Python + +#### 1. Install the OpenAI SDK ```bash pip install openai ``` -### 2. Simple Chat +#### 2. Simple Chat ```python from openai import OpenAI @@ -50,7 +113,7 @@ response = client.chat.completions.create( print(response.choices[0].message.content) ``` -### 3. Streaming Responses +#### 3. Streaming Responses ```python stream = client.chat.completions.create( @@ -70,7 +133,7 @@ print() --- -## Quick Start with curl +### Quick Start with curl ```bash curl http://silicon.fhgr.ch:7080/v1/chat/completions \ @@ -196,3 +259,5 @@ response = client.chat.completions.create( | Slow responses | The model is shared — peak times may be slower | | `401 Unauthorized` | Ask your instructor for the API key | | Response cut off | Increase `max_tokens` in your request | +| Open WebUI login fails | Make sure you created an account first (Sign Up) | +| Open WebUI shows no models | The vLLM server may still be loading — wait a few minutes | diff --git a/app.py b/app.py index faf9cdd..0654aa0 100644 --- a/app.py +++ b/app.py @@ -11,6 +11,7 @@ Usage: """ import re +import subprocess import streamlit as st from openai import OpenAI from pathlib import Path @@ -51,6 +52,9 @@ LANG_MAP = { } +MAX_CONTEXT = 32768 + + def extract_code(text: str, lang: str = "") -> str: """Extract the first fenced code block from markdown text. 
# Heuristic behind the context meter: roughly four characters per token.
_CHARS_PER_TOKEN = 4


def _content_chars(message: dict) -> int:
    """Return the length of a message's ``content`` (0 if missing or None)."""
    content = message.get("content")
    return 0 if content is None else len(content)


def estimate_tokens(messages: list[dict]) -> int:
    """Roughly estimate the token count of a chat history.

    Uses the ~4-characters-per-token rule of thumb on each message's
    ``content``. Messages without content (missing key or ``None``) count
    as zero instead of raising.

    Args:
        messages: Chat messages as dicts with a ``content`` entry.

    Returns:
        Total content characters divided by four, rounded down.
    """
    return sum(_content_chars(m) for m in messages) // _CHARS_PER_TOKEN


def trim_history(
    messages: list[dict],
    reserved: int,
    max_context: "int | None" = None,
) -> list[dict]:
    """Drop the oldest messages until the history fits the context budget.

    The budget is ``max_context - reserved`` estimated tokens. The newest
    message is never removed, even if it alone exceeds the budget. When
    anything is trimmed, the cut is moved forward to the next ``user``
    message so the remaining history does not begin with an orphaned reply.

    Args:
        messages: Chat history, oldest first. Trimmed in place.
        reserved: Tokens to keep free for the model's answer.
        max_context: Context window size in tokens; defaults to the
            module-level ``MAX_CONTEXT``.

    Returns:
        The same list object that was passed in, after trimming.
    """
    if max_context is None:
        max_context = MAX_CONTEXT  # resolved at call time, not import time
    budget = max_context - reserved

    # Single pass with a running total instead of re-summing the whole
    # history and shifting the list (pop(0)) for every dropped message.
    total_chars = sum(_content_chars(m) for m in messages)
    last = len(messages) - 1
    cut = 0
    while cut < last and total_chars // _CHARS_PER_TOKEN > budget:
        total_chars -= _content_chars(messages[cut])
        cut += 1

    # Only realign when something was actually trimmed, so an untouched
    # history is returned exactly as given.
    if cut:
        while cut < last and messages[cut].get("role") != "user":
            cut += 1

    del messages[:cut]
    return messages


RUNNABLE_EXTENSIONS = {".py", ".tex"}
RUN_TIMEOUT = 30


def _as_text(data) -> str:
    """Normalize captured subprocess output (str, bytes or None) to str."""
    if data is None:
        return ""
    if isinstance(data, bytes):
        return data.decode(errors="replace")
    return data


def run_file(file_path: Path) -> dict:
    """Execute a ``.py`` file or compile a ``.tex`` file and capture the output.

    The command runs with the file's directory as working directory, no
    stdin, and a ``RUN_TIMEOUT``-second limit.

    NOTE: this executes whatever the file contains with the privileges of
    the app process; it is not sandboxed.

    Args:
        file_path: File to run; its suffix selects the command.

    Returns:
        A dict with ``stdout`` and ``stderr`` (str) and ``rc`` (int).
        ``rc`` is the process exit code, ``1`` for an unsupported suffix,
        or ``-1`` if the command timed out or could not be started.
    """
    import sys  # local import: only needed here

    suffix = file_path.suffix
    cwd = file_path.parent.resolve()

    if suffix == ".py":
        # Use the interpreter running this app so the script sees the same
        # environment; a bare "python3" may be another install or missing.
        cmd = [sys.executable or "python3", file_path.name]
    elif suffix == ".tex":
        cmd = [
            "pdflatex",
            "-interaction=nonstopmode",
            f"-output-directory={cwd}",
            file_path.name,
        ]
    else:
        return {"stdout": "", "stderr": f"Unsupported file type: {suffix}", "rc": 1}

    try:
        proc = subprocess.run(
            cmd,
            cwd=cwd,
            # Without this the child inherits the app's stdin and a script
            # calling input() blocks until the timeout.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            timeout=RUN_TIMEOUT,
        )
    except subprocess.TimeoutExpired as exc:
        # Keep whatever was printed before the kill; it usually shows
        # where the program got stuck.
        partial_err = _as_text(exc.stderr)
        notice = f"Timed out after {RUN_TIMEOUT}s"
        return {
            "stdout": _as_text(exc.stdout),
            "stderr": f"{partial_err}\n{notice}" if partial_err else notice,
            "rc": -1,
        }
    except OSError as exc:
        # Executable missing (FileNotFoundError), not executable, etc.
        return {"stdout": "", "stderr": str(exc), "rc": -1}

    return {"stdout": proc.stdout, "stderr": proc.stderr, "rc": proc.returncode}
-123,6 +181,13 @@ with tab_chat: st.session_state.messages.append({"role": "assistant", "content": full_response}) if st.session_state.messages: + used = estimate_tokens(st.session_state.messages) + pct = min(used / MAX_CONTEXT, 1.0) + label = f"Context: ~{used:,} / {MAX_CONTEXT:,} tokens" + if pct > 0.8: + label += " ⚠️ nearing limit — older messages will be trimmed" + st.progress(pct, text=label) + col_clear, col_save = st.columns([1, 3]) with col_clear: if st.button("Clear Chat"): @@ -149,54 +214,103 @@ with tab_editor: content = file_path.read_text() if file_path.exists() else "" suffix = file_path.suffix lang = LANG_MAP.get(suffix, "text") + runnable = suffix in RUNNABLE_EXTENSIONS - st.code(content, language=lang if lang != "text" else None, line_numbers=True) + if runnable: + col_edit, col_term = st.columns([3, 2]) + else: + col_edit = st.container() - edited = st.text_area( - "Edit below:", - value=content, - height=400, - key=f"editor_{selected_file}_{hash(content)}", - ) + with col_edit: + st.code(content, language=lang if lang != "text" else None, line_numbers=True) - col_save, col_gen = st.columns(2) - - with col_save: - if st.button("Save File"): - file_path.write_text(edited) - st.success(f"Saved {selected_file}") - st.rerun() - - with col_gen: - gen_prompt = st.text_input( - "Generation instruction", - placeholder="e.g. Add error handling / Fix the LaTeX formatting", - key="gen_prompt", + edited = st.text_area( + "Edit below:", + value=content, + height=400, + key=f"editor_{selected_file}_{hash(content)}", ) - if st.button("Generate with LLM") and gen_prompt: - with st.spinner("Generating..."): - response = client.chat.completions.create( - model=MODEL, - messages=[ - {"role": "system", "content": ( - f"You are a coding assistant. The user has a {lang} file. " - "Return ONLY the raw file content inside a single code block. " - "No explanations, no comments about changes." 
- )}, - {"role": "user", "content": ( - f"Here is my {lang} file:\n\n```\n{edited}\n```\n\n" - f"Instruction: {gen_prompt}" - )}, - ], - max_tokens=max_tokens, - temperature=temperature, - top_p=top_p, - extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}}, - ) - result = response.choices[0].message.content - code = extract_code(result, lang) - file_path.write_text(code) - st.success("File updated by LLM") + + col_save, col_gen = st.columns(2) + + with col_save: + if st.button("Save File"): + file_path.write_text(edited) + st.success(f"Saved {selected_file}") st.rerun() + + with col_gen: + gen_prompt = st.text_input( + "Generation instruction", + placeholder="e.g. Add error handling / Fix the LaTeX formatting", + key="gen_prompt", + ) + if st.button("Generate with LLM") and gen_prompt: + with st.spinner("Generating..."): + response = client.chat.completions.create( + model=MODEL, + messages=[ + {"role": "system", "content": ( + f"You are a coding assistant. The user has a {lang} file. " + "Return ONLY the raw file content inside a single code block. " + "No explanations, no comments about changes." 
+ )}, + {"role": "user", "content": ( + f"Here is my {lang} file:\n\n```\n{edited}\n```\n\n" + f"Instruction: {gen_prompt}" + )}, + ], + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}}, + ) + result = response.choices[0].message.content + code = extract_code(result, lang) + file_path.write_text(code) + st.success("File updated by LLM") + st.rerun() + + if runnable: + with col_term: + run_label = "Compile LaTeX" if suffix == ".tex" else "Run Python" + st.subheader("Terminal Output") + + if st.button(run_label, type="primary"): + file_path.write_text(edited) + with st.spinner(f"{'Compiling' if suffix == '.tex' else 'Running'}..."): + result = run_file(file_path) + st.session_state["last_run"] = result + + result = st.session_state.get("last_run") + if result: + if result["rc"] == 0: + st.success(f"Exit code: {result['rc']}") + else: + st.error(f"Exit code: {result['rc']}") + + if result["stdout"]: + st.text_area( + "stdout", + value=result["stdout"], + height=300, + disabled=True, + key="run_stdout", + ) + if result["stderr"]: + st.text_area( + "stderr", + value=result["stderr"], + height=200, + disabled=True, + key="run_stderr", + ) + if not result["stdout"] and not result["stderr"]: + st.info("No output produced.") + else: + st.caption( + f"Click **{run_label}** to execute the file " + f"(timeout: {RUN_TIMEOUT}s)." + ) else: st.info("Create a file in the sidebar to start editing.")