From a5657c3c1f0ad5ff4e6481aabc7ef3f78f852d36 Mon Sep 17 00:00:00 2001 From: herzogflorian Date: Mon, 2 Mar 2026 20:03:45 +0100 Subject: [PATCH] Add dynamic model discovery and improve code extraction in app Auto-detect available models from the vLLM API instead of hardcoding. Extract code blocks by matching on language tag and picking the largest block, avoiding false matches on short pip/run commands. Made-with: Cursor --- app.py | 50 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/app.py b/app.py index 0654aa0..7a6dc39 100644 --- a/app.py +++ b/app.py @@ -1,5 +1,5 @@ """ -Streamlit Chat & File Editor for Qwen3.5-35B-A3B +Streamlit Chat & File Editor for Qwen3.5 A minimal interface to: 1. Chat with the local LLM (OpenAI-compatible API) @@ -22,12 +22,29 @@ from pathlib import Path st.sidebar.header("Connection") API_BASE = st.sidebar.text_input("API Base URL", "http://silicon.fhgr.ch:7080/v1") API_KEY = st.sidebar.text_input("API Key", "EMPTY", type="password") -MODEL = "qwen3.5-35b-a3b" WORKSPACE = Path("workspace") WORKSPACE.mkdir(exist_ok=True) client = OpenAI(base_url=API_BASE, api_key=API_KEY) + +@st.cache_data(ttl=30) +def fetch_models(base_url: str, api_key: str) -> list[str]: + """Fetch available model IDs from the vLLM server.""" + try: + c = OpenAI(base_url=base_url, api_key=api_key) + return [m.id for m in c.models.list().data] + except Exception: + return [] + + +available_models = fetch_models(API_BASE, API_KEY) +if available_models: + MODEL = st.sidebar.selectbox("Model", available_models) +else: + MODEL = st.sidebar.text_input("Model (server unreachable)", "qwen3.5-35b-a3b") + st.sidebar.warning("Could not fetch models from server.") + # --------------------------------------------------------------------------- # Sidebar — LLM Parameters # --------------------------------------------------------------------------- @@ -56,13 +73,26 @@ MAX_CONTEXT = 32768 def extract_code(text: str, 
lang: str = "") -> str: - """Extract the first fenced code block from markdown text. - Falls back to the full text if no code block is found.""" - pattern = r"```(?:\w*)\n(.*?)```" - match = re.search(pattern, text, re.DOTALL) - if match: - return match.group(1).strip() - return text.strip() + """Extract the best code block from markdown text. + + Strategy: + 1. Prefer blocks tagged with the target language (e.g. ```python) + 2. Among candidates, pick the longest block + 3. Fall back to the longest block of any language + 4. Fall back to the full text if no fenced block is found + """ + tagged_pattern = r"```(\w*)\n(.*?)```" + matches = re.findall(tagged_pattern, text, re.DOTALL) + if not matches: + return text.strip() + + lang_lower = lang.lower() + lang_matches = [code for tag, code in matches if tag.lower() == lang_lower] + if lang_matches: + return max(lang_matches, key=len).strip() + + all_blocks = [code for _, code in matches] + return max(all_blocks, key=len).strip() def estimate_tokens(messages: list[dict]) -> int: @@ -140,7 +170,7 @@ tab_chat, tab_editor = st.tabs(["Chat", "File Editor"]) # Tab 1: Chat # --------------------------------------------------------------------------- with tab_chat: - st.header("Chat with Qwen3.5") + st.header(f"Chat with {MODEL}") if "messages" not in st.session_state: st.session_state.messages = []