From a5657c3c1f0ad5ff4e6481aabc7ef3f78f852d36 Mon Sep 17 00:00:00 2001 From: herzogflorian Date: Mon, 2 Mar 2026 20:03:45 +0100 Subject: [PATCH] Add dynamic model discovery and improve code extraction in app Auto-detect available models from the vLLM API instead of hardcoding. Extract code blocks by matching on language tag and picking the largest block, avoiding false matches on short pip/run commands. Made-with: Cursor --- app.py | 50 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/app.py b/app.py index 0654aa0..7a6dc39 100644 --- a/app.py +++ b/app.py @@ -1,5 +1,5 @@ """ -Streamlit Chat & File Editor for Qwen3.5-35B-A3B +Streamlit Chat & File Editor for Qwen3.5 A minimal interface to: 1. Chat with the local LLM (OpenAI-compatible API) @@ -22,12 +22,29 @@ from pathlib import Path st.sidebar.header("Connection") API_BASE = st.sidebar.text_input("API Base URL", "http://silicon.fhgr.ch:7080/v1") API_KEY = st.sidebar.text_input("API Key", "EMPTY", type="password") -MODEL = "qwen3.5-35b-a3b" WORKSPACE = Path("workspace") WORKSPACE.mkdir(exist_ok=True) client = OpenAI(base_url=API_BASE, api_key=API_KEY) + +@st.cache_data(ttl=30) +def fetch_models(base_url: str, api_key: str) -> list[str]: + """Fetch available model IDs from the vLLM server.""" + try: + c = OpenAI(base_url=base_url, api_key=api_key) + return [m.id for m in c.models.list().data] + except Exception: + return [] + + +available_models = fetch_models(API_BASE, API_KEY) +if available_models: + MODEL = st.sidebar.selectbox("Model", available_models) +else: + MODEL = st.sidebar.text_input("Model (server unreachable)", "qwen3.5-35b-a3b") + st.sidebar.warning("Could not fetch models from server.") + # --------------------------------------------------------------------------- # Sidebar — LLM Parameters # --------------------------------------------------------------------------- @@ -56,13 +73,26 @@ MAX_CONTEXT = 32768 def extract_code(text: str, 
lang: str = "") -> str: - """Extract the first fenced code block from markdown text. - Falls back to the full text if no code block is found.""" - pattern = r"```(?:\w*)\n(.*?)```" - match = re.search(pattern, text, re.DOTALL) - if match: - return match.group(1).strip() - return text.strip() + """Extract the best code block from markdown text. + + Strategy: + 1. Prefer blocks tagged with the target language (e.g. ```python) + 2. Among candidates, pick the longest block + 3. Fall back to the longest block of any language + 4. Fall back to the full text if no fenced block is found + """ + tagged_pattern = r"```(\w*)\n(.*?)```" + matches = re.findall(tagged_pattern, text, re.DOTALL) + if not matches: + return text.strip() + + lang_lower = lang.lower() + lang_matches = [code for tag, code in matches if tag.lower() == lang_lower] + if lang_matches: + return max(lang_matches, key=len).strip() + + all_blocks = [code for _, code in matches] + return max(all_blocks, key=len).strip() def estimate_tokens(messages: list[dict]) -> int: @@ -140,7 +170,7 @@ tab_chat, tab_editor = st.tabs(["Chat", "File Editor"]) # Tab 1: Chat # --------------------------------------------------------------------------- with tab_chat: - st.header("Chat with Qwen3.5") + st.header(f"Chat with {MODEL}") if "messages" not in st.session_state: st.session_state.messages = []