Add dynamic model discovery and improve code extraction in app
Auto-detect available models from the vLLM API instead of hardcoding. Extract code blocks by matching on language tag and picking the largest block, avoiding false matches on short pip/run commands. Made-with: Cursor
This commit is contained in:
parent
a9ed1060cc
commit
a5657c3c1f
50
app.py
50
app.py
@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
Streamlit Chat & File Editor for Qwen3.5-35B-A3B
|
Streamlit Chat & File Editor for Qwen3.5
|
||||||
|
|
||||||
A minimal interface to:
|
A minimal interface to:
|
||||||
1. Chat with the local LLM (OpenAI-compatible API)
|
1. Chat with the local LLM (OpenAI-compatible API)
|
||||||
@ -22,12 +22,29 @@ from pathlib import Path
|
|||||||
st.sidebar.header("Connection")
|
st.sidebar.header("Connection")
|
||||||
API_BASE = st.sidebar.text_input("API Base URL", "http://silicon.fhgr.ch:7080/v1")
|
API_BASE = st.sidebar.text_input("API Base URL", "http://silicon.fhgr.ch:7080/v1")
|
||||||
API_KEY = st.sidebar.text_input("API Key", "EMPTY", type="password")
|
API_KEY = st.sidebar.text_input("API Key", "EMPTY", type="password")
|
||||||
MODEL = "qwen3.5-35b-a3b"
|
|
||||||
WORKSPACE = Path("workspace")
|
WORKSPACE = Path("workspace")
|
||||||
WORKSPACE.mkdir(exist_ok=True)
|
WORKSPACE.mkdir(exist_ok=True)
|
||||||
|
|
||||||
client = OpenAI(base_url=API_BASE, api_key=API_KEY)
|
client = OpenAI(base_url=API_BASE, api_key=API_KEY)
|
||||||
|
|
||||||
|
|
||||||
|
@st.cache_data(ttl=30)
def fetch_models(base_url: str, api_key: str) -> list[str]:
    """Fetch the available model IDs from the vLLM server.

    Results are cached for 30 seconds via Streamlit so the sidebar does
    not hammer the server on every rerun. Returns an empty list when
    the server is unreachable, letting the caller fall back to manual
    model entry.
    """
    try:
        # Best-effort probe: any failure (DNS, refused, auth) yields [].
        api = OpenAI(base_url=base_url, api_key=api_key)
        return [model.id for model in api.models.list().data]
    except Exception:
        return []
|
||||||
|
|
||||||
|
|
||||||
|
# Populate the sidebar model picker from the live server; if the probe
# came back empty, fall back to a free-text field plus a warning.
available_models = fetch_models(API_BASE, API_KEY)
if not available_models:
    MODEL = st.sidebar.text_input("Model (server unreachable)", "qwen3.5-35b-a3b")
    st.sidebar.warning("Could not fetch models from server.")
else:
    MODEL = st.sidebar.selectbox("Model", available_models)
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Sidebar — LLM Parameters
|
# Sidebar — LLM Parameters
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@ -56,13 +73,26 @@ MAX_CONTEXT = 32768
|
|||||||
|
|
||||||
|
|
||||||
def extract_code(text: str, lang: str = "") -> str:
    """Extract the best fenced code block from markdown text.

    Strategy:
    1. Prefer blocks whose fence tag matches *lang* (e.g. ```python),
       so a short ```bash pip/run snippet is not picked by mistake.
    2. Among the matching candidates, return the longest block.
    3. If no block carries the requested tag, return the longest block
       of any tag.
    4. If the text contains no fenced block at all, return the whole
       text stripped of surrounding whitespace.

    Args:
        text: Markdown-formatted text, typically an LLM response.
        lang: Fence tag to prefer, matched case-insensitively. The
            default empty string prefers untagged ``` blocks, since the
            tag group also matches an empty tag.

    Returns:
        The selected code block (or the full text), stripped.
    """
    # Capture (fence tag, block body) for every fenced block.
    tagged_pattern = r"```(\w*)\n(.*?)```"
    matches = re.findall(tagged_pattern, text, re.DOTALL)
    if not matches:
        return text.strip()

    # Case-insensitive tag comparison: ```Python matches lang="python".
    lang_lower = lang.lower()
    lang_matches = [code for tag, code in matches if tag.lower() == lang_lower]
    if lang_matches:
        return max(lang_matches, key=len).strip()

    # Nothing tagged with the requested language — take the biggest block.
    all_blocks = [code for _, code in matches]
    return max(all_blocks, key=len).strip()
|
||||||
|
|
||||||
|
|
||||||
def estimate_tokens(messages: list[dict]) -> int:
|
def estimate_tokens(messages: list[dict]) -> int:
|
||||||
@ -140,7 +170,7 @@ tab_chat, tab_editor = st.tabs(["Chat", "File Editor"])
|
|||||||
# Tab 1: Chat
|
# Tab 1: Chat
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
with tab_chat:
|
with tab_chat:
|
||||||
st.header("Chat with Qwen3.5")
|
st.header(f"Chat with {MODEL}")
|
||||||
|
|
||||||
if "messages" not in st.session_state:
|
if "messages" not in st.session_state:
|
||||||
st.session_state.messages = []
|
st.session_state.messages = []
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user