herzogflorian 12f9e3ac9b Add LLM parameter controls to sidebar
Thinking mode toggle, temperature, max tokens, top_p, and presence
penalty sliders in the Streamlit sidebar. Parameters apply to both
chat and file editor generation.

Made-with: Cursor
2026-03-02 16:41:05 +01:00

203 lines
8.0 KiB
Python

"""
Streamlit Chat & File Editor for Qwen3.5-35B-A3B
A minimal interface to:
1. Chat with the local LLM (OpenAI-compatible API)
2. Edit, save, and generate code / LaTeX files
Usage:
pip install streamlit openai
streamlit run app.py
"""
import re
import streamlit as st
from openai import OpenAI
from pathlib import Path
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
MODEL = "qwen3.5-35b-a3b"      # model name as served by the endpoint
WORKSPACE = Path("workspace")  # all editable files live in this directory
WORKSPACE.mkdir(exist_ok=True)

st.sidebar.header("Connection")
API_BASE = st.sidebar.text_input("API Base URL", "http://silicon.fhgr.ch:7080/v1")
API_KEY = st.sidebar.text_input("API Key", "EMPTY", type="password")

# OpenAI-compatible client pointed at the configured server; local servers
# typically accept any placeholder key, but the SDK requires one.
client = OpenAI(base_url=API_BASE, api_key=API_KEY)
# ---------------------------------------------------------------------------
# Sidebar — LLM Parameters
# ---------------------------------------------------------------------------
st.sidebar.markdown("---")
st.sidebar.header("LLM Parameters")

# Qwen's optional chain-of-thought mode, forwarded via extra_body later.
thinking_mode = st.sidebar.toggle(
    "Thinking Mode",
    value=False,
    help="Enable chain-of-thought reasoning. Better for complex tasks, slower for simple ones.",
)

# Sampling controls shared by the chat tab and the file-editor generation.
temperature = st.sidebar.slider(
    "Temperature", 0.0, 2.0, 0.7, 0.05,
    help="Lower = deterministic, higher = creative.",
)
max_tokens = st.sidebar.slider(
    "Max Tokens", 256, 16384, 4096, 256,
    help="Maximum length of the response.",
)
top_p = st.sidebar.slider(
    "Top P", 0.0, 1.0, 0.95, 0.05,
    help="Nucleus sampling: only consider tokens within this cumulative probability.",
)
presence_penalty = st.sidebar.slider(
    "Presence Penalty", 0.0, 2.0, 0.0, 0.1,
    help="Penalize repeated topics. Higher values encourage the model to talk about new topics.",
)
# File suffix → syntax-highlighting / markdown language tag.
LANG_MAP = {
    ".py": "python",
    ".tex": "latex",
    ".js": "javascript",
    ".html": "html",
    ".css": "css",
    ".sh": "bash",
    ".json": "json",
    ".yaml": "yaml",
    ".yml": "yaml",
}
def extract_code(text: str, lang: str = "") -> str:
    """Extract code from a markdown-formatted LLM response.

    If *lang* is given, a fenced block tagged with that language (e.g.
    ```python) is preferred; otherwise — and as a fallback — the first
    fenced block of any language is used.  If no fenced block exists at
    all, the whole text is returned stripped of surrounding whitespace.

    Args:
        text: Markdown text that may contain ``` fenced code blocks.
        lang: Optional language tag to prefer when several blocks exist.
            Previously accepted but ignored; with the default "" the
            behavior is identical to before.

    Returns:
        The contents of the selected code block, or the stripped text.
    """
    if lang:
        # Prefer a block explicitly tagged with the requested language.
        tagged = re.search(rf"```{re.escape(lang)}\n(.*?)```", text, re.DOTALL)
        if tagged:
            return tagged.group(1).strip()
    match = re.search(r"```(?:\w*)\n(.*?)```", text, re.DOTALL)
    if match:
        return match.group(1).strip()
    return text.strip()
# ---------------------------------------------------------------------------
# Sidebar — File Manager
# ---------------------------------------------------------------------------
st.sidebar.markdown("---")
st.sidebar.header("File Manager")

new_filename = st.sidebar.text_input("New file name", placeholder="main.tex")
# The button must render unconditionally; creation only fires with a name.
if st.sidebar.button("Create File") and new_filename:
    (WORKSPACE / new_filename).touch()
    st.sidebar.success(f"Created {new_filename}")
    st.rerun()

# Every regular file in the workspace, sorted alphabetically.
if WORKSPACE.exists():
    available = [entry.name for entry in sorted(WORKSPACE.iterdir()) if entry.is_file()]
else:
    available = []
selected_file = st.sidebar.selectbox("Open file", available or ["(no files)"])
# ---------------------------------------------------------------------------
# Main Layout — Two Tabs
# ---------------------------------------------------------------------------
tab_chat, tab_editor = st.tabs(["Chat", "File Editor"])

# ---------------------------------------------------------------------------
# Tab 1: Chat
# ---------------------------------------------------------------------------
with tab_chat:
    st.header("Chat with Qwen3.5")
    # Persist the conversation across Streamlit reruns.
    if "messages" not in st.session_state:
        st.session_state.messages = []
    # Replay the transcript so it survives each rerun.
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    if prompt := st.chat_input("Ask anything..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        with st.chat_message("assistant"):
            placeholder = st.empty()
            full_response = ""
            stream = client.chat.completions.create(
                model=MODEL,
                messages=st.session_state.messages,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                presence_penalty=presence_penalty,
                stream=True,
                # Server-side chat-template switch for Qwen's thinking mode.
                extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
            )
            for chunk in stream:
                # Fix: some OpenAI-compatible servers emit keep-alive chunks
                # with an empty `choices` list — indexing [0] would raise
                # IndexError, so skip them.
                if not chunk.choices:
                    continue
                full_response += chunk.choices[0].delta.content or ""
                # (Removed a no-op `+ ""` concatenation from the original.)
                placeholder.markdown(full_response)
            placeholder.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})

    if st.session_state.messages:
        col_clear, col_save = st.columns([1, 3])
        with col_clear:
            if st.button("Clear Chat"):
                st.session_state.messages = []
                st.rerun()
        with col_save:
            # Save the last message's first code block into the selected file.
            if selected_file and selected_file != "(no files)":
                if st.button(f"Save code → {selected_file}"):
                    last = st.session_state.messages[-1]["content"]
                    suffix = Path(selected_file).suffix
                    lang = LANG_MAP.get(suffix, "")
                    code = extract_code(last, lang)
                    (WORKSPACE / selected_file).write_text(code)
                    st.success(f"Extracted code saved to workspace/{selected_file}")
# ---------------------------------------------------------------------------
# Tab 2: File Editor
# ---------------------------------------------------------------------------
with tab_editor:
    st.header("File Editor")
    if selected_file and selected_file != "(no files)":
        file_path = WORKSPACE / selected_file
        content = file_path.read_text() if file_path.exists() else ""
        suffix = file_path.suffix
        lang = LANG_MAP.get(suffix, "text")
        # Read-only highlighted preview above the editable text area.
        st.code(content, language=lang if lang != "text" else None, line_numbers=True)
        edited = st.text_area(
            "Edit below:",
            value=content,
            height=400,
            # Keying on a content hash resets the widget when the file is
            # changed externally (e.g. rewritten by the LLM below).
            key=f"editor_{selected_file}_{hash(content)}",
        )
        col_save, col_gen = st.columns(2)
        with col_save:
            if st.button("Save File"):
                file_path.write_text(edited)
                st.success(f"Saved {selected_file}")
                st.rerun()
        with col_gen:
            gen_prompt = st.text_input(
                "Generation instruction",
                placeholder="e.g. Add error handling / Fix the LaTeX formatting",
                key="gen_prompt",
            )
            if st.button("Generate with LLM") and gen_prompt:
                with st.spinner("Generating..."):
                    response = client.chat.completions.create(
                        model=MODEL,
                        messages=[
                            {"role": "system", "content": (
                                f"You are a coding assistant. The user has a {lang} file. "
                                "Return ONLY the raw file content inside a single code block. "
                                "No explanations, no comments about changes."
                            )},
                            {"role": "user", "content": (
                                f"Here is my {lang} file:\n\n```\n{edited}\n```\n\n"
                                f"Instruction: {gen_prompt}"
                            )},
                        ],
                        max_tokens=max_tokens,
                        temperature=temperature,
                        top_p=top_p,
                        # Fix: the chat tab already forwards this slider; the
                        # sidebar says parameters apply to both call sites.
                        presence_penalty=presence_penalty,
                        extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
                    )
                    result = response.choices[0].message.content
                    code = extract_code(result, lang)
                    file_path.write_text(code)
                    st.success("File updated by LLM")
                    st.rerun()
    else:
        st.info("Create a file in the sidebar to start editing.")