From 12f9e3ac9bf5e3c6683fd0e819f9d59fcbd3aa99 Mon Sep 17 00:00:00 2001 From: herzogflorian Date: Mon, 2 Mar 2026 16:41:05 +0100 Subject: [PATCH] Add LLM parameter controls to sidebar Thinking mode toggle, temperature, max tokens, top_p, and presence penalty sliders in the Streamlit sidebar. Parameters apply to both chat and file editor generation. Made-with: Cursor --- app.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/app.py b/app.py index 9e0ea98..faf9cdd 100644 --- a/app.py +++ b/app.py @@ -18,6 +18,7 @@ from pathlib import Path # --------------------------------------------------------------------------- # Configuration # --------------------------------------------------------------------------- +st.sidebar.header("Connection") API_BASE = st.sidebar.text_input("API Base URL", "http://silicon.fhgr.ch:7080/v1") API_KEY = st.sidebar.text_input("API Key", "EMPTY", type="password") MODEL = "qwen3.5-35b-a3b" @@ -26,6 +27,23 @@ WORKSPACE.mkdir(exist_ok=True) client = OpenAI(base_url=API_BASE, api_key=API_KEY) +# --------------------------------------------------------------------------- +# Sidebar — LLM Parameters +# --------------------------------------------------------------------------- +st.sidebar.markdown("---") +st.sidebar.header("LLM Parameters") + +thinking_mode = st.sidebar.toggle("Thinking Mode", value=False, + help="Enable chain-of-thought reasoning. Better for complex tasks, slower for simple ones.") +temperature = st.sidebar.slider("Temperature", 0.0, 2.0, 0.7, 0.05, + help="Lower = deterministic, higher = creative.") +max_tokens = st.sidebar.slider("Max Tokens", 256, 16384, 4096, 256, + help="Maximum length of the response.") +top_p = st.sidebar.slider("Top P", 0.0, 1.0, 0.95, 0.05, + help="Nucleus sampling: only consider tokens within this cumulative probability.") +presence_penalty = st.sidebar.slider("Presence Penalty", 0.0, 2.0, 0.0, 0.1, + help="Penalize repeated topics. Higher values encourage the model to talk about new topics.") + LANG_MAP = { ".py": "python", ".tex": "latex", ".js": "javascript", ".html": "html", ".css": "css", ".sh": "bash", @@ -89,10 +107,12 @@ with tab_chat: stream = client.chat.completions.create( model=MODEL, messages=st.session_state.messages, - max_tokens=8092, - temperature=0.2, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + presence_penalty=presence_penalty, stream=True, - extra_body={"chat_template_kwargs": {"enable_thinking": True}}, + extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}}, ) for chunk in stream: delta = chunk.choices[0].delta.content or "" @@ -168,9 +188,10 @@ with tab_editor: f"Instruction: {gen_prompt}" )}, ], - max_tokens=16384, - temperature=0.6, - extra_body={"chat_template_kwargs": {"enable_thinking": False}}, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}}, ) result = response.choices[0].message.content code = extract_code(result, lang)