From 12f9e3ac9bf5e3c6683fd0e819f9d59fcbd3aa99 Mon Sep 17 00:00:00 2001 From: herzogflorian Date: Mon, 2 Mar 2026 16:41:05 +0100 Subject: [PATCH] Add LLM parameter controls to sidebar Thinking mode toggle, temperature, max tokens, top_p, and presence penalty sliders in the Streamlit sidebar. Parameters apply to both chat and file editor generation. Made-with: Cursor --- app.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/app.py b/app.py index 9e0ea98..faf9cdd 100644 --- a/app.py +++ b/app.py @@ -18,6 +18,7 @@ from pathlib import Path # --------------------------------------------------------------------------- # Configuration # --------------------------------------------------------------------------- +st.sidebar.header("Connection") API_BASE = st.sidebar.text_input("API Base URL", "http://silicon.fhgr.ch:7080/v1") API_KEY = st.sidebar.text_input("API Key", "EMPTY", type="password") MODEL = "qwen3.5-35b-a3b" @@ -26,6 +27,23 @@ WORKSPACE.mkdir(exist_ok=True) client = OpenAI(base_url=API_BASE, api_key=API_KEY) +# --------------------------------------------------------------------------- +# Sidebar — LLM Parameters +# --------------------------------------------------------------------------- +st.sidebar.markdown("---") +st.sidebar.header("LLM Parameters") + +thinking_mode = st.sidebar.toggle("Thinking Mode", value=False, + help="Enable chain-of-thought reasoning. Better for complex tasks, slower for simple ones.") +temperature = st.sidebar.slider("Temperature", 0.0, 2.0, 0.7, 0.05, + help="Lower = deterministic, higher = creative.") +max_tokens = st.sidebar.slider("Max Tokens", 256, 16384, 4096, 256, + help="Maximum length of the response.") +top_p = st.sidebar.slider("Top P", 0.0, 1.0, 0.95, 0.05, + help="Nucleus sampling: only consider tokens within this cumulative probability.") +presence_penalty = st.sidebar.slider("Presence Penalty", 0.0, 2.0, 0.0, 0.1, + help="Penalize repeated topics. Higher values encourage the model to talk about new topics.") + LANG_MAP = { ".py": "python", ".tex": "latex", ".js": "javascript", ".html": "html", ".css": "css", ".sh": "bash", @@ -89,10 +107,12 @@ with tab_chat: stream = client.chat.completions.create( model=MODEL, messages=st.session_state.messages, - max_tokens=8092, - temperature=0.2, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + presence_penalty=presence_penalty, stream=True, - extra_body={"chat_template_kwargs": {"enable_thinking": True}}, + extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}}, ) for chunk in stream: delta = chunk.choices[0].delta.content or "" @@ -168,9 +188,10 @@ with tab_editor: f"Instruction: {gen_prompt}" )}, ], - max_tokens=16384, - temperature=0.6, - extra_body={"chat_template_kwargs": {"enable_thinking": False}}, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}}, ) result = response.choices[0].message.content code = extract_code(result, lang)