Add LLM parameter controls to sidebar
Adds a Thinking Mode toggle and sliders for temperature, max tokens, top_p, and presence penalty to the Streamlit sidebar. The parameters apply to both chat and file editor generation, except presence penalty, which is passed only to the chat request. Made-with: Cursor
This commit is contained in:
parent
9e1e0c0751
commit
12f9e3ac9b
33
app.py
33
app.py
@ -18,6 +18,7 @@ from pathlib import Path
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Configuration
|
# Configuration
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
st.sidebar.header("Connection")
|
||||||
API_BASE = st.sidebar.text_input("API Base URL", "http://silicon.fhgr.ch:7080/v1")
|
API_BASE = st.sidebar.text_input("API Base URL", "http://silicon.fhgr.ch:7080/v1")
|
||||||
API_KEY = st.sidebar.text_input("API Key", "EMPTY", type="password")
|
API_KEY = st.sidebar.text_input("API Key", "EMPTY", type="password")
|
||||||
MODEL = "qwen3.5-35b-a3b"
|
MODEL = "qwen3.5-35b-a3b"
|
||||||
@ -26,6 +27,23 @@ WORKSPACE.mkdir(exist_ok=True)
|
|||||||
|
|
||||||
client = OpenAI(base_url=API_BASE, api_key=API_KEY)
|
client = OpenAI(base_url=API_BASE, api_key=API_KEY)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Sidebar — LLM Parameters
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
st.sidebar.markdown("---")
|
||||||
|
st.sidebar.header("LLM Parameters")
|
||||||
|
|
||||||
|
thinking_mode = st.sidebar.toggle("Thinking Mode", value=False,
|
||||||
|
help="Enable chain-of-thought reasoning. Better for complex tasks, slower for simple ones.")
|
||||||
|
temperature = st.sidebar.slider("Temperature", 0.0, 2.0, 0.7, 0.05,
|
||||||
|
help="Lower = deterministic, higher = creative.")
|
||||||
|
max_tokens = st.sidebar.slider("Max Tokens", 256, 16384, 4096, 256,
|
||||||
|
help="Maximum length of the response.")
|
||||||
|
top_p = st.sidebar.slider("Top P", 0.0, 1.0, 0.95, 0.05,
|
||||||
|
help="Nucleus sampling: only consider tokens within this cumulative probability.")
|
||||||
|
presence_penalty = st.sidebar.slider("Presence Penalty", 0.0, 2.0, 0.0, 0.1,
|
||||||
|
help="Penalize repeated topics. Higher values encourage the model to talk about new topics.")
|
||||||
|
|
||||||
LANG_MAP = {
|
LANG_MAP = {
|
||||||
".py": "python", ".tex": "latex", ".js": "javascript",
|
".py": "python", ".tex": "latex", ".js": "javascript",
|
||||||
".html": "html", ".css": "css", ".sh": "bash",
|
".html": "html", ".css": "css", ".sh": "bash",
|
||||||
@ -89,10 +107,12 @@ with tab_chat:
|
|||||||
stream = client.chat.completions.create(
|
stream = client.chat.completions.create(
|
||||||
model=MODEL,
|
model=MODEL,
|
||||||
messages=st.session_state.messages,
|
messages=st.session_state.messages,
|
||||||
max_tokens=8092,
|
max_tokens=max_tokens,
|
||||||
temperature=0.2,
|
temperature=temperature,
|
||||||
|
top_p=top_p,
|
||||||
|
presence_penalty=presence_penalty,
|
||||||
stream=True,
|
stream=True,
|
||||||
extra_body={"chat_template_kwargs": {"enable_thinking": True}},
|
extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
|
||||||
)
|
)
|
||||||
for chunk in stream:
|
for chunk in stream:
|
||||||
delta = chunk.choices[0].delta.content or ""
|
delta = chunk.choices[0].delta.content or ""
|
||||||
@ -168,9 +188,10 @@ with tab_editor:
|
|||||||
f"Instruction: {gen_prompt}"
|
f"Instruction: {gen_prompt}"
|
||||||
)},
|
)},
|
||||||
],
|
],
|
||||||
max_tokens=16384,
|
max_tokens=max_tokens,
|
||||||
temperature=0.6,
|
temperature=temperature,
|
||||||
extra_body={"chat_template_kwargs": {"enable_thinking": False}},
|
top_p=top_p,
|
||||||
|
extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
|
||||||
)
|
)
|
||||||
result = response.choices[0].message.content
|
result = response.choices[0].message.content
|
||||||
code = extract_code(result, lang)
|
code = extract_code(result, lang)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user