herzogflorian 12f9e3ac9b Add LLM parameter controls to sidebar
Thinking mode toggle, temperature, max tokens, top_p, and presence
penalty sliders in the Streamlit sidebar. Parameters apply to both
chat and file editor generation.

Made-with: Cursor
2026-03-02 16:41:05 +01:00

203 lines
8.0 KiB
Python

"""
Streamlit Chat & File Editor for Qwen3.5-35B-A3B
A minimal interface to:
1. Chat with the local LLM (OpenAI-compatible API)
2. Edit, save, and generate code / LaTeX files
Usage:
pip install streamlit openai
streamlit run app.py
"""
import re
import streamlit as st
from openai import OpenAI
from pathlib import Path
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
MODEL = "qwen3.5-35b-a3b"      # model name as served by the endpoint
WORKSPACE = Path("workspace")  # all editable files live in this directory
WORKSPACE.mkdir(exist_ok=True)

st.sidebar.header("Connection")
API_BASE = st.sidebar.text_input("API Base URL", "http://silicon.fhgr.ch:7080/v1")
API_KEY = st.sidebar.text_input("API Key", "EMPTY", type="password")

# OpenAI-compatible client pointed at the configured server; local servers
# typically accept any placeholder key, but the SDK requires one.
client = OpenAI(base_url=API_BASE, api_key=API_KEY)
# ---------------------------------------------------------------------------
# Sidebar — LLM Parameters
# ---------------------------------------------------------------------------
st.sidebar.markdown("---")
st.sidebar.header("LLM Parameters")

# Qwen's optional chain-of-thought mode, forwarded via extra_body later.
thinking_mode = st.sidebar.toggle(
    "Thinking Mode",
    value=False,
    help="Enable chain-of-thought reasoning. Better for complex tasks, slower for simple ones.",
)

# Sampling controls shared by the chat tab and the file-editor generation.
temperature = st.sidebar.slider(
    "Temperature", 0.0, 2.0, 0.7, 0.05,
    help="Lower = deterministic, higher = creative.",
)
max_tokens = st.sidebar.slider(
    "Max Tokens", 256, 16384, 4096, 256,
    help="Maximum length of the response.",
)
top_p = st.sidebar.slider(
    "Top P", 0.0, 1.0, 0.95, 0.05,
    help="Nucleus sampling: only consider tokens within this cumulative probability.",
)
presence_penalty = st.sidebar.slider(
    "Presence Penalty", 0.0, 2.0, 0.0, 0.1,
    help="Penalize repeated topics. Higher values encourage the model to talk about new topics.",
)
# File suffix → syntax-highlighting / markdown language tag.
LANG_MAP = {
    ".py": "python",
    ".tex": "latex",
    ".js": "javascript",
    ".html": "html",
    ".css": "css",
    ".sh": "bash",
    ".json": "json",
    ".yaml": "yaml",
    ".yml": "yaml",
}
def extract_code(text: str, lang: str = "") -> str:
    """Extract code from a markdown-formatted LLM response.

    If *lang* is given, a fenced block tagged with that language (e.g.
    ```python) is preferred; otherwise — and as a fallback — the first
    fenced block of any language is used.  If no fenced block exists at
    all, the whole text is returned stripped of surrounding whitespace.

    Args:
        text: Markdown text that may contain ``` fenced code blocks.
        lang: Optional language tag to prefer when several blocks exist.
            Previously accepted but ignored; with the default "" the
            behavior is identical to before.

    Returns:
        The contents of the selected code block, or the stripped text.
    """
    if lang:
        # Prefer a block explicitly tagged with the requested language.
        tagged = re.search(rf"```{re.escape(lang)}\n(.*?)```", text, re.DOTALL)
        if tagged:
            return tagged.group(1).strip()
    match = re.search(r"```(?:\w*)\n(.*?)```", text, re.DOTALL)
    if match:
        return match.group(1).strip()
    return text.strip()
# ---------------------------------------------------------------------------
# Sidebar — File Manager
# ---------------------------------------------------------------------------
st.sidebar.markdown("---")
st.sidebar.header("File Manager")

new_filename = st.sidebar.text_input("New file name", placeholder="main.tex")
# The button must render unconditionally; creation only fires with a name.
if st.sidebar.button("Create File") and new_filename:
    (WORKSPACE / new_filename).touch()
    st.sidebar.success(f"Created {new_filename}")
    st.rerun()

# Every regular file in the workspace, sorted alphabetically.
if WORKSPACE.exists():
    available = [entry.name for entry in sorted(WORKSPACE.iterdir()) if entry.is_file()]
else:
    available = []
selected_file = st.sidebar.selectbox("Open file", available or ["(no files)"])
# ---------------------------------------------------------------------------
# Main Layout — Two Tabs
# ---------------------------------------------------------------------------
tab_chat, tab_editor = st.tabs(["Chat", "File Editor"])

# ---------------------------------------------------------------------------
# Tab 1: Chat
# ---------------------------------------------------------------------------
with tab_chat:
    st.header("Chat with Qwen3.5")
    # Persist the conversation across Streamlit reruns.
    if "messages" not in st.session_state:
        st.session_state.messages = []
    # Replay the transcript so it survives each rerun.
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    if prompt := st.chat_input("Ask anything..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        with st.chat_message("assistant"):
            placeholder = st.empty()
            full_response = ""
            stream = client.chat.completions.create(
                model=MODEL,
                messages=st.session_state.messages,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                presence_penalty=presence_penalty,
                stream=True,
                # Server-side chat-template switch for Qwen's thinking mode.
                extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
            )
            for chunk in stream:
                # Fix: some OpenAI-compatible servers emit keep-alive chunks
                # with an empty `choices` list — indexing [0] would raise
                # IndexError, so skip them.
                if not chunk.choices:
                    continue
                full_response += chunk.choices[0].delta.content or ""
                # (Removed a no-op `+ ""` concatenation from the original.)
                placeholder.markdown(full_response)
            placeholder.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})

    if st.session_state.messages:
        col_clear, col_save = st.columns([1, 3])
        with col_clear:
            if st.button("Clear Chat"):
                st.session_state.messages = []
                st.rerun()
        with col_save:
            # Save the last message's first code block into the selected file.
            if selected_file and selected_file != "(no files)":
                if st.button(f"Save code → {selected_file}"):
                    last = st.session_state.messages[-1]["content"]
                    suffix = Path(selected_file).suffix
                    lang = LANG_MAP.get(suffix, "")
                    code = extract_code(last, lang)
                    (WORKSPACE / selected_file).write_text(code)
                    st.success(f"Extracted code saved to workspace/{selected_file}")
# ---------------------------------------------------------------------------
# Tab 2: File Editor
# ---------------------------------------------------------------------------
with tab_editor:
    st.header("File Editor")
    if selected_file and selected_file != "(no files)":
        file_path = WORKSPACE / selected_file
        content = file_path.read_text() if file_path.exists() else ""
        suffix = file_path.suffix
        lang = LANG_MAP.get(suffix, "text")
        # Read-only highlighted preview above the editable text area.
        st.code(content, language=lang if lang != "text" else None, line_numbers=True)
        edited = st.text_area(
            "Edit below:",
            value=content,
            height=400,
            # Keying on a content hash resets the widget when the file is
            # changed externally (e.g. rewritten by the LLM below).
            key=f"editor_{selected_file}_{hash(content)}",
        )
        col_save, col_gen = st.columns(2)
        with col_save:
            if st.button("Save File"):
                file_path.write_text(edited)
                st.success(f"Saved {selected_file}")
                st.rerun()
        with col_gen:
            gen_prompt = st.text_input(
                "Generation instruction",
                placeholder="e.g. Add error handling / Fix the LaTeX formatting",
                key="gen_prompt",
            )
            if st.button("Generate with LLM") and gen_prompt:
                with st.spinner("Generating..."):
                    response = client.chat.completions.create(
                        model=MODEL,
                        messages=[
                            {"role": "system", "content": (
                                f"You are a coding assistant. The user has a {lang} file. "
                                "Return ONLY the raw file content inside a single code block. "
                                "No explanations, no comments about changes."
                            )},
                            {"role": "user", "content": (
                                f"Here is my {lang} file:\n\n```\n{edited}\n```\n\n"
                                f"Instruction: {gen_prompt}"
                            )},
                        ],
                        max_tokens=max_tokens,
                        temperature=temperature,
                        top_p=top_p,
                        # Fix: the chat tab already forwards this slider; the
                        # sidebar says parameters apply to both call sites.
                        presence_penalty=presence_penalty,
                        extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
                    )
                    result = response.choices[0].message.content
                    code = extract_code(result, lang)
                    file_path.write_text(code)
                    st.success("File updated by LLM")
                    st.rerun()
    else:
        st.info("Create a file in the sidebar to start editing.")