diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..aab4afb
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,172 @@
+# ---> Python
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+**/.env
+**/.venv
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..ab1f416
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,10 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Ignored default folder with query files
+/queries/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/.idea/aise-501_aise_in_se_i.iml b/.idea/aise-501_aise_in_se_i.iml
new file mode 100644
index 0000000..4b71d81
--- /dev/null
+++ b/.idea/aise-501_aise_in_se_i.iml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/.venv" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.12 (aise-501_aise_in_se_i)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml
new file mode 100644
index 0000000..28d6d4c
--- /dev/null
+++ b/.idea/dataSources.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="DataSourceManagerImpl" format="xml" multifile-model="true">
+    <data-source source="LOCAL" name="postgres@localhost" uuid="be9eece5-a8ff-447a-a6a9-4660fffe89da">
+      <driver-ref>postgresql</driver-ref>
+      <synchronize>true</synchronize>
+      <jdbc-driver>org.postgresql.Driver</jdbc-driver>
+      <jdbc-url>jdbc:postgresql://localhost:5432/postgres</jdbc-url>
+      <working-dir>$ProjectFileDir$</working-dir>
+    </data-source>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/data_source_mapping.xml b/.idea/data_source_mapping.xml
new file mode 100644
index 0000000..744fe16
--- /dev/null
+++ b/.idea/data_source_mapping.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="DataSourcePerFileMappings">
+    <file url="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.sql" value="be9eece5-a8ff-447a-a6a9-4660fffe89da" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..dd4c951
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,7 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="PROJECT_PROFILE" value="Default" />
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..27c5270
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.12" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (aise-501_aise_in_se_i)" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..5977a4a
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/aise-501_aise_in_se_i.iml" filepath="$PROJECT_DIR$/.idea/aise-501_aise_in_se_i.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/AISE501 LLM Zugang/STUDENT_GUIDE.md b/AISE501 LLM Zugang/STUDENT_GUIDE.md
new file mode 100644
index 0000000..33f2696
--- /dev/null
+++ b/AISE501 LLM Zugang/STUDENT_GUIDE.md	
@@ -0,0 +1,272 @@
+# Student Guide — Qwen3.5 Inference Server
+
+## Overview
+
+A **Qwen3.5** large language model is running on our GPU server. Two models
+may be available at different times (your instructor will let you know which
+one is active):
+
+| Model | Params | Best for |
+|-------|--------|----------|
+| `qwen3.5-35b-a3b` | 35B (3B active) | Fast responses, everyday tasks |
+| `qwen3.5-122b-a10b-fp8` | 122B (10B active) | Complex reasoning, coding, research |
+
+There are **three ways** to interact with the model:
+
+1. **Open WebUI** — ChatGPT-like interface in your browser (easiest)
+2. **Streamlit App** — Local app with chat, file editor, and code execution
+3. **Python SDK / curl** — Programmatic access via the OpenAI-compatible API
+
+> **Note**: You must be on the FHGR network (on campus or via VPN) to reach the server.
+
+## Connection Details
+
+| Parameter        | Value                                       |
+|------------------|---------------------------------------------|
+| **Open WebUI**   | `http://silicon.fhgr.ch:7081`               |
+| **API Base URL** | `http://silicon.fhgr.ch:7080/v1`            |
+| **Model**        | *(check Open WebUI model selector or ask your instructor)* |
+| **API Key**      | *(ask your instructor — may be `EMPTY`)*    |
+
+> **Tip**: In Open WebUI, the model dropdown at the top automatically shows
+> whichever model is currently running. For the API, use
+> `curl http://silicon.fhgr.ch:7080/v1/models` to check.
+
+---
+
+## Option 1: Open WebUI (Recommended)
+
+The easiest way to chat with the model — no installation required.
+
+### Getting Started
+
+1. Make sure you are connected to the **university network** (or VPN).
+2. Open your browser and go to **http://silicon.fhgr.ch:7081**
+3. Click **"Sign Up"** to create a new account:
+   - Enter your **name** (e.g. your first and last name)
+   - Enter your **email** (use your university email)
+   - Choose a **password**
+   - Click **"Create Account"**
+4. After signing up you are logged in automatically.
+5. Select the currently active model (e.g. **qwen3.5-35b-a3b**) from the model dropdown at the top.
+6. Type a message and press Enter — you're chatting with the LLM.
+
+### Returning Later
+
+- Go to **http://silicon.fhgr.ch:7081** and click **"Sign In"**.
+- Enter the email and password you used during sign-up.
+- All your previous chats are still there.
+
+### Features
+
+- **Chat history** — all conversations are saved on the server and persist across sessions
+- **Markdown rendering** with syntax-highlighted code blocks
+- **Model selector** — auto-discovers available models from the server
+- **Conversation branching** — edit previous messages and explore alternative responses
+- **File upload** — attach files to your messages for the model to analyze
+- **Search** — search across all your past conversations
+
+### Tips
+
+- Your account and chat history are stored on the server. You can log in
+  from any device on the university network.
+- If you forget your password, ask your instructor to reset it via the
+  Admin Panel.
+- The model works best when you provide clear, specific instructions.
+- For code tasks, mention the programming language explicitly (e.g.
+  "Write a Python function that...").
+- Long conversations use more context. Start a **New Chat** (top-left
+  button) when switching topics to get faster, more focused responses.
+
+---
+
+## Option 2: Streamlit App (Chat + File Editor)
+
+A local app with chat, file editing, and Python/LaTeX execution.
+See the [Streamlit section below](#streamlit-chat--file-editor-app) for setup.
+
+---
+
+## Option 3: Python SDK / curl
+
+For programmatic access and scripting.
+
+### Quick Start with Python
+
+#### 1. Install the OpenAI SDK
+
+```bash
+pip install openai
+```
+
+#### 2. Simple Chat
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://silicon.fhgr.ch:7080/v1",
+    api_key="EMPTY",  # replace if your instructor set a key
+)
+
+response = client.chat.completions.create(
+    model="qwen3.5-35b-a3b",
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Explain gradient descent in simple terms."},
+    ],
+    max_tokens=1024,
+    temperature=0.7,
+)
+
+print(response.choices[0].message.content)
+```
+
+#### 3. Streaming Responses
+
+```python
+stream = client.chat.completions.create(
+    model="qwen3.5-35b-a3b",
+    messages=[
+        {"role": "user", "content": "Write a haiku about machine learning."},
+    ],
+    max_tokens=256,
+    stream=True,
+)
+
+for chunk in stream:
+    if chunk.choices[0].delta.content:
+        print(chunk.choices[0].delta.content, end="", flush=True)
+print()
+```
+
+---
+
+### Quick Start with curl
+
+```bash
+curl http://silicon.fhgr.ch:7080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "qwen3.5-35b-a3b",
+    "messages": [
+      {"role": "user", "content": "What is the capital of Switzerland?"}
+    ],
+    "max_tokens": 256,
+    "temperature": 0.7
+  }'
+```
+
+---
+
+## Recommended Parameters
+
+| Parameter       | Recommended | Notes                                        |
+|-----------------|-------------|----------------------------------------------|
+| `temperature`   | 0.7         | Lower = more deterministic, higher = creative |
+| `max_tokens`    | 1024–4096   | Increase for long-form output                |
+| `top_p`         | 0.95        | Nucleus sampling                             |
+| `stream`        | `true`      | Better UX for interactive use                |
+
+---
+
+## Tips & Etiquette
+
+- **Be mindful of context length**: Avoid excessively long prompts (>8K tokens) unless necessary.
+- **Use streaming**: Makes responses feel faster and reduces perceived latency.
+- **Don't spam requests**: The server is shared among ~15 students.
+- **Check the model name**: Use the ID of the model that is currently running (e.g. `qwen3.5-35b-a3b`); `curl http://silicon.fhgr.ch:7080/v1/models` lists it.
+
+---
+
+## Streamlit Chat & File Editor App
+
+A web UI is included for chatting with the model and editing files. It runs
+on your own machine and connects to the GPU server.
+
+### Setup
+
+```bash
+# Clone the repository
+git clone https://gitea.fhgr.ch/herzogfloria/LLM_Inferenz_Server_1.git
+cd LLM_Inferenz_Server_1
+
+# Create a virtual environment and install dependencies
+python3 -m venv .venv
+source .venv/bin/activate        # macOS / Linux
+# .venv\Scripts\activate         # Windows
+pip install -r requirements.txt
+```
+
+### Run
+
+```bash
+streamlit run app.py
+```
+
+Opens at `http://localhost:8501` in your browser.
+
+### Features
+
+**Chat Tab**
+- Conversational interface with streaming responses
+- "Save code" button extracts code from the LLM response and saves it to a
+  workspace file (strips markdown formatting automatically)
+
+**File Editor Tab**
+- Create and edit `.py`, `.tex`, `.html`, or any text file
+- Syntax-highlighted preview of file content
+- "Generate with LLM" button: describe a change in natural language and the
+  model rewrites the file (e.g. "add error handling", "fix the LaTeX formatting",
+  "translate comments to German")
+
+**Sidebar Controls**
+- **Connection**: API Base URL and API Key
+- **LLM Parameters**: Adjustable for each request
+
+| Parameter | Default | What it does |
+|-----------|---------|--------------|
+| Thinking Mode | Off | Toggle chain-of-thought reasoning (better for complex tasks, slower) |
+| Temperature | 0.7 | Lower = predictable, higher = creative |
+| Max Tokens | 4096 | Maximum response length |
+| Top P | 0.95 | Nucleus sampling threshold |
+| Presence Penalty | 0.0 | Encourage diverse topics |
+
+- **File Manager**: Create new files and switch between them
+
+All generated files are stored in a `workspace/` folder in the directory the app is started from (next to `app.py` when you run it from the repository root).
+
+> **Tip**: The app runs entirely on your local machine. Only the LLM requests
+> go to the server — your files stay local.
+
+---
+
+## Thinking Mode
+
+By default, API requests let the model "think" before answering (internal chain-of-thought).
+This is great for complex reasoning but adds latency for simple questions. (The Streamlit app disables it unless you switch on **Thinking Mode**.)
+
+To disable thinking and get faster direct responses, add this to your API call:
+
+```python
+response = client.chat.completions.create(
+    model="qwen3.5-35b-a3b",
+    messages=[...],
+    max_tokens=1024,
+    extra_body={"chat_template_kwargs": {"enable_thinking": False}},
+)
+```
+
+---
+
+## Troubleshooting
+
+| Issue                       | Solution                                            |
+|-----------------------------|-----------------------------------------------------|
+| Connection refused          | Check you're on the university network / VPN        |
+| Model not found             | Use the exact ID returned by `/v1/models` (e.g. `qwen3.5-35b-a3b`) |
+| Slow responses              | The model is shared — peak times may be slower      |
+| `401 Unauthorized`          | Ask your instructor for the API key                 |
+| Response cut off            | Increase `max_tokens` in your request               |
+| Open WebUI login fails      | Make sure you created an account first (Sign Up)    |
+| Open WebUI shows no models  | The vLLM server may still be loading — wait a few minutes |
diff --git a/AISE501 LLM Zugang/app.py b/AISE501 LLM Zugang/app.py
new file mode 100644
index 0000000..7a6dc39
--- /dev/null
+++ b/AISE501 LLM Zugang/app.py	
@@ -0,0 +1,346 @@
+"""
+Streamlit Chat & File Editor for Qwen3.5
+
+A minimal interface to:
+  1. Chat with the local LLM (OpenAI-compatible API)
+  2. Edit, save, and generate code / LaTeX files
+
+Usage:
+  pip install streamlit openai
+  streamlit run app.py
+"""
+
+import re
+import subprocess
+import streamlit as st
+from openai import OpenAI
+from pathlib import Path
+
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+st.sidebar.header("Connection")
+API_BASE = st.sidebar.text_input("API Base URL", "http://silicon.fhgr.ch:7080/v1")
+API_KEY = st.sidebar.text_input("API Key", "EMPTY", type="password")  # masked input field
+WORKSPACE = Path("workspace")  # relative path: resolved against the current working directory
+WORKSPACE.mkdir(exist_ok=True)  # created on first run; no error if it already exists
+# Shared client used by the chat and file-generation requests further down.
+client = OpenAI(base_url=API_BASE, api_key=API_KEY)
+
+
+@st.cache_data(ttl=30)
+def fetch_models(base_url: str, api_key: str) -> list[str]:
+    """Fetch available model IDs from the vLLM server."""
+    try:
+        c = OpenAI(base_url=base_url, api_key=api_key)
+        return [m.id for m in c.models.list().data]
+    except Exception:
+        return []
+
+
+available_models = fetch_models(API_BASE, API_KEY)
+if available_models:
+    MODEL = st.sidebar.selectbox("Model", available_models)
+else:
+    MODEL = st.sidebar.text_input("Model (server unreachable)", "qwen3.5-35b-a3b")
+    st.sidebar.warning("Could not fetch models from server.")
+
+# ---------------------------------------------------------------------------
+# Sidebar — LLM Parameters
+# ---------------------------------------------------------------------------
+st.sidebar.markdown("---")
+st.sidebar.header("LLM Parameters")
+
+thinking_mode = st.sidebar.toggle("Thinking Mode", value=False,
+    help="Enable chain-of-thought reasoning. Better for complex tasks, slower for simple ones.")
+temperature = st.sidebar.slider("Temperature", 0.0, 2.0, 0.7, 0.05,
+    help="Lower = deterministic, higher = creative.")
+max_tokens = st.sidebar.slider("Max Tokens", 256, 16384, 4096, 256,
+    help="Maximum length of the response.")
+top_p = st.sidebar.slider("Top P", 0.0, 1.0, 0.95, 0.05,
+    help="Nucleus sampling: only consider tokens within this cumulative probability.")
+presence_penalty = st.sidebar.slider("Presence Penalty", 0.0, 2.0, 0.0, 0.1,
+    help="Penalize repeated topics. Higher values encourage the model to talk about new topics.")
+
+LANG_MAP = {
+    ".py": "python", ".tex": "latex", ".js": "javascript",
+    ".html": "html", ".css": "css", ".sh": "bash",
+    ".json": "json", ".yaml": "yaml", ".yml": "yaml",
+}
+
+
+MAX_CONTEXT = 32768  # token budget for trim_history() and the context meter; NOTE(review): presumably the server's context window - confirm
+
+
+def extract_code(text: str, lang: str = "") -> str:
+    """Extract the best code block from markdown text.
+
+    Strategy:
+      1. Prefer blocks tagged with the target language (e.g. ```python)
+      2. Among candidates, pick the longest block (skip trivial one-liners)
+      3. Fall back to the longest block of any language
+      4. Fall back to the full text if no fenced block is found
+    """
+    tagged_pattern = r"```(\w*)\n(.*?)```"
+    matches = re.findall(tagged_pattern, text, re.DOTALL)
+    if not matches:
+        return text.strip()
+
+    lang_lower = lang.lower()
+    lang_matches = [code for tag, code in matches if tag.lower() == lang_lower]
+    if lang_matches:
+        return max(lang_matches, key=len).strip()
+
+    all_blocks = [code for _, code in matches]
+    return max(all_blocks, key=len).strip()
+
+
+def estimate_tokens(messages: list[dict]) -> int:
+    """Rough token estimate: ~4 characters per token."""
+    return sum(len(m["content"]) for m in messages) // 4
+
+
+def trim_history(messages: list[dict], reserved: int) -> list[dict]:
+    """Drop oldest message pairs to fit within context budget.
+    Always keeps the latest user message."""
+    budget = MAX_CONTEXT - reserved
+    while len(messages) > 1 and estimate_tokens(messages) > budget:
+        messages.pop(0)
+    return messages
+
+
+RUNNABLE_EXTENSIONS = {".py", ".tex"}  # suffixes for which the editor shows a Run/Compile button
+RUN_TIMEOUT = 30  # seconds; passed to subprocess.run(timeout=...) in run_file()
+
+
+def run_file(file_path: Path) -> dict:
+    """Execute a .py or .tex file and return stdout, stderr, and return code."""
+    suffix = file_path.suffix
+    cwd = file_path.parent.resolve()
+
+    if suffix == ".py":
+        cmd = ["python3", file_path.name]
+    elif suffix == ".tex":
+        cmd = [
+            "pdflatex",
+            "-interaction=nonstopmode",
+            f"-output-directory={cwd}",
+            file_path.name,
+        ]
+    else:
+        return {"stdout": "", "stderr": f"Unsupported file type: {suffix}", "rc": 1}
+
+    try:
+        proc = subprocess.run(
+            cmd,
+            cwd=cwd,
+            capture_output=True,
+            text=True,
+            timeout=RUN_TIMEOUT,
+        )
+        return {"stdout": proc.stdout, "stderr": proc.stderr, "rc": proc.returncode}
+    except subprocess.TimeoutExpired:
+        return {"stdout": "", "stderr": f"Timed out after {RUN_TIMEOUT}s", "rc": -1}
+    except FileNotFoundError as e:
+        return {"stdout": "", "stderr": str(e), "rc": -1}
+
+
+# ---------------------------------------------------------------------------
+# Sidebar — File Manager
+# ---------------------------------------------------------------------------
+st.sidebar.markdown("---")
+st.sidebar.header("File Manager")
+
+new_filename = st.sidebar.text_input("New file name", placeholder="main.tex")
+if st.sidebar.button("Create File") and new_filename:
+    (WORKSPACE / new_filename).touch()
+    st.sidebar.success(f"Created {new_filename}")
+    st.rerun()
+
+files = sorted(WORKSPACE.iterdir()) if WORKSPACE.exists() else []
+file_names = [f.name for f in files if f.is_file()]
+selected_file = st.sidebar.selectbox("Open file", file_names if file_names else ["(no files)"])
+
+# ---------------------------------------------------------------------------
+# Main Layout — Two Tabs
+# ---------------------------------------------------------------------------
+tab_chat, tab_editor = st.tabs(["Chat", "File Editor"])
+
+# ---------------------------------------------------------------------------
+# Tab 1: Chat
+# ---------------------------------------------------------------------------
+with tab_chat:
+    st.header(f"Chat with {MODEL}")
+
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+
+    for msg in st.session_state.messages:
+        with st.chat_message(msg["role"]):
+            st.markdown(msg["content"])
+
+    if prompt := st.chat_input("Ask anything..."):
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        with st.chat_message("user"):
+            st.markdown(prompt)
+
+        st.session_state.messages = trim_history(
+            st.session_state.messages, reserved=max_tokens
+        )
+
+        with st.chat_message("assistant"):
+            placeholder = st.empty()
+            full_response = ""
+
+            stream = client.chat.completions.create(
+                model=MODEL,
+                messages=st.session_state.messages,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                top_p=top_p,
+                presence_penalty=presence_penalty,
+                stream=True,
+                extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
+            )
+            for chunk in stream:
+                delta = chunk.choices[0].delta.content or ""
+                full_response += delta
+                placeholder.markdown(full_response + "▌")
+            placeholder.markdown(full_response)
+
+        st.session_state.messages.append({"role": "assistant", "content": full_response})
+
+    if st.session_state.messages:
+        used = estimate_tokens(st.session_state.messages)
+        pct = min(used / MAX_CONTEXT, 1.0)
+        label = f"Context: ~{used:,} / {MAX_CONTEXT:,} tokens"
+        if pct > 0.8:
+            label += " ⚠️ nearing limit — older messages will be trimmed"
+        st.progress(pct, text=label)
+
+        col_clear, col_save = st.columns([1, 3])
+        with col_clear:
+            if st.button("Clear Chat"):
+                st.session_state.messages = []
+                st.rerun()
+        with col_save:
+            if selected_file and selected_file != "(no files)":
+                if st.button(f"Save code → {selected_file}"):
+                    last = st.session_state.messages[-1]["content"]
+                    suffix = Path(selected_file).suffix
+                    lang = LANG_MAP.get(suffix, "")
+                    code = extract_code(last, lang)
+                    (WORKSPACE / selected_file).write_text(code)
+                    st.success(f"Extracted code saved to workspace/{selected_file}")
+
+# ---------------------------------------------------------------------------
+# Tab 2: File Editor
+# ---------------------------------------------------------------------------
+with tab_editor:
+    st.header("File Editor")
+
+    if selected_file and selected_file != "(no files)":
+        file_path = WORKSPACE / selected_file
+        content = file_path.read_text() if file_path.exists() else ""
+        suffix = file_path.suffix
+        lang = LANG_MAP.get(suffix, "text")
+        runnable = suffix in RUNNABLE_EXTENSIONS
+
+        if runnable:
+            col_edit, col_term = st.columns([3, 2])
+        else:
+            col_edit = st.container()
+
+        with col_edit:
+            st.code(content, language=lang if lang != "text" else None, line_numbers=True)
+
+            edited = st.text_area(
+                "Edit below:",
+                value=content,
+                height=400,
+                key=f"editor_{selected_file}_{hash(content)}",
+            )
+
+            col_save, col_gen = st.columns(2)
+
+            with col_save:
+                if st.button("Save File"):
+                    file_path.write_text(edited)
+                    st.success(f"Saved {selected_file}")
+                    st.rerun()
+
+            with col_gen:
+                gen_prompt = st.text_input(
+                    "Generation instruction",
+                    placeholder="e.g. Add error handling / Fix the LaTeX formatting",
+                    key="gen_prompt",
+                )
+                if st.button("Generate with LLM") and gen_prompt:
+                    with st.spinner("Generating..."):
+                        response = client.chat.completions.create(
+                            model=MODEL,
+                            messages=[
+                                {"role": "system", "content": (
+                                    f"You are a coding assistant. The user has a {lang} file. "
+                                    "Return ONLY the raw file content inside a single code block. "
+                                    "No explanations, no comments about changes."
+                                )},
+                                {"role": "user", "content": (
+                                    f"Here is my {lang} file:\n\n```\n{edited}\n```\n\n"
+                                    f"Instruction: {gen_prompt}"
+                                )},
+                            ],
+                            max_tokens=max_tokens,
+                            temperature=temperature,
+                            top_p=top_p,
+                            extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
+                        )
+                        result = response.choices[0].message.content
+                        code = extract_code(result, lang)
+                        file_path.write_text(code)
+                        st.success("File updated by LLM")
+                        st.rerun()
+
+        if runnable:
+            with col_term:
+                run_label = "Compile LaTeX" if suffix == ".tex" else "Run Python"
+                st.subheader("Terminal Output")
+
+                if st.button(run_label, type="primary"):
+                    file_path.write_text(edited)
+                    with st.spinner(f"{'Compiling' if suffix == '.tex' else 'Running'}..."):
+                        result = run_file(file_path)
+                    st.session_state["last_run"] = result
+
+                result = st.session_state.get("last_run")
+                if result:
+                    if result["rc"] == 0:
+                        st.success(f"Exit code: {result['rc']}")
+                    else:
+                        st.error(f"Exit code: {result['rc']}")
+
+                    if result["stdout"]:
+                        st.text_area(
+                            "stdout",
+                            value=result["stdout"],
+                            height=300,
+                            disabled=True,
+                            key="run_stdout",
+                        )
+                    if result["stderr"]:
+                        st.text_area(
+                            "stderr",
+                            value=result["stderr"],
+                            height=200,
+                            disabled=True,
+                            key="run_stderr",
+                        )
+                    if not result["stdout"] and not result["stderr"]:
+                        st.info("No output produced.")
+                else:
+                    st.caption(
+                        f"Click **{run_label}** to execute the file "
+                        f"(timeout: {RUN_TIMEOUT}s)."
+                    )
+    else:
+        st.info("Create a file in the sidebar to start editing.")
diff --git a/AISE501 LLM Zugang/requirements.txt b/AISE501 LLM Zugang/requirements.txt
new file mode 100644
index 0000000..d218a70
--- /dev/null
+++ b/AISE501 LLM Zugang/requirements.txt	
@@ -0,0 +1,2 @@
+streamlit
+openai
diff --git a/AISE501 LLM Zugang/test_server.py b/AISE501 LLM Zugang/test_server.py
new file mode 100644
index 0000000..ff88635
--- /dev/null
+++ b/AISE501 LLM Zugang/test_server.py	
@@ -0,0 +1,70 @@
+"""
+Quick test script to verify the vLLM server is running and responding.
+
+Usage:
+    pip install openai
+    python test_server.py [--host HOST] [--port PORT] [--api-key KEY]
+"""
+
+import argparse
+import sys
+
+from openai import OpenAI
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Test vLLM inference server")
+    parser.add_argument("--host", default="localhost", help="Server hostname")
+    parser.add_argument("--port", default=7080, type=int, help="Server port")
+    parser.add_argument("--api-key", default="EMPTY", help="API key")
+    args = parser.parse_args()
+
+    base_url = f"http://{args.host}:{args.port}/v1"
+    model = "qwen3.5-35b-a3b"
+    client = OpenAI(base_url=base_url, api_key=args.api_key)
+
+    print(f"Connecting to {base_url} ...")
+
+    print("\n--- Available Models ---")
+    try:
+        models = client.models.list()
+        for m in models.data:
+            print(f"  {m.id}")
+    except Exception as e:
+        print(f"ERROR: Cannot connect to server: {e}")
+        sys.exit(1)
+
+    print("\n--- Test Chat Completion ---")
+    response = client.chat.completions.create(
+        model=model,
+        messages=[
+            {"role": "user", "content": "Create a latex document that derives and explains the principle component analysis (pca). Make a self contain document with introduction, derivation, examples of applications. This is for computer science undergraduate class."}
+        ],
+        max_tokens=16384,
+        temperature=0.7,
+    )
+    print(f"  Response: {response.choices[0].message.content}")
+    print(f"  Tokens:   prompt={response.usage.prompt_tokens}, "
+          f"completion={response.usage.completion_tokens}")
+
+    print("\n--- Test Streaming ---")
+    stream = client.chat.completions.create(
+        model=model,
+        messages=[
+            {"role": "user", "content": "Count from 1 to 5."}
+        ],
+        max_tokens=16384,
+        temperature=0.7,
+        stream=True,
+    )
+    print("  Response: ", end="")
+    for chunk in stream:
+        if chunk.choices[0].delta.content:
+            print(chunk.choices[0].delta.content, end="", flush=True)
+    print("\n")
+
+    print("All tests passed!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Clean Code exercise/example1_calculator/calculator_analysis.aux b/Clean Code exercise/example1_calculator/calculator_analysis.aux
new file mode 100644
index 0000000..ec105ec
--- /dev/null
+++ b/Clean Code exercise/example1_calculator/calculator_analysis.aux	
@@ -0,0 +1,18 @@
+\relax 
+\providecommand \babel@aux [2]{\global \let \babel@toc \@gobbletwo }
+\@nameuse{bbl@beforestart}
+\providecommand\hyper@newdestlabel[2]{}
+\providecommand\HyField@AuxAddToFields[1]{}
+\providecommand\HyField@AuxAddToCoFields[2]{}
+\babel@aux{english}{}
+\@writefile{toc}{\contentsline {section}{\numberline {1}Overview}{2}{section.1}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {2}Violation 1: Unused and Poorly Formatted Imports}{2}{section.2}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {3}Violation 2: No Module Docstring or Documentation}{2}{section.3}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {4}Violation 3: Poor Naming Conventions}{3}{section.4}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {5}Violation 4: Formatting and Whitespace}{4}{section.5}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {6}Violation 5: Error Handling}{5}{section.6}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {7}Violation 6: Function Structure and Single Responsibility}{6}{section.7}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {8}Violation 7: Missing \texttt  {\_\_main\_\_} Guard}{7}{section.8}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {9}Violation 8: String Concatenation Instead of f-Strings}{7}{section.9}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {10}Summary of Violations}{8}{section.10}\protected@file@percent }
+\gdef \@abspage@last{8}
diff --git a/Clean Code exercise/example1_calculator/calculator_analysis.out b/Clean Code exercise/example1_calculator/calculator_analysis.out
new file mode 100644
index 0000000..11c7918
--- /dev/null
+++ b/Clean Code exercise/example1_calculator/calculator_analysis.out	
@@ -0,0 +1,10 @@
+\BOOKMARK [1][-]{section.1}{\376\377\000O\000v\000e\000r\000v\000i\000e\000w}{}% 1
+\BOOKMARK [1][-]{section.2}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0001\000:\000\040\000U\000n\000u\000s\000e\000d\000\040\000a\000n\000d\000\040\000P\000o\000o\000r\000l\000y\000\040\000F\000o\000r\000m\000a\000t\000t\000e\000d\000\040\000I\000m\000p\000o\000r\000t\000s}{}% 2
+\BOOKMARK [1][-]{section.3}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0002\000:\000\040\000N\000o\000\040\000M\000o\000d\000u\000l\000e\000\040\000D\000o\000c\000s\000t\000r\000i\000n\000g\000\040\000o\000r\000\040\000D\000o\000c\000u\000m\000e\000n\000t\000a\000t\000i\000o\000n}{}% 3
+\BOOKMARK [1][-]{section.4}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0003\000:\000\040\000P\000o\000o\000r\000\040\000N\000a\000m\000i\000n\000g\000\040\000C\000o\000n\000v\000e\000n\000t\000i\000o\000n\000s}{}% 4
+\BOOKMARK [1][-]{section.5}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0004\000:\000\040\000F\000o\000r\000m\000a\000t\000t\000i\000n\000g\000\040\000a\000n\000d\000\040\000W\000h\000i\000t\000e\000s\000p\000a\000c\000e}{}% 5
+\BOOKMARK [1][-]{section.6}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0005\000:\000\040\000E\000r\000r\000o\000r\000\040\000H\000a\000n\000d\000l\000i\000n\000g}{}% 6
+\BOOKMARK [1][-]{section.7}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0006\000:\000\040\000F\000u\000n\000c\000t\000i\000o\000n\000\040\000S\000t\000r\000u\000c\000t\000u\000r\000e\000\040\000a\000n\000d\000\040\000S\000i\000n\000g\000l\000e\000\040\000R\000e\000s\000p\000o\000n\000s\000i\000b\000i\000l\000i\000t\000y}{}% 7
+\BOOKMARK [1][-]{section.8}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0007\000:\000\040\000M\000i\000s\000s\000i\000n\000g\000\040\000\137\000\137\000m\000a\000i\000n\000\137\000\137\000\040\000G\000u\000a\000r\000d}{}% 8
+\BOOKMARK [1][-]{section.9}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0008\000:\000\040\000S\000t\000r\000i\000n\000g\000\040\000C\000o\000n\000c\000a\000t\000e\000n\000a\000t\000i\000o\000n\000\040\000I\000n\000s\000t\000e\000a\000d\000\040\000o\000f\000\040\000f\000-\000S\000t\000r\000i\000n\000g\000s}{}% 9
+\BOOKMARK [1][-]{section.10}{\376\377\000S\000u\000m\000m\000a\000r\000y\000\040\000o\000f\000\040\000V\000i\000o\000l\000a\000t\000i\000o\000n\000s}{}% 10
diff --git a/Clean Code exercise/example1_calculator/calculator_analysis.pdf b/Clean Code exercise/example1_calculator/calculator_analysis.pdf
new file mode 100644
index 0000000..9b7d765
Binary files /dev/null and b/Clean Code exercise/example1_calculator/calculator_analysis.pdf differ
diff --git a/Clean Code exercise/example1_calculator/calculator_analysis.tex b/Clean Code exercise/example1_calculator/calculator_analysis.tex
new file mode 100644
index 0000000..954adeb
--- /dev/null
+++ b/Clean Code exercise/example1_calculator/calculator_analysis.tex	
@@ -0,0 +1,415 @@
+\documentclass[12pt,a4paper]{article}
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+\usepackage[english]{babel}
+\usepackage{geometry}
+\geometry{margin=2.5cm}
+\usepackage{xcolor}
+\usepackage{tcolorbox}
+\usepackage{booktabs}
+\usepackage{hyperref}
+\usepackage{listings}
+\usepackage{enumitem}
+
+\definecolor{seblue}{rgb}{0.0,0.28,0.67}
+\definecolor{segreen}{rgb}{0.13,0.55,0.13}
+\definecolor{sered}{rgb}{0.7,0.13,0.13}
+\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
+\definecolor{codegreen}{rgb}{0,0.6,0}
+\definecolor{codepurple}{rgb}{0.58,0,0.82}
+
+\lstdefinestyle{pystyle}{
+    backgroundcolor=\color{backcolour},
+    commentstyle=\color{codegreen},
+    keywordstyle=\color{blue},
+    stringstyle=\color{codepurple},
+    basicstyle=\ttfamily\footnotesize,
+    breaklines=true,
+    keepspaces=true,
+    showstringspaces=false,
+    tabsize=4,
+    language=Python
+}
+\lstset{style=pystyle}
+
+\newtcolorbox{badbox}{
+    colback=red!5!white,
+    colframe=sered,
+    title=Bad Code,
+    fonttitle=\bfseries\small,
+    boxrule=0.8pt, arc=2pt,
+    top=2pt, bottom=2pt, left=4pt, right=4pt
+}
+
+\newtcolorbox{goodbox}{
+    colback=green!5!white,
+    colframe=segreen,
+    title=Clean Code,
+    fonttitle=\bfseries\small,
+    boxrule=0.8pt, arc=2pt,
+    top=2pt, bottom=2pt, left=4pt, right=4pt
+}
+
+\newtcolorbox{principlebox}[1][]{
+    colback=blue!5!white,
+    colframe=seblue,
+    title=#1,
+    fonttitle=\bfseries\small,
+    boxrule=0.8pt, arc=2pt,
+    top=2pt, bottom=2pt, left=4pt, right=4pt
+}
+
+\title{\textcolor{seblue}{Code Analysis: Arithmetic Expression Calculator}\\[0.3em]
+\large What Makes Code Bad and How to Fix It\\[0.3em]
+\normalsize AISE501 -- AI in Software Engineering I}
+\author{Dr.\ Florian Herzog}
+\date{Spring Semester 2026}
+
+\begin{document}
+\maketitle
+\tableofcontents
+\newpage
+
+% ============================================
+\section{Overview}
+% ============================================
+
+This document analyses two implementations of the same program --- an arithmetic expression calculator that parses and evaluates strings like \texttt{"3 + 5 * 2"} without using Python's \texttt{eval()}.
+Both produce correct results, but the first version (\texttt{calculator\_bad.py}) violates numerous PEP\,8 and clean code principles, while the second (\texttt{calculator\_good.py}) follows them consistently.
+
+The analysis is structured by violation category, with side-by-side comparisons of the bad and good code and references to the specific PEP\,8 rules or clean code principles that apply.
+
+% ============================================
+\section{Violation 1: Unused and Poorly Formatted Imports}
+% ============================================
+
+\begin{badbox}
+\begin{lstlisting}
+import sys,os,re;from typing import *
+\end{lstlisting}
+\end{badbox}
+
+\textbf{What is wrong:}
+\begin{itemize}
+    \item \texttt{sys}, \texttt{os}, and \texttt{re} are imported but \textbf{never used} anywhere in the code.
+    \item Multiple imports are crammed onto \textbf{one line separated by commas}, violating PEP\,8's rule that imports should be on separate lines.
+    \item A \textbf{semicolon} joins two import statements on one line.
+    \item \texttt{from typing import *} is a \textbf{wildcard import} that pollutes the namespace.
+\end{itemize}
+
+\begin{goodbox}
+The good version has \textbf{no imports at all} --- the calculator uses only built-in Python features.
+\end{goodbox}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{PEP\,8 -- Imports}: ``Imports should usually be on separate lines.'' Wildcard imports (\texttt{from X import *}) should be avoided.
+    \item \textbf{KISS}: Unused imports add unnecessary complexity.
+    \item \textbf{Clean Code}: Dead code (unused imports) confuses readers about dependencies.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 2: No Module Docstring or Documentation}
+% ============================================
+
+\begin{badbox}
+\begin{lstlisting}
+# calculator program
+def scicalc(s):
+\end{lstlisting}
+The only ``documentation'' is a single vague comment. No module docstring, no function docstrings.
+\end{badbox}
+
+\begin{goodbox}
+\begin{lstlisting}
+"""Simple arithmetic expression calculator with a recursive-descent parser.
+
+Supported operations: +, -, *, / and parentheses.
+Does NOT use Python's eval().
+
+Grammar:
+    expression  = term (('+' | '-') term)*
+    term        = factor (('*' | '/') factor)*
+    factor      = NUMBER | '(' expression ')'
+"""
+\end{lstlisting}
+The good version opens with a module docstring that explains the purpose, supported operations, and even the formal grammar. Every function also has a docstring.
+\end{goodbox}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{PEP\,257}: All public modules, functions, classes, and methods should have docstrings.
+    \item \textbf{Clean Code -- Documentation}: Good documentation helps current and future developers understand the intent.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 3: Poor Naming Conventions}
+% ============================================
+
+\begin{badbox}
+\begin{lstlisting}
+def scicalc(s):        # What does "scicalc" mean?
+def doPlusMinus(s,a,b):# camelCase, not snake_case
+def doMulDiv(s,a,b):   # "do" is vague
+def getNum(s, a,b):    # inconsistent spacing
+    t=s[a:b]           # "t" for what?
+    c=t[i]             # "c" for what?
+    L=doPlusMinus(...)  # uppercase "L" for a local variable
+    R=doMulDiv(...)     # uppercase "R" for a local variable
+    r=doPlusMinus(...)  # "r" for result?
+\end{lstlisting}
+\end{badbox}
+
+\begin{goodbox}
+\begin{lstlisting}
+def tokenize(expression_text):
+def parse_expression(tokens, position):
+def parse_term(tokens, position):
+def parse_factor(tokens, position):
+def calculate(expression_text):
+    character = expression_text[position]
+    operator = tokens[position]
+    right_value, position = parse_term(tokens, position)
+    result, final_position = parse_expression(tokens, 0)
+\end{lstlisting}
+\end{goodbox}
+
+\textbf{What is wrong in the bad version:}
+\begin{itemize}
+    \item Function names use \textbf{camelCase} (\texttt{doPlusMinus}) instead of \textbf{snake\_case}.
+    \item Variable names are \textbf{single letters} (\texttt{s}, \texttt{a}, \texttt{b}, \texttt{t}, \texttt{c}, \texttt{r}) --- impossible to understand without reading every line.
+    \item \texttt{L} and \texttt{R} use \textbf{uppercase} for local variables, which PEP\,8 reserves for constants.
+    \item Names like \texttt{scicalc} are \textbf{abbreviations} that are not pronounceable or self-explanatory.
+    \item The list of test data is called \texttt{Data} (capitalised like a class) and results \texttt{Res}.
+\end{itemize}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{PEP\,8 -- Naming}: Functions and variables use \texttt{lower\_case\_with\_underscores}. Constants use \texttt{UPPER\_CASE}.
+    \item \textbf{Clean Code -- Descriptive Names}: Names should reveal intent. A reader should know what a variable holds without tracing its assignment.
+    \item \textbf{Clean Code -- Pronounceable Names}: \texttt{scicalc} is not a word anyone would say in a conversation.
+    \item \textbf{Clean Code -- No Abbreviations}: \texttt{doPlusMinus} is better than \texttt{dPM}, but \texttt{parse\_expression} communicates the actual operation.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 4: Formatting and Whitespace}
+% ============================================
+
+\begin{badbox}
+\begin{lstlisting}
+def scicalc(s):
+  s=s.replace(' ','')     # 2-space indent
+  if s=='':return 0       # no spaces around ==
+  r=doPlusMinus(s,0,len(s))
+  return r
+
+def doPlusMinus(s,a,b):
+    t=s[a:b]; level=0; i=len(t)-1  # 4-space indent, semicolons
+    while i>=0:                      # no space around >=
+        if level==0 and(c=='*' or c=='/'): # (from doMulDiv) missing space before (
+            L = doMulDiv(s,a,a+i); R = getNum(s,a+i+1,b)
+\end{lstlisting}
+\end{badbox}
+
+\begin{goodbox}
+\begin{lstlisting}
+def parse_expression(tokens, position):
+    result, position = parse_term(tokens, position)
+
+    while position < len(tokens) and tokens[position] in ("+", "-"):
+        operator = tokens[position]
+        position += 1
+        right_value, position = parse_term(tokens, position)
+\end{lstlisting}
+\end{goodbox}
+
+\textbf{What is wrong:}
+\begin{itemize}
+    \item \textbf{Inconsistent indentation}: \texttt{scicalc} uses 2 spaces, other functions use 4 spaces. PEP\,8 requires 4 spaces consistently.
+    \item \textbf{Semicolons} to put multiple statements on one line (\texttt{t=s[a:b]; level=0; i=len(t)-1}).
+    \item \textbf{Missing whitespace} around operators: \texttt{s=s.replace}, \texttt{i>=0}, \texttt{level==0 and(c==...}.
+    \item \textbf{Too few blank lines}: only one blank line separates the function definitions, and none separate logical sections within functions. PEP\,8 requires two blank lines before and after top-level functions.
+    \item \textbf{Compound statements}: a \texttt{return} or an assignment is placed \textbf{on the same line} as its \texttt{if}, e.g.\ \texttt{if s=='':return 0}.
+\end{itemize}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{PEP\,8 -- Indentation}: Use 4 spaces per indentation level.
+    \item \textbf{PEP\,8 -- Whitespace}: Surround binary operators with single spaces. Avoid compound statements on one line.
+    \item \textbf{PEP\,8 -- Blank Lines}: Two blank lines around top-level definitions.
+    \item \textbf{Zen of Python}: ``Sparse is better than dense.''
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 5: Error Handling}
+% ============================================
+
+\begin{badbox}
+\begin{lstlisting}
+if R==0:print("ERROR division by zero!!!") ;return 0
+\end{lstlisting}
+\begin{lstlisting}
+try:
+    x = float(t)
+except:
+    print("bad number: "+t);x=0
+return x
+\end{lstlisting}
+\end{badbox}
+
+\begin{goodbox}
+\begin{lstlisting}
+if right_value == 0:
+    raise ZeroDivisionError("Division by zero")
+\end{lstlisting}
+\begin{lstlisting}
+try:
+    tokens = tokenize(expression_text)
+    result, final_position = parse_expression(tokens, 0)
+    ...
+except (ValueError, ZeroDivisionError) as error:
+    return f"Error: {error}"
+\end{lstlisting}
+\end{goodbox}
+
+\textbf{What is wrong in the bad version:}
+\begin{itemize}
+    \item \textbf{Bare \texttt{except}} catches every exception including \texttt{KeyboardInterrupt} and \texttt{SystemExit} --- masking real bugs.
+    \item Errors are handled by \textbf{printing and returning a dummy value} (0), which silently produces wrong results. The caller has no way to know an error occurred.
+    \item The error message style is inconsistent: \texttt{"ERROR division by zero!!!"} vs.\ \texttt{"bad number: ..."}.
+\end{itemize}
+
+\textbf{What the good version does:}
+\begin{itemize}
+    \item Errors \textbf{raise specific exceptions} (\texttt{ValueError}, \texttt{ZeroDivisionError}) at the point of detection.
+    \item The top-level \texttt{calculate()} function catches \textbf{only expected exceptions} and returns a formatted error string.
+    \item Errors \textbf{propagate} rather than being silently swallowed.
+\end{itemize}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{PEP\,8 -- Exceptions}: Catch specific exceptions, never use bare \texttt{except}.
+    \item \textbf{Zen of Python}: ``Errors should never pass silently. Unless explicitly silenced.''
+    \item \textbf{Clean Code -- Error Handling}: Anticipate errors and handle them gracefully.  Returning magic values (0 for an error) is an anti-pattern.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 6: Function Structure and Single Responsibility}
+% ============================================
+
+\begin{badbox}
+The bad version has three intertwined functions (\texttt{doPlusMinus}, \texttt{doMulDiv}, \texttt{getNum}) that each take the \textbf{entire string plus two index parameters} and internally slice the string. Parsing, tokenisation, and evaluation are all mixed together.
+\begin{lstlisting}
+def doPlusMinus(s,a,b):
+    t=s[a:b]; level=0; i=len(t)-1
+    while i>=0:
+        ...
+        L=doPlusMinus(s,a,a+i);R=doMulDiv(s,a+i+1,b)
+        ...
+    return doMulDiv(s,a,b)
+\end{lstlisting}
+\end{badbox}
+
+\begin{goodbox}
+The good version separates \textbf{tokenisation} from \textbf{parsing}:
+\begin{lstlisting}
+tokens = tokenize(expression_text)            # Step 1: tokenise
+result, position = parse_expression(tokens, 0) # Step 2: parse
+\end{lstlisting}
+Each parser function has a single, clear responsibility:
+\begin{itemize}[nosep]
+    \item \texttt{tokenize()} -- converts text to tokens
+    \item \texttt{parse\_expression()} -- handles \texttt{+} and \texttt{-}
+    \item \texttt{parse\_term()} -- handles \texttt{*} and \texttt{/}
+    \item \texttt{parse\_factor()} -- handles numbers and parentheses
+    \item \texttt{calculate()} -- orchestrates the pipeline and error handling
+\end{itemize}
+\end{goodbox}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{SRP (Single Responsibility Principle)}: Each function should do one thing.
+    \item \textbf{SoC (Separation of Concerns)}: Tokenisation and parsing are different concerns.
+    \item \textbf{Clean Code -- Short Functions}: If a function takes more than a few minutes to comprehend, it should be refactored.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 7: Missing \texttt{\_\_main\_\_} Guard}
+% ============================================
+
+\begin{badbox}
+\begin{lstlisting}
+main()
+\end{lstlisting}
+The bad version calls \texttt{main()} at the module level. If another script imports this file, the calculator runs immediately as a side effect.
+\end{badbox}
+
+\begin{goodbox}
+\begin{lstlisting}
+if __name__ == "__main__":
+    main()
+\end{lstlisting}
+The good version uses the standard \texttt{\_\_main\_\_} guard, so the module can be safely imported without executing the calculator.
+\end{goodbox}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{Clean Code -- Avoid Side Effects}: Importing a module should not trigger execution.
+    \item \textbf{Python Best Practice}: The \texttt{if \_\_name\_\_ == "\_\_main\_\_"} guard is standard for all runnable scripts.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 8: String Concatenation Instead of f-Strings}
+% ============================================
+
+\begin{badbox}
+\begin{lstlisting}
+print(d+" = "+str(Res))
+\end{lstlisting}
+\end{badbox}
+
+\begin{goodbox}
+\begin{lstlisting}
+print(f"{display_expr} = {result}")
+\end{lstlisting}
+\end{goodbox}
+
+String concatenation with \texttt{+} and manual \texttt{str()} calls is harder to read than f-strings, which are the idiomatic Python 3.6+ way to format output.
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{Pythonic Code}: Use f-strings for string formatting (readable, efficient).
+    \item \textbf{Clean Code -- Readability}: f-strings make the output format immediately visible.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Summary of Violations}
+% ============================================
+
+\begin{center}
+\small
+\begin{tabular}{@{}rp{5cm}p{5.5cm}@{}}
+\toprule
+\textbf{\#} & \textbf{Violation} & \textbf{Principle / PEP\,8 Rule} \\
+\midrule
+1 & Unused imports, wildcard import, one-line imports & PEP\,8 Imports, KISS \\
+2 & No docstrings or documentation & PEP\,257, Clean Code Documentation \\
+3 & camelCase names, single-letter variables, abbreviations & PEP\,8 Naming, Descriptive Names \\
+4 & Inconsistent indent, semicolons, missing whitespace & PEP\,8 Indentation \& Whitespace \\
+5 & Bare except, silent error swallowing & PEP\,8 Exceptions, Zen of Python \\
+6 & Mixed concerns, long tangled functions & SRP, SoC, Short Functions \\
+7 & No \texttt{\_\_main\_\_} guard & Avoid Side Effects \\
+8 & String concatenation instead of f-strings & Pythonic Code, Readability \\
+\bottomrule
+\end{tabular}
+\end{center}
+
+\end{document}
diff --git a/Clean Code exercise/example1_calculator/calculator_analysis.toc b/Clean Code exercise/example1_calculator/calculator_analysis.toc
new file mode 100644
index 0000000..9d748aa
--- /dev/null
+++ b/Clean Code exercise/example1_calculator/calculator_analysis.toc	
@@ -0,0 +1,11 @@
+\babel@toc {english}{}\relax 
+\contentsline {section}{\numberline {1}Overview}{2}{section.1}%
+\contentsline {section}{\numberline {2}Violation 1: Unused and Poorly Formatted Imports}{2}{section.2}%
+\contentsline {section}{\numberline {3}Violation 2: No Module Docstring or Documentation}{2}{section.3}%
+\contentsline {section}{\numberline {4}Violation 3: Poor Naming Conventions}{3}{section.4}%
+\contentsline {section}{\numberline {5}Violation 4: Formatting and Whitespace}{4}{section.5}%
+\contentsline {section}{\numberline {6}Violation 5: Error Handling}{5}{section.6}%
+\contentsline {section}{\numberline {7}Violation 6: Function Structure and Single Responsibility}{6}{section.7}%
+\contentsline {section}{\numberline {8}Violation 7: Missing \texttt {\_\_main\_\_} Guard}{7}{section.8}%
+\contentsline {section}{\numberline {9}Violation 8: String Concatenation Instead of f-Strings}{7}{section.9}%
+\contentsline {section}{\numberline {10}Summary of Violations}{8}{section.10}%
diff --git a/Clean Code exercise/example1_calculator/calculator_bad.py b/Clean Code exercise/example1_calculator/calculator_bad.py
new file mode 100644
index 0000000..a076cc9
--- /dev/null
+++ b/Clean Code exercise/example1_calculator/calculator_bad.py	
@@ -0,0 +1,64 @@
+import sys,os,re;from typing import *
+
+# calculator program
+def scicalc(s):  # Evaluate expression string s; '' yields 0. Deliberately bad style (exercise fixture) -- do not tidy.
+  s=s.replace(' ','')  # drop spaces so the index-based scanners below see a compact string
+  if s=='':return 0
+  r=doPlusMinus(s,0,len(s))  # lowest-precedence level first
+  return r
+
+def doPlusMinus(s,a,b):  # Evaluate s[a:b] at +/- precedence. Deliberately bad style (exercise fixture).
+    t=s[a:b]; level=0; i=len(t)-1  # scan the slice right-to-left; level = parenthesis depth
+    while i>=0:
+        c=t[i]
+        if c==')':level=level+1
+        if c=='(':level=level-1
+        if level==0 and (c=='+' or c=='-'):  # rightmost top-level +/- is the split point
+            L=doPlusMinus(s,a,a+i);R=doMulDiv(s,a+i+1,b)  # left part may hold more +/-; right part cannot
+            if c=='+': return L+R
+            else: return L-R
+        i=i-1
+    return doMulDiv(s,a,b)  # no top-level +/- found
+
+def doMulDiv(s,a,b):  # Evaluate s[a:b] at * / precedence. Deliberately bad style (exercise fixture).
+    t=s[a:b];level=0;i=len(t)-1  # scan the slice right-to-left; level = parenthesis depth
+    while i >= 0:
+        c=t[i]
+        if c==')':level+=1
+        if c=='(':level-=1
+        if level==0 and(c=='*' or c=='/'):  # rightmost top-level * or / is the split point
+            L = doMulDiv(s,a,a+i); R = getNum(s,a+i+1,b)
+            if c=='*':return L*R
+            else:
+                if R==0:print("ERROR division by zero!!!") ;return 0  # error is only printed; caller gets 0
+                return L/R
+        i -= 1
+    return getNum(s,a,b)  # no top-level * or / found
+
+def getNum(s, a,b):  # Evaluate s[a:b] as a parenthesised group or a number. Deliberately bad style (exercise fixture).
+    t = s[a:b]
+    if t[0]=='(' and t[-1]==')':  # NOTE(review): t[0] raises IndexError when the slice is empty, e.g. input "3+"
+        return doPlusMinus(s,a+1,b-1)  # evaluate the text between the outer parentheses
+    try:
+        x = float(t)
+    except:
+        print("bad number: "+t);x=0  # any failure is printed and replaced by 0
+    return x
+
+def main():  # Print scicalc() results for fixed sample expressions. Deliberately bad style (exercise fixture).
+    Data = [
+        "3 + 5",
+        "10 - 2 * 3",
+        "( 4 + 6 ) * 2",
+        "100 / ( 5 * 2 )",
+        "3.5 + 2.5 * 4",
+        "( 1 + 2 ) * ( 3 + 4 )",
+        "",  # empty input: scicalc returns 0
+        "10 / 0",  # division by zero: message printed, 0 returned
+        "abc + 1",  # not a number: message printed, 0 substituted
+    ]
+    for d in Data:
+        Res=scicalc(d)
+        print(d+" = "+str(Res))
+
+main()  # runs on import as well: there is no __main__ guard
diff --git a/Clean Code exercise/example1_calculator/calculator_good.py b/Clean Code exercise/example1_calculator/calculator_good.py
new file mode 100644
index 0000000..c5fb638
--- /dev/null
+++ b/Clean Code exercise/example1_calculator/calculator_good.py	
@@ -0,0 +1,153 @@
+"""Simple arithmetic expression calculator with a recursive-descent parser.
+
+Supported operations: +, -, *, / and parentheses.
+Does NOT use Python's eval().
+
+Grammar:
+    expression  = term (('+' | '-') term)*
+    term        = factor (('*' | '/') factor)*
+    factor      = NUMBER | '(' expression ')'
+"""
+
+
+def tokenize(expression_text):
+    """Split an arithmetic expression string into a flat list of tokens.
+
+    Numbers become floats; each of + - * / ( ) becomes a one-character string.
+    Raises ValueError for any other non-whitespace character or a malformed number.
+    """
+    tokens = []
+    position = 0
+
+    while position < len(expression_text):
+        character = expression_text[position]
+
+        if character.isspace():  # whitespace only separates tokens
+            position += 1
+            continue
+
+        if character in "+-*/()":
+            tokens.append(character)
+            position += 1
+            continue
+
+        if character.isdigit() or character == ".":
+            start = position
+            while position < len(expression_text) and (  # consume the whole run of digits and dots
+                expression_text[position].isdigit()
+                or expression_text[position] == "."
+            ):
+                position += 1
+            number_text = expression_text[start:position]
+            tokens.append(float(number_text))  # float() raises ValueError for runs like "1.2.3" or "."
+            continue
+
+        raise ValueError(
+            f"Unexpected character '{character}' at position {position}"
+        )
+
+    return tokens
+
+
+def parse_expression(tokens, position):
+    """Parse term (('+' | '-') term)* at tokens[position]; return (value, next_position)."""
+    result, position = parse_term(tokens, position)
+
+    while position < len(tokens) and tokens[position] in ("+", "-"):
+        operator = tokens[position]
+        position += 1  # step past the operator
+        right_value, position = parse_term(tokens, position)
+
+        if operator == "+":  # folding left to right keeps + and - left-associative
+            result += right_value
+        else:
+            result -= right_value
+
+    return result, position
+
+
+def parse_term(tokens, position):
+    """Parse factor (('*' | '/') factor)* at tokens[position]; return (value, next_position)."""
+    result, position = parse_factor(tokens, position)
+
+    while position < len(tokens) and tokens[position] in ("*", "/"):
+        operator = tokens[position]
+        position += 1  # step past the operator
+        right_value, position = parse_factor(tokens, position)
+
+        if operator == "*":
+            result *= right_value
+        else:
+            if right_value == 0:  # raise with an explicit message before dividing
+                raise ZeroDivisionError("Division by zero")
+            result /= right_value
+
+    return result, position
+
+
+def parse_factor(tokens, position):
+    """Parse NUMBER | '(' expression ')' at tokens[position]; return (value, next_position)."""
+    if position >= len(tokens):  # e.g. the input ends right after an operator
+        raise ValueError("Unexpected end of expression")
+
+    token = tokens[position]
+
+    if token == "(":
+        position += 1  # step past "("
+        result, position = parse_expression(tokens, position)
+        if position >= len(tokens) or tokens[position] != ")":
+            raise ValueError("Missing closing parenthesis")
+        position += 1  # step past ")"
+        return result, position
+
+    if isinstance(token, float):  # tokenize() appends numbers as floats, everything else as str
+        return token, position + 1
+
+    raise ValueError(f"Unexpected token: {token}")  # an operator or ")" where a value is required
+
+
+def calculate(expression_text):
+    """Evaluate an arithmetic expression string and return the result.
+
+    Returns the numeric result or an error message string.
+    """
+    if not expression_text.strip():
+        return "Error: empty expression"
+
+    try:
+        tokens = tokenize(expression_text)
+        result, final_position = parse_expression(tokens, 0)
+
+        if final_position != len(tokens):
+            return f"Error: unexpected token '{tokens[final_position]}'"
+
+        if result == int(result):
+            return int(result)
+        return round(result, 10)
+
+    except (ValueError, ZeroDivisionError) as error:
+        return f"Error: {error}"
+
+
+def main():
+    """Print the result of calculate() for a fixed list of sample expressions."""
+    test_expressions = [
+        "3 + 5",
+        "10 - 2 * 3",
+        "(4 + 6) * 2",
+        "100 / (5 * 2)",
+        "3.5 + 2.5 * 4",
+        "(1 + 2) * (3 + 4)",
+        "",  # exercises the empty-expression error
+        "10 / 0",  # exercises the division-by-zero error
+        "abc + 1",  # exercises the unexpected-character error
+    ]
+
+    for expression in test_expressions:
+        result = calculate(expression)
+        display_expr = expression if expression else "(empty)"  # keep the empty case readable
+        print(f"{display_expr} = {result}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Clean Code exercise/example1_calculator/calculator_usecase.aux b/Clean Code exercise/example1_calculator/calculator_usecase.aux
new file mode 100644
index 0000000..ee32f96
--- /dev/null
+++ b/Clean Code exercise/example1_calculator/calculator_usecase.aux	
@@ -0,0 +1,11 @@
+\relax 
+\providecommand \babel@aux [2]{\global \let \babel@toc \@gobbletwo }
+\@nameuse{bbl@beforestart}
+\providecommand\hyper@newdestlabel[2]{}
+\providecommand\HyField@AuxAddToFields[1]{}
+\providecommand\HyField@AuxAddToCoFields[2]{}
+\babel@aux{english}{}
+\@writefile{toc}{\contentsline {section}{\numberline {1}Use Case}{1}{section.1}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {2}Example Input / Output}{1}{section.2}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {3}Exercise}{1}{section.3}\protected@file@percent }
+\gdef \@abspage@last{2}
diff --git a/Clean Code exercise/example1_calculator/calculator_usecase.out b/Clean Code exercise/example1_calculator/calculator_usecase.out
new file mode 100644
index 0000000..4b8f6e6
--- /dev/null
+++ b/Clean Code exercise/example1_calculator/calculator_usecase.out	
@@ -0,0 +1,3 @@
+\BOOKMARK [1][-]{section.1}{\376\377\000U\000s\000e\000\040\000C\000a\000s\000e}{}% 1
+\BOOKMARK [1][-]{section.2}{\376\377\000E\000x\000a\000m\000p\000l\000e\000\040\000I\000n\000p\000u\000t\000\040\000/\000\040\000O\000u\000t\000p\000u\000t}{}% 2
+\BOOKMARK [1][-]{section.3}{\376\377\000E\000x\000e\000r\000c\000i\000s\000e}{}% 3
diff --git a/Clean Code exercise/example1_calculator/calculator_usecase.pdf b/Clean Code exercise/example1_calculator/calculator_usecase.pdf
new file mode 100644
index 0000000..fda7975
Binary files /dev/null and b/Clean Code exercise/example1_calculator/calculator_usecase.pdf differ
diff --git a/Clean Code exercise/example1_calculator/calculator_usecase.tex b/Clean Code exercise/example1_calculator/calculator_usecase.tex
new file mode 100644
index 0000000..9be9f61
--- /dev/null
+++ b/Clean Code exercise/example1_calculator/calculator_usecase.tex	
@@ -0,0 +1,90 @@
+\documentclass[12pt,a4paper]{article}
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+\usepackage[english]{babel}
+\usepackage{geometry}
+\geometry{margin=2.5cm}
+\usepackage{xcolor}
+\usepackage{tcolorbox}
+\usepackage{booktabs}
+\usepackage{hyperref}
+
+\definecolor{seblue}{rgb}{0.0,0.28,0.67}
+
+\title{\textcolor{seblue}{Exercise 1: Arithmetic Expression Calculator}\\[0.3em]
+\large AISE501 -- AI in Software Engineering I}
+\author{Dr.\ Florian Herzog}
+\date{Spring Semester 2026}
+
+\begin{document}
+\maketitle
+
+\section{Use Case}
+
+A user enters an arithmetic expression as a text string, for example \texttt{"3 + 5 * 2"}.
+The program evaluates the expression and prints the result.
+
+The calculator must:
+\begin{itemize}
+    \item Support the four basic operations: \texttt{+}, \texttt{-}, \texttt{*}, \texttt{/}
+    \item Respect standard operator precedence (\texttt{*} and \texttt{/} bind more tightly than \texttt{+} and \texttt{-})
+    \item Support parentheses for grouping, e.g.\ \texttt{"(4 + 6) * 2"}
+    \item Support decimal numbers, e.g.\ \texttt{"3.5 + 2.5"}
+    \item Handle errors gracefully (division by zero, invalid characters, empty input)
+    \item \textbf{Not} use Python's built-in \texttt{eval()} function
+\end{itemize}
+
+\section{Example Input / Output}
+
+\begin{center}
+\begin{tabular}{ll}
+\toprule
+\textbf{Input Expression} & \textbf{Expected Output} \\
+\midrule
+\texttt{3 + 5}              & \texttt{8}    \\
+\texttt{10 - 2 * 3}         & \texttt{4}    \\
+\texttt{(4 + 6) * 2}        & \texttt{20}   \\
+\texttt{100 / (5 * 2)}      & \texttt{10}   \\
+\texttt{3.5 + 2.5 * 4}      & \texttt{13.5} \\
+\texttt{(1 + 2) * (3 + 4)}  & \texttt{21}   \\
+\texttt{(empty)}             & Error message \\
+\texttt{10 / 0}              & Error message \\
+\texttt{abc + 1}             & Error message \\
+\bottomrule
+\end{tabular}
+\end{center}
+
+\section{Exercise}
+
+Two implementations are provided:
+
+\begin{enumerate}
+    \item \textbf{\texttt{calculator\_bad.py}} -- A working but poorly written version that violates many clean code and PEP\,8 principles.
+    \item \textbf{\texttt{calculator\_good.py}} -- A clean, well-structured version following PEP\,8 and clean code best practices.
+\end{enumerate}
+
+\subsection*{Tasks}
+
+\begin{enumerate}
+    \item Run both programs and verify they produce the same results.
+    \item Read the bad version and list all clean code / PEP\,8 violations you can find.
+    \item For each violation, explain which principle is broken and why it makes the code harder to read or maintain.
+    \item Compare your list with the good version to see how each issue was resolved.
+\end{enumerate}
+
+\subsection*{Violations to Look For}
+
+\begin{itemize}
+    \item Unused imports
+    \item Missing or misleading comments and docstrings
+    \item Poor variable and function names (abbreviations, single letters)
+    \item Inconsistent indentation and spacing
+    \item Multiple statements on one line (semicolons)
+    \item Missing whitespace around operators
+    \item No proper error handling (bare \texttt{except}, printing instead of raising)
+    \item Magic numbers and unclear logic flow
+    \item Missing \texttt{if \_\_name\_\_ == "\_\_main\_\_"} guard
+    \item No type clarity in function signatures
+\end{itemize}
+
+\end{document}
diff --git a/Clean Code exercise/example2_bank/accounts.json b/Clean Code exercise/example2_bank/accounts.json
new file mode 100644
index 0000000..1ee054b
--- /dev/null
+++ b/Clean Code exercise/example2_bank/accounts.json	
@@ -0,0 +1,25 @@
+{
+    "accounts": [
+        {
+            "account_id": "ACC-001",
+            "holder": "Alice Mueller",
+            "balance": 5000.00,
+            "currency": "CHF",
+            "status": "active"
+        },
+        {
+            "account_id": "ACC-002",
+            "holder": "Bob Schneider",
+            "balance": 1200.50,
+            "currency": "CHF",
+            "status": "active"
+        },
+        {
+            "account_id": "ACC-003",
+            "holder": "Clara Brunner",
+            "balance": 300.00,
+            "currency": "CHF",
+            "status": "frozen"
+        }
+    ]
+}
diff --git a/Clean Code exercise/example2_bank/accounts_updated_bad.json b/Clean Code exercise/example2_bank/accounts_updated_bad.json
new file mode 100644
index 0000000..ecf31b5
--- /dev/null
+++ b/Clean Code exercise/example2_bank/accounts_updated_bad.json	
@@ -0,0 +1,25 @@
+{
+  "accounts": [
+    {
+      "account_id": "ACC-001",
+      "holder": "Alice Mueller",
+      "balance": 4550.0,
+      "currency": "CHF",
+      "status": "active"
+    },
+    {
+      "account_id": "ACC-002",
+      "holder": "Bob Schneider",
+      "balance": 1950.5,
+      "currency": "CHF",
+      "status": "active"
+    },
+    {
+      "account_id": "ACC-003",
+      "holder": "Clara Brunner",
+      "balance": 300.0,
+      "currency": "CHF",
+      "status": "frozen"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/Clean Code exercise/example2_bank/accounts_updated_good.json b/Clean Code exercise/example2_bank/accounts_updated_good.json
new file mode 100644
index 0000000..ecf31b5
--- /dev/null
+++ b/Clean Code exercise/example2_bank/accounts_updated_good.json	
@@ -0,0 +1,25 @@
+{
+  "accounts": [
+    {
+      "account_id": "ACC-001",
+      "holder": "Alice Mueller",
+      "balance": 4550.0,
+      "currency": "CHF",
+      "status": "active"
+    },
+    {
+      "account_id": "ACC-002",
+      "holder": "Bob Schneider",
+      "balance": 1950.5,
+      "currency": "CHF",
+      "status": "active"
+    },
+    {
+      "account_id": "ACC-003",
+      "holder": "Clara Brunner",
+      "balance": 300.0,
+      "currency": "CHF",
+      "status": "frozen"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/Clean Code exercise/example2_bank/bank_analysis.aux b/Clean Code exercise/example2_bank/bank_analysis.aux
new file mode 100644
index 0000000..9d052fe
--- /dev/null
+++ b/Clean Code exercise/example2_bank/bank_analysis.aux	
@@ -0,0 +1,20 @@
+\relax 
+\providecommand \babel@aux [2]{\global \let \babel@toc \@gobbletwo }
+\@nameuse{bbl@beforestart}
+\providecommand\hyper@newdestlabel[2]{}
+\providecommand\HyField@AuxAddToFields[1]{}
+\providecommand\HyField@AuxAddToCoFields[2]{}
+\babel@aux{english}{}
+\@writefile{toc}{\contentsline {section}{\numberline {1}Overview}{2}{section.1}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {2}Violation 1: Unused Imports and Import Formatting}{2}{section.2}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {3}Violation 2: No Documentation or Docstrings}{2}{section.3}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {4}Violation 3: Implicit Data Model}{3}{section.4}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {5}Violation 4: Poor Naming}{4}{section.5}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {6}Violation 5: Formatting -- Semicolons and Dense Lines}{5}{section.6}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {7}Violation 6: No Context Managers for File I/O}{6}{section.7}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {8}Violation 7: God Function -- Single Responsibility Violation}{7}{section.8}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {9}Violation 8: Magic Strings Instead of Constants}{8}{section.9}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {10}Violation 9: Comparison with \texttt  {None}}{8}{section.10}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {11}Violation 10: Missing \texttt  {\_\_main\_\_} Guard and String Formatting}{9}{section.11}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {12}Summary of Violations}{10}{section.12}\protected@file@percent }
+\gdef \@abspage@last{10}
diff --git a/Clean Code exercise/example2_bank/bank_analysis.out b/Clean Code exercise/example2_bank/bank_analysis.out
new file mode 100644
index 0000000..e6ce316
--- /dev/null
+++ b/Clean Code exercise/example2_bank/bank_analysis.out	
@@ -0,0 +1,12 @@
+\BOOKMARK [1][-]{section.1}{\376\377\000O\000v\000e\000r\000v\000i\000e\000w}{}% 1
+\BOOKMARK [1][-]{section.2}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0001\000:\000\040\000U\000n\000u\000s\000e\000d\000\040\000I\000m\000p\000o\000r\000t\000s\000\040\000a\000n\000d\000\040\000I\000m\000p\000o\000r\000t\000\040\000F\000o\000r\000m\000a\000t\000t\000i\000n\000g}{}% 2
+\BOOKMARK [1][-]{section.3}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0002\000:\000\040\000N\000o\000\040\000D\000o\000c\000u\000m\000e\000n\000t\000a\000t\000i\000o\000n\000\040\000o\000r\000\040\000D\000o\000c\000s\000t\000r\000i\000n\000g\000s}{}% 3
+\BOOKMARK [1][-]{section.4}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0003\000:\000\040\000I\000m\000p\000l\000i\000c\000i\000t\000\040\000D\000a\000t\000a\000\040\000M\000o\000d\000e\000l}{}% 4
+\BOOKMARK [1][-]{section.5}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0004\000:\000\040\000P\000o\000o\000r\000\040\000N\000a\000m\000i\000n\000g}{}% 5
+\BOOKMARK [1][-]{section.6}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0005\000:\000\040\000F\000o\000r\000m\000a\000t\000t\000i\000n\000g\000\040\040\023\000\040\000S\000e\000m\000i\000c\000o\000l\000o\000n\000s\000\040\000a\000n\000d\000\040\000D\000e\000n\000s\000e\000\040\000L\000i\000n\000e\000s}{}% 6
+\BOOKMARK [1][-]{section.7}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0006\000:\000\040\000N\000o\000\040\000C\000o\000n\000t\000e\000x\000t\000\040\000M\000a\000n\000a\000g\000e\000r\000s\000\040\000f\000o\000r\000\040\000F\000i\000l\000e\000\040\000I\000/\000O}{}% 7
+\BOOKMARK [1][-]{section.8}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0007\000:\000\040\000G\000o\000d\000\040\000F\000u\000n\000c\000t\000i\000o\000n\000\040\040\023\000\040\000S\000i\000n\000g\000l\000e\000\040\000R\000e\000s\000p\000o\000n\000s\000i\000b\000i\000l\000i\000t\000y\000\040\000V\000i\000o\000l\000a\000t\000i\000o\000n}{}% 8
+\BOOKMARK [1][-]{section.9}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0008\000:\000\040\000M\000a\000g\000i\000c\000\040\000S\000t\000r\000i\000n\000g\000s\000\040\000I\000n\000s\000t\000e\000a\000d\000\040\000o\000f\000\040\000C\000o\000n\000s\000t\000a\000n\000t\000s}{}% 9
+\BOOKMARK [1][-]{section.10}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0009\000:\000\040\000C\000o\000m\000p\000a\000r\000i\000s\000o\000n\000\040\000w\000i\000t\000h\000\040\000N\000o\000n\000e}{}% 10
+\BOOKMARK [1][-]{section.11}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0001\0000\000:\000\040\000M\000i\000s\000s\000i\000n\000g\000\040\000\137\000\137\000m\000a\000i\000n\000\137\000\137\000\040\000G\000u\000a\000r\000d\000\040\000a\000n\000d\000\040\000S\000t\000r\000i\000n\000g\000\040\000F\000o\000r\000m\000a\000t\000t\000i\000n\000g}{}% 11
+\BOOKMARK [1][-]{section.12}{\376\377\000S\000u\000m\000m\000a\000r\000y\000\040\000o\000f\000\040\000V\000i\000o\000l\000a\000t\000i\000o\000n\000s}{}% 12
diff --git a/Clean Code exercise/example2_bank/bank_analysis.pdf b/Clean Code exercise/example2_bank/bank_analysis.pdf
new file mode 100644
index 0000000..579d750
Binary files /dev/null and b/Clean Code exercise/example2_bank/bank_analysis.pdf differ
diff --git a/Clean Code exercise/example2_bank/bank_analysis.tex b/Clean Code exercise/example2_bank/bank_analysis.tex
new file mode 100644
index 0000000..1f91ad2
--- /dev/null
+++ b/Clean Code exercise/example2_bank/bank_analysis.tex	
@@ -0,0 +1,526 @@
+\documentclass[12pt,a4paper]{article}
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+\usepackage[english]{babel}
+\usepackage{geometry}
+\geometry{margin=2.5cm}
+\usepackage{xcolor}
+\usepackage{tcolorbox}
+\usepackage{booktabs}
+\usepackage{hyperref}
+\usepackage{listings}
+\usepackage{enumitem}
+
+\definecolor{seblue}{rgb}{0.0,0.28,0.67}
+\definecolor{segreen}{rgb}{0.13,0.55,0.13}
+\definecolor{sered}{rgb}{0.7,0.13,0.13}
+\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
+\definecolor{codegreen}{rgb}{0,0.6,0}
+\definecolor{codepurple}{rgb}{0.58,0,0.82}
+
+\lstdefinestyle{pystyle}{
+    backgroundcolor=\color{backcolour},
+    commentstyle=\color{codegreen},
+    keywordstyle=\color{blue},
+    stringstyle=\color{codepurple},
+    basicstyle=\ttfamily\footnotesize,
+    breaklines=true,
+    keepspaces=true,
+    showstringspaces=false,
+    tabsize=4,
+    language=Python
+}
+\lstset{style=pystyle}
+
+\newtcolorbox{badbox}{
+    colback=red!5!white,
+    colframe=sered,
+    title=Bad Code,
+    fonttitle=\bfseries\small,
+    boxrule=0.8pt, arc=2pt,
+    top=2pt, bottom=2pt, left=4pt, right=4pt
+}
+
+\newtcolorbox{goodbox}{
+    colback=green!5!white,
+    colframe=segreen,
+    title=Clean Code,
+    fonttitle=\bfseries\small,
+    boxrule=0.8pt, arc=2pt,
+    top=2pt, bottom=2pt, left=4pt, right=4pt
+}
+
+\newtcolorbox{principlebox}[1][]{
+    colback=blue!5!white,
+    colframe=seblue,
+    title=#1,
+    fonttitle=\bfseries\small,
+    boxrule=0.8pt, arc=2pt,
+    top=2pt, bottom=2pt, left=4pt, right=4pt
+}
+
+\title{\textcolor{seblue}{Code Analysis: Bank Account Transaction Processor}\\[0.3em]
+\large What Makes Code Bad and How to Fix It\\[0.3em]
+\normalsize AISE501 -- AI in Software Engineering I}
+\author{Dr.\ Florian Herzog}
+\date{Spring Semester 2026}
+
+\begin{document}
+\maketitle
+\tableofcontents
+\newpage
+
+% ============================================
+\section{Overview}
+% ============================================
+
+This document analyses two implementations of a bank account transaction processor.
+Both read account state and transactions from JSON files, validate each transaction, apply valid ones, reject invalid ones, and write results.
+Both compute the same results, but \texttt{bank\_bad.py} violates many PEP\,8 and clean code principles, while \texttt{bank\_good.py} follows them consistently.
+
+% ============================================
+\section{Violation 1: Unused Imports and Import Formatting}
+% ============================================
+
+\begin{badbox}
+\begin{lstlisting}
+import json,sys,os,copy;from datetime import datetime
+\end{lstlisting}
+\end{badbox}
+
+\begin{goodbox}
+\begin{lstlisting}
+import json
+from typing import TypedDict, Optional
+\end{lstlisting}
+\end{goodbox}
+
+\textbf{What is wrong:}
+\begin{itemize}
+    \item \texttt{sys}, \texttt{os}, \texttt{copy}, and \texttt{datetime} are imported but \textbf{never used}.
+    \item All imports are \textbf{on a single line} separated by commas, with a semicolon joining two import statements.
+    \item PEP\,8 recommends putting each \texttt{import} statement on its own line and separating import groups (standard library, third-party, local) with blank lines.
+\end{itemize}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{PEP\,8 -- Imports}: Imports should be on separate lines. Remove unused imports.
+    \item \textbf{KISS}: Unused imports add noise and suggest false dependencies.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 2: No Documentation or Docstrings}
+% ============================================
+
+\begin{badbox}
+The file has \textbf{no module docstring} and \textbf{no function docstrings}. The only comments in the entire file are:
+\begin{lstlisting}
+    # find account
+    ...
+    # print results
+\end{lstlisting}
+These comments describe \textit{what} the next line does (which is already obvious from the code), not \textit{why}.
+\end{badbox}
+
+\begin{goodbox}
+\begin{lstlisting}
+"""Bank account transaction processor.
+
+Reads account state and a list of transactions from JSON files,
+validates and applies each transaction, then writes updated account
+state and a transaction log (accepted / rejected) to output files.
+"""
+\end{lstlisting}
+Every function has a docstring:
+\begin{lstlisting}
+def validate_common(
+    account: Optional[Account],
+    amount: float,
+) -> Optional[str]:
+    """Run validations shared by all transaction types.
+
+    Returns an error message string, or None if valid.
+    """
+\end{lstlisting}
+\end{goodbox}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{PEP\,257}: All public modules and functions should have docstrings.
+    \item \textbf{Clean Code -- Comments}: Don't add noise comments that just restate the code. Comments should explain \textit{why}, not \textit{what}.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 3: Implicit Data Model}
+% ============================================
+
+\begin{badbox}
+The bad version operates on raw dictionaries with no type declarations.
+A reader must trace through the JSON file and every dictionary access to understand the data shape:
+\begin{lstlisting}
+def proc(accs,txns):
+    for t in txns:
+        tp=t['type'];aid=t['account_id'];amt=t['amount'];tid=t['id']
+        a=None
+        for x in accs:
+            if x['account_id']==aid:a=x
+\end{lstlisting}
+What fields does \texttt{t} have? What fields does \texttt{a} have? There is no way to know without reading the JSON file.
+\end{badbox}
+
+\begin{goodbox}
+The good version defines explicit data types:
+\begin{lstlisting}
+class Account(TypedDict):
+    """A bank account with its current state."""
+    account_id: str
+    holder: str
+    balance: float
+    currency: str
+    status: str            # "active" or "frozen"
+
+class Transaction(TypedDict, total=False):
+    """A financial transaction to be processed."""
+    id: str
+    type: str              # "deposit", "withdrawal", or "transfer"
+    account_id: str
+    amount: float
+    description: str
+    to_account_id: str     # only for transfers
+    status: str            # added after processing
+    reason: str            # added on rejection
+\end{lstlisting}
+All function signatures carry type annotations:
+\begin{lstlisting}
+def find_account(accounts: list[Account], account_id: str) -> Optional[Account]:
+\end{lstlisting}
+\end{goodbox}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{Zen of Python}: ``Explicit is better than implicit.''
+    \item \textbf{Clean Code -- Readability}: A reader should understand the data contract without tracing through runtime data.
+    \item \textbf{PEP\,484 / PEP\,589}: Use type hints and \texttt{TypedDict} to document the structure of dictionary-based data.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 4: Poor Naming}
+% ============================================
+
+\begin{badbox}
+\begin{lstlisting}
+def loadJ(p):           # "J" for JSON? "p" for path?
+def saveJ(p,d):         # "d" for data?
+def proc(accs,txns):    # "proc" does what exactly?
+    ok=[];bad=[]        # accepted vs. rejected
+    tp=t['type']        # "tp" is unpronounceable
+    aid=t['account_id'] # "aid" reads as the English word "aid" (help)
+    amt=t['amount']     # "amt" -- abbreviation
+    tid=t['id']         # "tid" -- never used again!
+    a=None              # "a" for account
+    ta=None             # "ta" for target account
+    for x in accs:      # "x" for what?
+    D=loadJ(...)        # capital "D" for a local variable
+    T=loadJ(...)        # capital "T" for a local variable
+\end{lstlisting}
+\end{badbox}
+
+\begin{goodbox}
+\begin{lstlisting}
+def load_json(file_path):
+def save_json(file_path, data):
+def find_account(accounts, account_id):
+def validate_common(account, amount):
+def process_deposit(accounts, transaction):
+def process_withdrawal(accounts, transaction):
+def process_transfer(accounts, transaction):
+def process_all_transactions(accounts, transactions):
+def print_results(accounts, accepted, rejected):
+\end{lstlisting}
+\end{goodbox}
+
+\textbf{What is wrong:}
+\begin{itemize}
+    \item Function names use \textbf{abbreviations} (\texttt{loadJ}, \texttt{saveJ}, \texttt{proc}) instead of descriptive snake\_case names.
+    \item Variable names are \textbf{single letters or short abbreviations} (\texttt{a}, \texttt{t}, \texttt{x}, \texttt{tp}, \texttt{aid}, \texttt{amt}, \texttt{ta}).
+    \item \texttt{tid} is assigned but \textbf{never used} --- dead code.
+    \item \texttt{D} and \texttt{T} use \textbf{uppercase}, suggesting constants, but they are local variables.
+    \item The names \texttt{ok} for accepted transactions and \texttt{bad} for rejected ones are \textbf{imprecise}.
+\end{itemize}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{PEP\,8 -- Naming}: Functions and variables use \texttt{lower\_case\_with\_underscores}. Constants use \texttt{UPPER\_CASE}.
+    \item \textbf{Clean Code -- Descriptive Names}: ``Other developers should figure out what a variable stores just by reading its name.''
+    \item \textbf{Clean Code -- Consistent Vocabulary}: Don't mix \texttt{ok}/\texttt{bad} with \texttt{accepted}/\texttt{rejected}.
+    \item \textbf{Clean Code -- No Abbreviations}: \texttt{amt}, \texttt{tp}, \texttt{tid} are not words.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 5: Formatting -- Semicolons and Dense Lines}
+% ============================================
+
+\begin{badbox}
+\begin{lstlisting}
+f=open(p,'r');d=json.load(f);f.close();return d
+\end{lstlisting}
+\begin{lstlisting}
+tp=t['type'];aid=t['account_id'];amt=t['amount'];tid=t['id']
+\end{lstlisting}
+\begin{lstlisting}
+a['balance']=a['balance']+amt;t['status']='accepted';ok.append(t)
+\end{lstlisting}
+\begin{lstlisting}
+if a==None:
+    t['reason']='account not found';bad.append(t);continue
+\end{lstlisting}
+\end{badbox}
+
+\begin{goodbox}
+Every statement is on its own line with proper whitespace:
+\begin{lstlisting}
+account = find_account(accounts, transaction["account_id"])
+error = validate_common(account, transaction["amount"])
+if error:
+    return False, error
+
+account["balance"] += transaction["amount"]
+return True, "accepted"
+\end{lstlisting}
+\end{goodbox}
+
+\textbf{What is wrong:}
+\begin{itemize}
+    \item \textbf{Semicolons} pack 3--4 statements onto one line, making it nearly impossible to follow the logic.
+    \item \textbf{No whitespace} around \texttt{=} and after commas.
+    \item Control flow (\texttt{continue}) is \textbf{hidden at the end of a dense line}.
+    \item PEP\,8 explicitly states: ``Compound statements (multiple statements on the same line) are generally discouraged.''
+\end{itemize}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{PEP\,8 -- Compound Statements}: Generally discouraged. Each statement on its own line.
+    \item \textbf{PEP\,8 -- Whitespace}: Surround operators with spaces. Space after commas.
+    \item \textbf{Zen of Python}: ``Readability counts.'' ``Sparse is better than dense.''
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 6: No Context Managers for File I/O}
+% ============================================
+
+\begin{badbox}
+\begin{lstlisting}
+def loadJ(p):
+    f=open(p,'r');d=json.load(f);f.close();return d
+
+def saveJ(p,d):
+    f=open(p,'w');json.dump(d,f,indent=2);f.close()
+\end{lstlisting}
+If \texttt{json.load(f)} raises an exception, the file is \textbf{never closed} because \texttt{f.close()} is skipped. This is a resource leak.
+\end{badbox}
+
+\begin{goodbox}
+\begin{lstlisting}
+def load_json(file_path: str) -> dict:
+    """Read and parse a JSON file, returning the parsed data."""
+    with open(file_path, "r", encoding="utf-8") as file_handle:
+        return json.load(file_handle)
+\end{lstlisting}
+The \texttt{with} statement guarantees the file is closed even if an exception occurs.
+\end{goodbox}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{Pythonic Code}: Always use context managers (\texttt{with}) for resource management.
+    \item \textbf{Clean Code -- Error Handling}: Code should be robust against exceptions. Manual \texttt{open}/\texttt{close} is error-prone.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 7: God Function -- Single Responsibility Violation}
+% ============================================
+
+\begin{badbox}
+The function \texttt{proc()} is 38 lines long and handles \textbf{all of the following} on its own:
+\begin{itemize}[nosep]
+    \item Finding accounts by ID
+    \item Validating account status
+    \item Validating amounts
+    \item Processing deposits
+    \item Processing withdrawals
+    \item Processing transfers (including finding the target account)
+    \item Handling unknown transaction types
+    \item Building accepted and rejected lists
+\end{itemize}
+\begin{lstlisting}
+def proc(accs,txns):
+    ok=[];bad=[]
+    for t in txns:
+        ...  # 35 lines of nested if/elif/else with continue
+    return accs,ok,bad
+\end{lstlisting}
+\end{badbox}
+
+\begin{goodbox}
+The good version splits this into \textbf{seven focused functions}:
+\begin{lstlisting}
+def find_account(accounts, account_id):       # lookup
+def validate_common(account, amount):         # shared validation
+def process_deposit(accounts, transaction):   # deposit logic
+def process_withdrawal(accounts, transaction):# withdrawal logic
+def process_transfer(accounts, transaction):  # transfer logic
+def process_all_transactions(accounts, transactions): # orchestration
+def print_results(accounts, accepted, rejected):      # output
+\end{lstlisting}
+A dispatch dictionary replaces the \texttt{if/elif} chain:
+\begin{lstlisting}
+TRANSACTION_HANDLERS = {
+    "deposit": process_deposit,
+    "withdrawal": process_withdrawal,
+    "transfer": process_transfer,
+}
+\end{lstlisting}
+\end{goodbox}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{SRP (Single Responsibility Principle)}: Each function should have one reason to change.
+    \item \textbf{DRY (Don't Repeat Yourself)}: The amount validation (\texttt{amt<=0}) is duplicated for deposits and transfers in the bad version; \texttt{validate\_common()} eliminates this.
+    \item \textbf{Clean Code -- Short Functions}: Functions should be comprehensible in a few minutes.
+    \item \textbf{Open-Closed Principle}: Adding a new transaction type in the bad version requires modifying the \texttt{proc()} function. In the good version, you add a new handler function and register it in the dictionary.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 8: Magic Strings Instead of Constants}
+% ============================================
+
+\begin{badbox}
+\begin{lstlisting}
+if a['status']!='active':     # magic string
+    ...
+if tp=='deposit':             # magic string
+    ...
+\end{lstlisting}
+The strings \texttt{'active'}, \texttt{'deposit'}, \texttt{'withdrawal'}, and \texttt{'transfer'} appear throughout the code as \textbf{literals}. If the status name ever changed, every occurrence would need to be found and updated.
+\end{badbox}
+
+\begin{goodbox}
+\begin{lstlisting}
+ACTIVE_STATUS = "active"
+...
+if account["status"] != ACTIVE_STATUS:
+\end{lstlisting}
+Transaction types are handled via the \texttt{TRANSACTION\_HANDLERS} dictionary, so the string literals appear only \textbf{once} in the handler registration.
+\end{goodbox}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{Clean Code -- No Magic Numbers/Strings}: Use named constants for values that carry domain meaning.
+    \item \textbf{DRY}: The same literal repeated in multiple places is a maintenance risk.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 9: Comparison with \texttt{None}}
+% ============================================
+
+\begin{badbox}
+\begin{lstlisting}
+if a==None:
+    ...
+if ta==None:
+    ...
+\end{lstlisting}
+\end{badbox}
+
+\begin{goodbox}
+\begin{lstlisting}
+if account is None:
+    ...
+if target is None:
+    ...
+\end{lstlisting}
+\end{goodbox}
+
+PEP\,8 explicitly states: ``Comparisons to singletons like \texttt{None} should always be done with \texttt{is} or \texttt{is not}, never the equality operators.''
+The \texttt{is} operator checks \textbf{identity} (the correct test for \texttt{None}), while \texttt{==} checks \textbf{equality} and can be overridden by custom \texttt{\_\_eq\_\_} methods.
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{PEP\,8 -- Programming Recommendations}: Use \texttt{is None}, not \texttt{== None}.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Violation 10: Missing \texttt{\_\_main\_\_} Guard and String Formatting}
+% ============================================
+
+\begin{badbox}
+\begin{lstlisting}
+main()
+\end{lstlisting}
+\begin{lstlisting}
+print("  "+a['account_id']+" "+a['holder']+": "+str(a['balance'])
+    +" "+a['currency']+" ("+a['status']+")")
+\end{lstlisting}
+\end{badbox}
+
+\begin{goodbox}
+\begin{lstlisting}
+if __name__ == "__main__":
+    main()
+\end{lstlisting}
+\begin{lstlisting}
+print(
+    f"  {account['account_id']}  {account['holder']}: "
+    f"{account['balance']:.2f} {account['currency']}  "
+    f"({account['status']})"
+)
+\end{lstlisting}
+\end{goodbox}
+
+\textbf{What is wrong:}
+\begin{itemize}
+    \item No \texttt{\_\_main\_\_} guard means importing the module triggers execution.
+    \item String concatenation with \texttt{+} and \texttt{str()} is harder to read than f-strings.
+    \item The bad version does not format numbers: \texttt{str(5000.0)} prints \texttt{5000.0}, whereas the f-string format \texttt{:.2f} prints \texttt{5000.00}.
+\end{itemize}
+
+\begin{principlebox}[Principles Violated]
+\begin{itemize}[nosep]
+    \item \textbf{Clean Code -- Avoid Side Effects}: Importing should not trigger execution.
+    \item \textbf{Pythonic Code}: Use f-strings for string formatting.
+\end{itemize}
+\end{principlebox}
+
+% ============================================
+\section{Summary of Violations}
+% ============================================
+
+\begin{center}
+\small
+\begin{tabular}{@{}rp{4.5cm}p{5.5cm}@{}}
+\toprule
+\textbf{\#} & \textbf{Violation} & \textbf{Principle / PEP\,8 Rule} \\
+\midrule
+1  & Unused imports, one-line format        & PEP\,8 Imports, KISS \\
+2  & No docstrings, noise comments          & PEP\,257, Clean Code Documentation \\
+3  & Implicit data model (raw dicts)        & Explicit $>$ Implicit, PEP\,484/589 \\
+4  & Abbreviations, single-letter names     & PEP\,8 Naming, Descriptive Names \\
+5  & Semicolons, dense lines, no whitespace & PEP\,8 Whitespace, Zen of Python \\
+6  & Manual file open/close                 & Pythonic Code, Context Managers \\
+7  & God function (38-line \texttt{proc})   & SRP, DRY, Open-Closed Principle \\
+8  & Magic strings                          & No Magic Numbers, DRY \\
+9  & \texttt{== None} instead of \texttt{is None} & PEP\,8 Programming Recommendations \\
+10 & No \texttt{\_\_main\_\_} guard, string concat & Side Effects, Pythonic Code \\
+\bottomrule
+\end{tabular}
+\end{center}
+
+\end{document}
diff --git a/Clean Code exercise/example2_bank/bank_analysis.toc b/Clean Code exercise/example2_bank/bank_analysis.toc
new file mode 100644
index 0000000..ae12702
--- /dev/null
+++ b/Clean Code exercise/example2_bank/bank_analysis.toc	
@@ -0,0 +1,13 @@
+\babel@toc {english}{}\relax 
+\contentsline {section}{\numberline {1}Overview}{2}{section.1}%
+\contentsline {section}{\numberline {2}Violation 1: Unused Imports and Import Formatting}{2}{section.2}%
+\contentsline {section}{\numberline {3}Violation 2: No Documentation or Docstrings}{2}{section.3}%
+\contentsline {section}{\numberline {4}Violation 3: Implicit Data Model}{3}{section.4}%
+\contentsline {section}{\numberline {5}Violation 4: Poor Naming}{4}{section.5}%
+\contentsline {section}{\numberline {6}Violation 5: Formatting -- Semicolons and Dense Lines}{5}{section.6}%
+\contentsline {section}{\numberline {7}Violation 6: No Context Managers for File I/O}{6}{section.7}%
+\contentsline {section}{\numberline {8}Violation 7: God Function -- Single Responsibility Violation}{7}{section.8}%
+\contentsline {section}{\numberline {9}Violation 8: Magic Strings Instead of Constants}{8}{section.9}%
+\contentsline {section}{\numberline {10}Violation 9: Comparison with \texttt {None}}{8}{section.10}%
+\contentsline {section}{\numberline {11}Violation 10: Missing \texttt {\_\_main\_\_} Guard and String Formatting}{9}{section.11}%
+\contentsline {section}{\numberline {12}Summary of Violations}{10}{section.12}%
diff --git a/Clean Code exercise/example2_bank/bank_bad.py b/Clean Code exercise/example2_bank/bank_bad.py
new file mode 100644
index 0000000..5489cb7
--- /dev/null
+++ b/Clean Code exercise/example2_bank/bank_bad.py	
@@ -0,0 +1,62 @@
+import json,sys,os,copy;from datetime import datetime
+
+def loadJ(p):
+    f=open(p,'r');d=json.load(f);f.close();return d
+
+def saveJ(p,d):
+    f=open(p,'w');json.dump(d,f,indent=2);f.close()
+
+def proc(accs,txns):
+    ok=[];bad=[]
+    for t in txns:
+        tp=t['type'];aid=t['account_id'];amt=t['amount'];tid=t['id']
+        # find account
+        a=None
+        for x in accs:
+            if x['account_id']==aid:a=x
+        if a==None:
+            t['reason']='account not found';bad.append(t);continue
+        if a['status']!='active':
+            t['reason']='account not active';bad.append(t);continue
+        if amt<=0 and tp!='withdrawal':
+            if tp=='deposit':t['reason']='invalid amount';bad.append(t);continue
+            if tp=='transfer':t['reason']='invalid amount';bad.append(t);continue
+        if amt<=0 and tp=='withdrawal':
+            t['reason']='invalid amount';bad.append(t);continue
+        if tp=='deposit':
+            a['balance']=a['balance']+amt;t['status']='accepted';ok.append(t)
+        elif tp=='withdrawal':
+            if a['balance']>=amt:
+                a['balance']=a['balance']-amt;t['status']='accepted';ok.append(t)
+            else:
+                t['reason']='insufficient funds';t['status']='rejected';bad.append(t)
+        elif tp=='transfer':
+            ta=None
+            for x in accs:
+                if x['account_id']==t.get('to_account_id',''):ta=x
+            if ta==None:t['reason']='target account not found';bad.append(t);continue
+            if ta['status']!='active':t['reason']='target account not active';bad.append(t);continue
+            if a['balance']>=amt:
+                a['balance']=a['balance']-amt;ta['balance']=ta['balance']+amt
+                t['status']='accepted';ok.append(t)
+            else:
+                t['reason']='insufficient funds';t['status']='rejected';bad.append(t)
+        else:
+            t['reason']='unknown type';bad.append(t)
+    return accs,ok,bad
+
+def main():
+    D=loadJ('accounts.json');T=loadJ('transactions.json')
+    accs=D['accounts'];txns=T['transactions']
+    accs,ok,bad=proc(accs,txns)
+    # print results
+    print("=== UPDATED ACCOUNTS ===")
+    for a in accs:print("  "+a['account_id']+" "+a['holder']+": "+str(a['balance'])+" "+a['currency']+" ("+a['status']+")")
+    print("\n=== ACCEPTED ("+str(len(ok))+") ===")
+    for t in ok:print("  "+t['id']+" "+t['type']+" "+str(t['amount'])+" -> "+t.get('description',''))
+    print("\n=== REJECTED ("+str(len(bad))+") ===")
+    for t in bad:print("  "+t['id']+" "+t['type']+" "+str(t['amount'])+" -> "+t.get('reason','unknown'))
+    saveJ('accounts_updated_bad.json',{"accounts":accs})
+    saveJ('transaction_log_bad.json',{"accepted":ok,"rejected":bad})
+
+main()
diff --git a/Clean Code exercise/example2_bank/bank_good.py b/Clean Code exercise/example2_bank/bank_good.py
new file mode 100644
index 0000000..0c87662
--- /dev/null
+++ b/Clean Code exercise/example2_bank/bank_good.py	
@@ -0,0 +1,280 @@
+"""Bank account transaction processor.
+
+Reads account state and a list of transactions from JSON files,
+validates and applies each transaction, then writes updated account
+state and a transaction log (accepted / rejected) to output files.
+"""
+
+import json
+from typing import TypedDict, Optional
+
+
+# ---------------------------------------------------------------------------
+# Explicit data model -- defines the exact shape of every data structure
+# ---------------------------------------------------------------------------
+
+class Account(TypedDict):
+    """A bank account record as loaded from the accounts JSON file."""
+    account_id: str        # key that find_account() matches on
+    holder: str
+    balance: float         # updated in place by the transaction handlers
+    currency: str
+    status: str            # "active" or "frozen"
+
+
+class Transaction(TypedDict, total=False):
+    """A financial transaction to be processed.
+
+    total=False makes every key optional for type checkers (to_account_id
+    is transfer-only; status/reason are added during processing).
+    """
+    id: str
+    type: str              # "deposit", "withdrawal", or "transfer"
+    account_id: str
+    amount: float
+    description: str
+    to_account_id: str     # only for transfers
+    status: str            # added after processing: "accepted" / "rejected"
+    reason: str            # added on rejection
+
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+ACCOUNTS_INPUT = "accounts.json"                        # read by main()
+TRANSACTIONS_INPUT = "transactions.json"                # read by main()
+ACCOUNTS_OUTPUT = "accounts_updated_good.json"          # written by main()
+TRANSACTION_LOG_OUTPUT = "transaction_log_good.json"    # written by main()
+
+ACTIVE_STATUS = "active"   # accounts in any other status are rejected
+
+
+# ---------------------------------------------------------------------------
+# File I/O
+# ---------------------------------------------------------------------------
+
+def load_json(file_path: str) -> dict:
+    """Return the object decoded from the UTF-8 JSON file at file_path."""
+    with open(file_path, encoding="utf-8") as source:
+        return json.load(source)
+
+
+def save_json(file_path: str, data: dict) -> None:
+    """Serialise data to file_path as UTF-8 JSON indented by two spaces."""
+    with open(file_path, mode="w", encoding="utf-8") as sink:
+        json.dump(data, sink, ensure_ascii=False, indent=2)
+
+
+def load_accounts(file_path: str) -> list[Account]:
+    """Return the "accounts" array of the JSON document at file_path."""
+    document = load_json(file_path)
+    return document["accounts"]
+
+
+def load_transactions(file_path: str) -> list[Transaction]:
+    """Return the "transactions" array of the JSON document at file_path."""
+    document = load_json(file_path)
+    return document["transactions"]
+
+
+# ---------------------------------------------------------------------------
+# Account lookup
+# ---------------------------------------------------------------------------
+
+def find_account(accounts: list[Account], account_id: str) -> Optional[Account]:
+    """Return the first account whose ID equals account_id, else None."""
+    matches = (
+        entry for entry in accounts if entry["account_id"] == account_id
+    )
+    return next(matches, None)
+
+
+# ---------------------------------------------------------------------------
+# Validation
+# ---------------------------------------------------------------------------
+
+def validate_common(
+    account: Optional[Account],
+    amount: float,
+) -> Optional[str]:
+    """Check the preconditions that every transaction type shares.
+
+    Returns the rejection reason as text, or None when all checks pass.
+    """
+    if account is None:
+        return "account not found"
+
+    account_status = account["status"]
+    if account_status != ACTIVE_STATUS:
+        return f"account is {account_status}"
+
+    # Test for None first: comparing None with 0 would raise TypeError.
+    amount_is_invalid = amount is None or amount <= 0
+    return "amount must be positive" if amount_is_invalid else None
+
+
+# ---------------------------------------------------------------------------
+# Transaction handlers -- one function per transaction type
+# ---------------------------------------------------------------------------
+
+def process_deposit(
+    accounts: list[Account],
+    transaction: Transaction,
+) -> tuple[bool, str]:
+    """Credit a deposit to its account. Returns (success, reason)."""
+    recipient = find_account(accounts, transaction["account_id"])
+    deposit_amount = transaction["amount"]
+    rejection = validate_common(recipient, deposit_amount)
+    if rejection:
+        return False, rejection
+    recipient["balance"] += deposit_amount
+    return True, "accepted"
+
+
+def process_withdrawal(
+    accounts: list[Account],
+    transaction: Transaction,
+) -> tuple[bool, str]:
+    """Debit a withdrawal from its account. Returns (success, reason)."""
+    payer = find_account(accounts, transaction["account_id"])
+    requested = transaction["amount"]
+    rejection = validate_common(payer, requested)
+    if rejection:
+        return False, rejection
+
+    if requested > payer["balance"]:
+        return False, "insufficient funds"
+    payer["balance"] -= requested
+    return True, "accepted"
+
+
+def process_transfer(
+    accounts: list[Account],
+    transaction: Transaction,
+) -> tuple[bool, str]:
+    """Move funds from one account to another. Returns (success, reason)."""
+    sender = find_account(accounts, transaction["account_id"])
+    amount = transaction["amount"]
+    sender_problem = validate_common(sender, amount)
+    if sender_problem:
+        return False, f"source: {sender_problem}"
+
+    receiver = find_account(accounts, transaction.get("to_account_id", ""))
+    if receiver is None:
+        return False, "target account not found"
+    receiver_status = receiver["status"]
+    if receiver_status != ACTIVE_STATUS:
+        return False, f"target account is {receiver_status}"
+
+    if amount > sender["balance"]:
+        return False, "insufficient funds"
+
+    sender["balance"] -= amount
+    receiver["balance"] += amount
+    return True, "accepted"
+
+
+TRANSACTION_HANDLERS = {  # "type" value -> handler(accounts, transaction)
+    "deposit": process_deposit,
+    "withdrawal": process_withdrawal,
+    "transfer": process_transfer,
+}
+
+
+# ---------------------------------------------------------------------------
+# Processing
+# ---------------------------------------------------------------------------
+
+def process_all_transactions(
+    accounts: list[Account],
+    transactions: list[Transaction],
+) -> tuple[list[Transaction], list[Transaction]]:
+    """Apply every transaction, in order, to the given accounts.
+
+    Returns (accepted_transactions, rejected_transactions). Each
+    transaction gains a 'status' key; rejected ones also get a 'reason'.
+    """
+    accepted: list[Transaction] = []
+    rejected: list[Transaction] = []
+
+    for entry in transactions:
+        kind = entry.get("type", "")
+        handler = TRANSACTION_HANDLERS.get(kind)
+
+        if handler is not None:
+            succeeded, detail = handler(accounts, entry)
+        else:
+            succeeded = False
+            detail = f"unknown transaction type '{kind}'"
+
+        if succeeded:
+            entry["status"] = "accepted"
+            accepted.append(entry)
+            continue
+
+        # Rejections record why, so the log explains each failure.
+        entry["status"] = "rejected"
+        entry["reason"] = detail
+        rejected.append(entry)
+
+    return accepted, rejected
+
+
+# ---------------------------------------------------------------------------
+# Output
+# ---------------------------------------------------------------------------
+
+def print_results(
+    accounts: list[Account],
+    accepted: list[Transaction],
+    rejected: list[Transaction],
+) -> None:
+    """Print accounts, then accepted and rejected transactions, to stdout."""
+    print("=== UPDATED ACCOUNTS ===")
+    for account in accounts:
+        print(
+            f"  {account['account_id']}  {account['holder']}: "
+            f"{account['balance']:.2f} {account['currency']}  "
+            f"({account['status']})"
+        )
+
+    print(f"\n=== ACCEPTED TRANSACTIONS ({len(accepted)}) ===")
+    for txn in accepted:
+        summary = f"  {txn['id']}  {txn['type']:12s}  {txn['amount']:>10.2f}  "
+        print(f"{summary}{txn.get('description', '')}")
+
+    print(f"\n=== REJECTED TRANSACTIONS ({len(rejected)}) ===")
+    for txn in rejected:
+        # A rejected entry may lack 'type' or a numeric 'amount'; don't crash.
+        amount = txn.get("amount")
+        is_number = isinstance(amount, (int, float))
+        shown = f"{amount:>10.2f}" if is_number else f"{amount!s:>10}"
+        summary = f"  {txn['id']}  {str(txn.get('type', '')):12s}  {shown}  "
+        print(f"{summary}Reason: {txn.get('reason', 'unknown')}")
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+def main() -> None:
+    """Run the pipeline: load inputs, process, report, write outputs."""
+    accounts = load_accounts(ACCOUNTS_INPUT)
+    transactions = load_transactions(TRANSACTIONS_INPUT)
+
+    # Handlers update the account dicts in place, so `accounts` is current.
+    accepted, rejected = process_all_transactions(accounts, transactions)
+    print_results(accounts, accepted, rejected)
+
+    updated_state = {"accounts": accounts}
+    transaction_log = {"accepted": accepted, "rejected": rejected}
+    save_json(ACCOUNTS_OUTPUT, updated_state)
+    save_json(TRANSACTION_LOG_OUTPUT, transaction_log)
+
+    written = f"{ACCOUNTS_OUTPUT} and {TRANSACTION_LOG_OUTPUT}"
+    print(f"\nOutput written to {written}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Clean Code exercise/example2_bank/bank_usecase.aux b/Clean Code exercise/example2_bank/bank_usecase.aux
new file mode 100644
index 0000000..dcc0324
--- /dev/null
+++ b/Clean Code exercise/example2_bank/bank_usecase.aux	
@@ -0,0 +1,16 @@
+\relax 
+\providecommand \babel@aux [2]{\global \let \babel@toc \@gobbletwo }
+\@nameuse{bbl@beforestart}
+\providecommand\hyper@newdestlabel[2]{}
+\providecommand\HyField@AuxAddToFields[1]{}
+\providecommand\HyField@AuxAddToCoFields[2]{}
+\babel@aux{english}{}
+\@writefile{toc}{\contentsline {section}{\numberline {1}Use Case}{1}{section.1}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {2}Input Files}{1}{section.2}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Account State (\texttt  {accounts.json})}{1}{subsection.2.1}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Transactions (\texttt  {transactions.json})}{1}{subsection.2.2}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {3}Validation Rules}{1}{section.3}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {4}Output}{2}{section.4}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {5}Expected Results}{2}{section.5}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {6}Exercise}{2}{section.6}\protected@file@percent }
+\gdef \@abspage@last{3}
diff --git a/Clean Code exercise/example2_bank/bank_usecase.out b/Clean Code exercise/example2_bank/bank_usecase.out
new file mode 100644
index 0000000..e253279
--- /dev/null
+++ b/Clean Code exercise/example2_bank/bank_usecase.out	
@@ -0,0 +1,8 @@
+\BOOKMARK [1][-]{section.1}{\376\377\000U\000s\000e\000\040\000C\000a\000s\000e}{}% 1
+\BOOKMARK [1][-]{section.2}{\376\377\000I\000n\000p\000u\000t\000\040\000F\000i\000l\000e\000s}{}% 2
+\BOOKMARK [2][-]{subsection.2.1}{\376\377\000A\000c\000c\000o\000u\000n\000t\000\040\000S\000t\000a\000t\000e\000\040\000\050\000a\000c\000c\000o\000u\000n\000t\000s\000.\000j\000s\000o\000n\000\051}{section.2}% 3
+\BOOKMARK [2][-]{subsection.2.2}{\376\377\000T\000r\000a\000n\000s\000a\000c\000t\000i\000o\000n\000s\000\040\000\050\000t\000r\000a\000n\000s\000a\000c\000t\000i\000o\000n\000s\000.\000j\000s\000o\000n\000\051}{section.2}% 4
+\BOOKMARK [1][-]{section.3}{\376\377\000V\000a\000l\000i\000d\000a\000t\000i\000o\000n\000\040\000R\000u\000l\000e\000s}{}% 5
+\BOOKMARK [1][-]{section.4}{\376\377\000O\000u\000t\000p\000u\000t}{}% 6
+\BOOKMARK [1][-]{section.5}{\376\377\000E\000x\000p\000e\000c\000t\000e\000d\000\040\000R\000e\000s\000u\000l\000t\000s}{}% 7
+\BOOKMARK [1][-]{section.6}{\376\377\000E\000x\000e\000r\000c\000i\000s\000e}{}% 8
diff --git a/Clean Code exercise/example2_bank/bank_usecase.pdf b/Clean Code exercise/example2_bank/bank_usecase.pdf
new file mode 100644
index 0000000..31bad72
Binary files /dev/null and b/Clean Code exercise/example2_bank/bank_usecase.pdf differ
diff --git a/Clean Code exercise/example2_bank/bank_usecase.tex b/Clean Code exercise/example2_bank/bank_usecase.tex
new file mode 100644
index 0000000..9131585
--- /dev/null
+++ b/Clean Code exercise/example2_bank/bank_usecase.tex	
@@ -0,0 +1,152 @@
+\documentclass[12pt,a4paper]{article}
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+\usepackage[english]{babel}
+\usepackage{geometry}
+\geometry{margin=2.5cm}
+\usepackage{xcolor}
+\usepackage{tcolorbox}
+\usepackage{booktabs}
+\usepackage{hyperref}
+\usepackage{listings}
+
+\definecolor{seblue}{rgb}{0.0,0.28,0.67}
+\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
+
+\lstdefinestyle{json}{
+    backgroundcolor=\color{backcolour},
+    basicstyle=\ttfamily\small,
+    breaklines=true,
+    showstringspaces=false,
+    tabsize=2
+}
+
+\title{\textcolor{seblue}{Exercise 2: Bank Account Transaction Processor}\\[0.3em]
+\large AISE501 -- AI in Software Engineering I}
+\author{Dr.\ Florian Herzog}
+\date{Spring Semester 2026}
+
+\begin{document}
+\maketitle
+
+\section{Use Case}
+
+A simple bank system maintains a set of customer accounts, each with a balance, currency, and status (\texttt{active} or \texttt{frozen}).
+A series of transactions is submitted for processing.
+The program must validate each transaction, apply valid ones, reject invalid ones, and produce output files recording the results.
+
+\section{Input Files}
+
+\subsection{Account State (\texttt{accounts.json})}
+
+A JSON file containing an array of account objects:
+
+\begin{lstlisting}[style=json]
+{
+  "accounts": [
+    {
+      "account_id": "ACC-001",
+      "holder": "Alice Mueller",
+      "balance": 5000.00,
+      "currency": "CHF",
+      "status": "active"
+    },
+    ...
+  ]
+}
+\end{lstlisting}
+
+\subsection{Transactions (\texttt{transactions.json})}
+
+A JSON file containing an array of transaction objects.
+Each transaction has a \texttt{type} (\texttt{deposit}, \texttt{withdrawal}, or \texttt{transfer}), an \texttt{account\_id}, an \texttt{amount}, and a \texttt{description}.
+Transfers additionally have a \texttt{to\_account\_id}.
+
+\section{Validation Rules}
+
+A transaction is \textbf{rejected} if any of these conditions apply:
+
+\begin{center}
+\begin{tabular}{ll}
+\toprule
+\textbf{Condition} & \textbf{Applies to} \\
+\midrule
+Account ID does not exist                        & All types \\
+Account status is not \texttt{active}            & All types \\
+Amount is zero or negative                       & All types \\
+Balance is less than the transaction amount       & Withdrawal, Transfer \\
+Target account does not exist                     & Transfer \\
+Target account is not \texttt{active}             & Transfer \\
+Unknown transaction type                          & -- \\
+\bottomrule
+\end{tabular}
+\end{center}
+
+\section{Output}
+
+The program produces:
+
+\begin{enumerate}
+    \item \textbf{Console output} -- A summary of updated account balances, accepted transactions, and rejected transactions with reasons.
+    \item \textbf{Updated account state} (\texttt{accounts\_updated\_bad.json} / \texttt{accounts\_updated\_good.json}, depending on the implementation) -- The accounts JSON with balances modified by accepted transactions.
+    \item \textbf{Transaction log} (\texttt{transaction\_log\_bad.json} / \texttt{transaction\_log\_good.json}) -- Two arrays: \texttt{accepted} and \texttt{rejected}, each transaction annotated with its \texttt{status} and (for rejections) a \texttt{reason}.
+\end{enumerate}
+
+\section{Expected Results}
+
+Given the provided input files, the expected outcome is:
+
+\begin{center}
+\small
+\begin{tabular}{lllp{5cm}}
+\toprule
+\textbf{TXN ID} & \textbf{Type} & \textbf{Result} & \textbf{Reason (if rejected)} \\
+\midrule
+TXN-001 & deposit    & Accepted & -- \\
+TXN-002 & withdrawal & Accepted & -- \\
+TXN-003 & withdrawal & Rejected & Insufficient funds \\
+TXN-004 & deposit    & Rejected & Negative amount \\
+TXN-005 & deposit    & Rejected & Account is frozen \\
+TXN-006 & transfer   & Accepted & -- \\
+TXN-007 & withdrawal & Rejected & Account not found \\
+TXN-008 & deposit    & Rejected & Zero amount \\
+\bottomrule
+\end{tabular}
+\end{center}
+
+\section{Exercise}
+
+Two implementations are provided:
+
+\begin{enumerate}
+    \item \textbf{\texttt{bank\_bad.py}} -- A working but poorly written version that violates many clean code and PEP\,8 principles.
+    \item \textbf{\texttt{bank\_good.py}} -- A clean, well-structured version following PEP\,8 and clean code best practices.
+\end{enumerate}
+
+\subsection*{Tasks}
+
+\begin{enumerate}
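+    \item Run both programs and verify that they accept and reject the same transactions and produce the same final balances (the wording of the rejection reasons differs between the two versions).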
+    \item Read the bad version and list all clean code / PEP\,8 violations you can find.
+    \item For each violation, explain which principle is broken and why it makes the code harder to read or maintain.
+    \item Compare your list with the good version to see how each issue was resolved.
+\end{enumerate}
+
+\subsection*{Violations to Look For}
+
+\begin{itemize}
+    \item Unused imports (\texttt{sys}, \texttt{os}, \texttt{copy}, \texttt{datetime})
+    \item No docstrings or module documentation
+    \item Single-letter and abbreviated variable names (\texttt{a}, \texttt{t}, \texttt{d}, \texttt{tp}, \texttt{tid})
+    \item Multiple statements per line (semicolons)
+    \item No whitespace around operators and after commas
+    \item Manual file open/close instead of context managers (\texttt{with})
+    \item One giant function doing all validation (violates Single Responsibility)
+    \item Duplicated validation logic for deposit/transfer amount checks
+    \item No constants for file paths
+    \item Missing \texttt{if \_\_name\_\_ == "\_\_main\_\_"} guard
+    \item Inconsistent error handling and status assignment
+    \item Hard-to-follow control flow with nested \texttt{if}/\texttt{elif}/\texttt{continue}
+\end{itemize}
+
+\end{document}
diff --git a/Clean Code exercise/example2_bank/transaction_log_bad.json b/Clean Code exercise/example2_bank/transaction_log_bad.json
new file mode 100644
index 0000000..295e094
--- /dev/null
+++ b/Clean Code exercise/example2_bank/transaction_log_bad.json	
@@ -0,0 +1,72 @@
+{
+  "accepted": [
+    {
+      "id": "TXN-001",
+      "type": "deposit",
+      "account_id": "ACC-001",
+      "amount": 500.0,
+      "description": "Salary payment",
+      "status": "accepted"
+    },
+    {
+      "id": "TXN-002",
+      "type": "withdrawal",
+      "account_id": "ACC-001",
+      "amount": 200.0,
+      "description": "ATM withdrawal",
+      "status": "accepted"
+    },
+    {
+      "id": "TXN-006",
+      "type": "transfer",
+      "account_id": "ACC-001",
+      "to_account_id": "ACC-002",
+      "amount": 750.0,
+      "description": "Transfer to Bob",
+      "status": "accepted"
+    }
+  ],
+  "rejected": [
+    {
+      "id": "TXN-003",
+      "type": "withdrawal",
+      "account_id": "ACC-002",
+      "amount": 1500.0,
+      "description": "Rent payment - exceeds balance",
+      "reason": "insufficient funds",
+      "status": "rejected"
+    },
+    {
+      "id": "TXN-004",
+      "type": "deposit",
+      "account_id": "ACC-002",
+      "amount": -100.0,
+      "description": "Invalid negative deposit",
+      "reason": "invalid amount"
+    },
+    {
+      "id": "TXN-005",
+      "type": "deposit",
+      "account_id": "ACC-003",
+      "amount": 1000.0,
+      "description": "Deposit to frozen account",
+      "reason": "account not active"
+    },
+    {
+      "id": "TXN-007",
+      "type": "withdrawal",
+      "account_id": "ACC-999",
+      "amount": 50.0,
+      "description": "Unknown account",
+      "reason": "account not found"
+    },
+    {
+      "id": "TXN-008",
+      "type": "deposit",
+      "account_id": "ACC-001",
+      "amount": 0,
+      "description": "Zero-amount deposit",
+      "reason": "invalid amount"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/Clean Code exercise/example2_bank/transaction_log_good.json b/Clean Code exercise/example2_bank/transaction_log_good.json
new file mode 100644
index 0000000..edca35a
--- /dev/null
+++ b/Clean Code exercise/example2_bank/transaction_log_good.json	
@@ -0,0 +1,76 @@
+{
+  "accepted": [
+    {
+      "id": "TXN-001",
+      "type": "deposit",
+      "account_id": "ACC-001",
+      "amount": 500.0,
+      "description": "Salary payment",
+      "status": "accepted"
+    },
+    {
+      "id": "TXN-002",
+      "type": "withdrawal",
+      "account_id": "ACC-001",
+      "amount": 200.0,
+      "description": "ATM withdrawal",
+      "status": "accepted"
+    },
+    {
+      "id": "TXN-006",
+      "type": "transfer",
+      "account_id": "ACC-001",
+      "to_account_id": "ACC-002",
+      "amount": 750.0,
+      "description": "Transfer to Bob",
+      "status": "accepted"
+    }
+  ],
+  "rejected": [
+    {
+      "id": "TXN-003",
+      "type": "withdrawal",
+      "account_id": "ACC-002",
+      "amount": 1500.0,
+      "description": "Rent payment - exceeds balance",
+      "status": "rejected",
+      "reason": "insufficient funds"
+    },
+    {
+      "id": "TXN-004",
+      "type": "deposit",
+      "account_id": "ACC-002",
+      "amount": -100.0,
+      "description": "Invalid negative deposit",
+      "status": "rejected",
+      "reason": "amount must be positive"
+    },
+    {
+      "id": "TXN-005",
+      "type": "deposit",
+      "account_id": "ACC-003",
+      "amount": 1000.0,
+      "description": "Deposit to frozen account",
+      "status": "rejected",
+      "reason": "account is frozen"
+    },
+    {
+      "id": "TXN-007",
+      "type": "withdrawal",
+      "account_id": "ACC-999",
+      "amount": 50.0,
+      "description": "Unknown account",
+      "status": "rejected",
+      "reason": "account not found"
+    },
+    {
+      "id": "TXN-008",
+      "type": "deposit",
+      "account_id": "ACC-001",
+      "amount": 0,
+      "description": "Zero-amount deposit",
+      "status": "rejected",
+      "reason": "amount must be positive"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/Clean Code exercise/example2_bank/transactions.json b/Clean Code exercise/example2_bank/transactions.json
new file mode 100644
index 0000000..340e3fa
--- /dev/null
+++ b/Clean Code exercise/example2_bank/transactions.json	
@@ -0,0 +1,61 @@
+{
+    "transactions": [
+        {
+            "id": "TXN-001",
+            "type": "deposit",
+            "account_id": "ACC-001",
+            "amount": 500.00,
+            "description": "Salary payment"
+        },
+        {
+            "id": "TXN-002",
+            "type": "withdrawal",
+            "account_id": "ACC-001",
+            "amount": 200.00,
+            "description": "ATM withdrawal"
+        },
+        {
+            "id": "TXN-003",
+            "type": "withdrawal",
+            "account_id": "ACC-002",
+            "amount": 1500.00,
+            "description": "Rent payment - exceeds balance"
+        },
+        {
+            "id": "TXN-004",
+            "type": "deposit",
+            "account_id": "ACC-002",
+            "amount": -100.00,
+            "description": "Invalid negative deposit"
+        },
+        {
+            "id": "TXN-005",
+            "type": "deposit",
+            "account_id": "ACC-003",
+            "amount": 1000.00,
+            "description": "Deposit to frozen account"
+        },
+        {
+            "id": "TXN-006",
+            "type": "transfer",
+            "account_id": "ACC-001",
+            "to_account_id": "ACC-002",
+            "amount": 750.00,
+            "description": "Transfer to Bob"
+        },
+        {
+            "id": "TXN-007",
+            "type": "withdrawal",
+            "account_id": "ACC-999",
+            "amount": 50.00,
+            "description": "Unknown account"
+        },
+        {
+            "id": "TXN-008",
+            "type": "deposit",
+            "account_id": "ACC-001",
+            "amount": 0,
+            "description": "Zero-amount deposit"
+        }
+    ]
+}
diff --git a/Code embeddings/00_tokens_and_embeddings_intro.py b/Code embeddings/00_tokens_and_embeddings_intro.py
new file mode 100644
index 0000000..825b4e6
--- /dev/null
+++ b/Code embeddings/00_tokens_and_embeddings_intro.py	
@@ -0,0 +1,486 @@
+"""
+============================================================================
+Example 0: Tokens, Embeddings, and Language Similarity — An Introduction
+============================================================================
+AISE501 – AI in Software Engineering I
+Fachhochschule Graubünden
+
+GOAL:
+    Before we look at CODE embeddings, we need to understand the
+    foundational concepts: tokenization and text embeddings. This script
+    walks through the full pipeline step by step, using German words
+    and phrases so you can build intuition in your native language.
+
+    The pipeline is:   Text → Tokens → Token IDs → Embedding Vectors
+
+WHAT YOU WILL LEARN:
+    1. How text is split into TOKENS (sub-word units)
+    2. How tokens are mapped to integer IDs (the model's vocabulary)
+    3. How token IDs become dense EMBEDDING VECTORS (768 dimensions)
+    4. How cosine similarity measures meaning — similar phrases are
+       close in vector space, different phrases are far apart
+    5. How to VISUALIZE the embedding space in 2D using PCA
+
+LANGUAGE:
+    All examples use German words and phrases to make the concepts
+    tangible. The model (multilingual) handles German natively.
+
+HARDWARE:
+    Works on CPU, CUDA (NVIDIA), and MPS (Apple Silicon Mac).
+============================================================================
+"""
+
+import torch
+import numpy as np
+from transformers import AutoTokenizer, AutoModel, BertTokenizer
+import torch.nn.functional as F
+from sklearn.decomposition import PCA
+import matplotlib
+import matplotlib.pyplot as plt
+
+matplotlib.use("Agg")
+
+# ── Device selection ──────────────────────────────────────────────────────
+def get_device():
+    """Return the preferred torch device: CUDA first, then MPS, else CPU."""
+    if torch.cuda.is_available():
+        return torch.device("cuda")
+    has_mps = torch.backends.mps.is_available()
+    return torch.device("mps" if has_mps else "cpu")
+
+DEVICE = get_device()
+print(f"Using device: {DEVICE}\n")
+
+# ── Load a MULTILINGUAL EMBEDDING model ───────────────────────────────────
+# We use paraphrase-multilingual-mpnet-base-v2: a sentence embedding model
+# fine-tuned for semantic similarity across 50+ languages including German.
+# It uses an XLM-RoBERTa backbone and produces 768-dimensional embeddings
+# where cosine similarity directly reflects semantic similarity.
+MODEL_NAME = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
+
+print(f"Loading model: {MODEL_NAME} ...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
+model.eval()
+print("Model loaded.\n")
+
+# ── Load a German-only tokenizer for comparison ──────────────────────────
+# gbert-base uses WordPiece trained exclusively on German text (~31k vocab).
+# We only load its tokenizer — no model weights needed.
+GERMAN_TOKENIZER_NAME = "deepset/gbert-base"
+print(f"Loading German tokenizer: {GERMAN_TOKENIZER_NAME} ...")
+german_tokenizer = BertTokenizer.from_pretrained(GERMAN_TOKENIZER_NAME)
+print("German tokenizer loaded.\n")
+
+
+# ══════════════════════════════════════════════════════════════════════════
+# PART 1: TOKENIZATION — How text becomes numbers
+# ══════════════════════════════════════════════════════════════════════════
+print("=" * 70)
+print("PART 1: TOKENIZATION")
+print("=" * 70)
+print("""
+Neural networks cannot read text — they only understand numbers.
+TOKENIZATION is the first step: splitting text into sub-word pieces
+called TOKENS, then mapping each token to an integer ID.
+
+We compare two tokenizers:
+  • gbert (German-only, ~31k vocab)  — trained exclusively on German text
+  • mpnet (multilingual, ~250k vocab) — trained on 100+ languages
+""")
+
+german_words = [
+    "Fachhochschule",
+    "Softwareentwicklung",
+    "Künstliche Intelligenz",
+    "Programmiersprache",
+    "Datenbank",
+    "Maschinelles Lernen",
+    "Graubünden",
+    "unhappiness",       # English comparison
+]
+
+# ── 1a: German-only tokenizer (gbert / WordPiece) ────────────────────────
+print("─── 1a: German-Only Tokenizer (gbert, WordPiece, 31k vocab) ───\n")
+print(f"{'Word/Phrase':<28s} {'#':>3s}  {'Tokens'}")
+print("-" * 90)
+
+for word in german_words:
+    ids = german_tokenizer.encode(word, add_special_tokens=False)
+    toks = german_tokenizer.convert_ids_to_tokens(ids)
+    print(f"{word:<28s} {len(toks):3d}  {' | '.join(toks)}")
+
+# ── 1b: Multilingual tokenizer (mpnet / SentencePiece) ───────────────────
+print(f"\n─── 1b: Multilingual Tokenizer (mpnet, SentencePiece, 250k vocab) ───\n")
+print(f"{'Word/Phrase':<28s} {'#':>3s}  {'Tokens'}")
+print("-" * 90)
+
+for word in german_words:
+    ids = tokenizer.encode(word, add_special_tokens=False)
+    toks = tokenizer.convert_ids_to_tokens(ids)
+    print(f"{word:<28s} {len(toks):3d}  {' | '.join(toks)}")
+
+print("""
+KEY OBSERVATIONS:
+  • The GERMAN tokenizer keeps common words intact: "Fachhochschule" is
+    a SINGLE token, "Programmiersprache" splits at the natural compound
+    boundary "Programmier" + "sprache".
+  • The MULTILINGUAL tokenizer fragments German more aggressively:
+    "Fachhochschule" → 4 tokens ("Fach", "ho", "ch", "schule"), because
+    its 250k vocabulary is shared across 100+ languages — German gets
+    a smaller budget per word.
+  • Both tokenizers use STATISTICAL sub-word splitting (not morphological
+    analysis). The German tokenizer simply has more German-specific
+    entries because its entire vocabulary is dedicated to one language.
+  • Trade-off: the multilingual tokenizer needs more tokens per German
+    word, but it enables CROSS-LINGUAL capabilities (comparing German
+    and English in the same embedding space — see Part 3b).
+  • The rest of this script uses the multilingual model for embeddings.
+""")
+
+
+# ══════════════════════════════════════════════════════════════════════════
+# PART 2: FROM TOKENS TO EMBEDDING VECTORS
+# ══════════════════════════════════════════════════════════════════════════
+print("=" * 70)
+print("PART 2: FROM TOKENS TO EMBEDDING VECTORS")
+print("=" * 70)
+print("""
+Each token ID is looked up in an EMBEDDING TABLE — a large matrix where
+each row is a dense vector (768 dimensions in this model, up to 4096 in
+large LLMs). The transformer then refines these vectors through 12 layers
+of self-attention, producing contextual embeddings where each token's
+vector depends on ALL surrounding tokens.
+""")
+
+example_sentence = "Der Student lernt Programmieren an der Fachhochschule"
+
+inputs = tokenizer(example_sentence, return_tensors="pt").to(DEVICE)
+token_ids = inputs["input_ids"].squeeze().tolist()
+tokens = tokenizer.convert_ids_to_tokens(token_ids)
+
+with torch.no_grad():
+    outputs = model(**inputs)
+
+# outputs.last_hidden_state: shape [1, num_tokens, 768]
+hidden_states = outputs.last_hidden_state.squeeze(0)
+
+print(f'Sentence: "{example_sentence}"\n')
+print(f"{'Pos':>4s}  {'Token':<20s} {'ID':>7s}  {'Vector (first 8 of 768 dims)...'}")
+print("-" * 80)
+
+for i, (tok, tid) in enumerate(zip(tokens, token_ids)):
+    vec = hidden_states[i].cpu().numpy()
+    vec_preview = "  ".join(f"{v:+.3f}" for v in vec[:8])
+    print(f"{i:4d}  {tok:<20s} {tid:7d}  [{vec_preview} ...]")
+
+print(f"""
+KEY OBSERVATIONS:
+  • Each token becomes a vector of {hidden_states.shape[1]} numbers.
+  • These numbers are NOT random — they encode the token's meaning
+    IN CONTEXT. The vector for "Fachhochschule" here is different from
+    the vector for "Fachhochschule" in a different sentence.
+  • The full sentence has {len(tokens)} tokens, producing a matrix of
+    shape [{len(tokens)} × {hidden_states.shape[1]}].
+  • To get a single vector for the whole sentence, we average all
+    token vectors (mean pooling).
+""")
+
+
+# ══════════════════════════════════════════════════════════════════════════
+# PART 3: MEASURING SIMILARITY BETWEEN WORDS
+# ══════════════════════════════════════════════════════════════════════════
+print("=" * 70)
+print("PART 3: WORD AND PHRASE SIMILARITY")
+print("=" * 70)
+print("""
+If embeddings capture meaning, then SIMILAR words should have SIMILAR
+vectors (high cosine similarity) and DIFFERENT words should have
+DIFFERENT vectors (low cosine similarity). Let's test this with German.
+""")
+
+
+def embed_text(text: str) -> torch.Tensor:
+    """Return the L2-normalized, mask-aware mean-pooled embedding of `text`."""
+    encoded = tokenizer(text, return_tensors="pt", truncation=True,
+                        max_length=128, padding=True).to(DEVICE)
+    with torch.no_grad():
+        token_vectors = model(**encoded).last_hidden_state
+    weights = encoded["attention_mask"].unsqueeze(-1)
+    pooled = (token_vectors * weights).sum(dim=1) / weights.sum(dim=1)
+    return F.normalize(pooled, p=2, dim=1).squeeze(0)
+
+
+# ── 3a: Single word similarities ─────────────────────────────────────────
+print("─── 3a: Single Word Similarities ───\n")
+
+word_pairs = [
+    # Semantically SIMILAR pairs (synonyms or near-synonyms)
+    ("Auto", "Fahrzeug"),          # car / vehicle — near-synonyms
+    ("Arzt", "Doktor"),            # physician / doctor — synonyms
+    ("Programmierer", "Entwickler"),  # programmer / developer
+    ("schnell", "rasch"),          # fast / swift — synonyms
+    ("Haus", "Gebäude"),           # house / building — closely related
+
+    # SAME CATEGORY but different concepts
+    ("Hund", "Katze"),             # dog / cat — both pets, but different!
+    ("Montag", "Freitag"),         # Monday / Friday — both weekdays
+
+    # Semantically UNRELATED pairs
+    ("Hund", "Mathematik"),        # dog vs math
+    ("Auto", "Philosophie"),       # car vs philosophy
+    ("schnell", "Datenbank"),      # fast vs database
+]
+
+print(f"{'Word A':<20s} {'Word B':<20s} {'Cosine Sim':>10s}  {'Relationship'}")
+print("-" * 75)
+
+# Both vectors are unit-length, so their dot product is the cosine similarity.
+for w1, w2 in word_pairs:
+    vec_a = embed_text(w1).cpu()
+    vec_b = embed_text(w2).cpu()
+    sim = torch.dot(vec_a, vec_b).item()
+    # Coarse label for the printout: > 0.6 close, > 0.3 related.
+    rel = ("synonyms/close" if sim > 0.6
+           else "related" if sim > 0.3
+           else "unrelated")
+    bar = "█" * int(max(0, sim) * 30)
+    print(f"{w1:<20s} {w2:<20s} {sim:10.3f}  {bar} ({rel})")
+
+print("""
+KEY OBSERVATIONS:
+  → Synonyms (Auto/Fahrzeug, Arzt/Doktor) have HIGHEST similarity.
+  → Same-category but different concepts (Hund/Katze) have MODERATE
+    similarity — they share context (both are pets) but a dog is NOT
+    a cat. The model captures this nuance!
+  → Completely unrelated words (Hund/Mathematik) have LOW similarity.
+  → Embedding similarity reflects MEANING OVERLAP, not just category.
+""")
+
+# ── 3b: Phrase/sentence similarities ─────────────────────────────────────
+print("─── 3b: Phrase and Sentence Similarities ───\n")
+
+phrases = {
+    "ML_de": "Maschinelles Lernen ist ein Teilgebiet der Informatik",
+    "ML_en": "Machine learning is a subfield of computer science",
+    "DL_de": "Deep Learning verwendet neuronale Netze mit vielen Schichten",
+    "Koch":  "Der Koch bereitet das Abendessen in der Küche vor",
+    "Wetter": "Morgen wird es regnen und kalt sein",
+    "Prog":  "Python ist eine beliebte Programmiersprache",
+}
+
+phrase_embeddings = {name: embed_text(text) for name, text in phrases.items()}
+
+# Print the pairwise cosine-similarity matrix, one header row then one row
+# per phrase; embeddings are unit vectors, so dot product = cosine similarity.
+names = list(phrases)
+header_cells = [f"{'':>10s}"] + [f"{n:>10s}" for n in names]
+print("".join(header_cells))
+
+for n1 in names:
+    row_vec = phrase_embeddings[n1].cpu()
+    cells = [f"{n1:>10s}"]
+    for n2 in names:
+        sim = torch.dot(row_vec, phrase_embeddings[n2].cpu()).item()
+        cells.append(f"{sim:10.3f}")
+    print("".join(cells))
+
+print("""
+KEY OBSERVATIONS:
+  • "Maschinelles Lernen..." (German) and "Machine learning..." (English)
+    should have HIGH similarity — the model understands both languages
+    and maps equivalent meanings to nearby vectors.
+  • ML and Deep Learning sentences should be moderately similar (related
+    topics in computer science).
+  • The cooking sentence and weather sentence should be DISSIMILAR to
+    the tech sentences — completely different topics.
+  • This CROSS-LINGUAL capability is what makes multilingual embeddings
+    so powerful.
+""")
+
+
+# ══════════════════════════════════════════════════════════════════════════
+# PART 4: VISUALIZING THE EMBEDDING SPACE
+# ══════════════════════════════════════════════════════════════════════════
+print("=" * 70)
+print("PART 4: VISUALIZING THE EMBEDDING SPACE")
+print("=" * 70)
+print("""
+768 dimensions are impossible to visualize. We use PCA to project the
+vectors down to 2D while preserving as much structure as possible.
+If the embeddings truly capture meaning, we should see CLUSTERS of
+related words in the 2D plot.
+""")
+
+# Groups of German words organized by semantic category
+word_groups = {
+    "Tiere": ["Hund", "Katze", "Pferd", "Vogel", "Fisch", "Kuh"],
+    "Technik": ["Computer", "Software", "Programmieren", "Datenbank",
+                "Algorithmus", "Internet"],
+    "Essen": ["Brot", "Käse", "Apfel", "Suppe", "Kuchen", "Wurst"],
+    "Natur": ["Berg", "Fluss", "Wald", "See", "Wiese", "Schnee"],
+    "Berufe": ["Arzt", "Lehrer", "Ingenieur", "Koch", "Pilot", "Anwalt"],
+}
+
+# Flatten the category → words mapping into three parallel lists
+# (word, its category, its embedding) that the PCA and plot code index by row.
+all_words, all_categories, all_vectors = [], [], []
+
+print("Computing embeddings for word groups...")
+for category, words in word_groups.items():
+    all_words.extend(words)
+    all_categories.extend([category] * len(words))
+    all_vectors.extend(embed_text(w).cpu().numpy() for w in words)
+    print(f"  {category}: {', '.join(words)}")
+
+# Stack the per-word vectors into one matrix:
+# one row per word, one column per embedding dimension.
+X = np.stack(all_vectors)
+print(f"\nEmbedding matrix: {X.shape[0]} words × {X.shape[1]} dimensions")
+
+# ── PCA to 2D ────────────────────────────────────────────────────────────
+pca = PCA(n_components=2)
+X_2d = pca.fit_transform(X)
+
+# ── Plot ──────────────────────────────────────────────────────────────────
+category_names = list(word_groups.keys())
+cmap = plt.cm.Set1
+colors = {cat: cmap(i / len(category_names)) for i, cat in enumerate(category_names)}
+
+fig, ax = plt.subplots(figsize=(12, 9))
+
+for i, (word, cat) in enumerate(zip(all_words, all_categories)):
+    x, y = X_2d[i]
+    ax.scatter(x, y, c=[colors[cat]], s=120, edgecolors="black",
+               linewidth=0.5, zorder=3)
+    ax.annotate(word, (x, y), fontsize=9, ha="center", va="bottom",
+                xytext=(0, 7), textcoords="offset points",
+                fontweight="bold")
+
+for cat in category_names:
+    ax.scatter([], [], c=[colors[cat]], s=100, label=cat,
+               edgecolors="black", linewidth=0.5)
+
+ax.legend(loc="best", fontsize=11, title="Kategorie", title_fontsize=12,
+          framealpha=0.9)
+
+var = pca.explained_variance_ratio_
+ax.set_title(
+    "Deutsche Wörter im Embedding-Raum (768D → 2D via PCA)\n"
+    f"PC1: {var[0]:.1%} Varianz, PC2: {var[1]:.1%} Varianz",
+    fontsize=14, fontweight="bold"
+)
+ax.set_xlabel("Hauptkomponente 1 (PC1)", fontsize=12)
+ax.set_ylabel("Hauptkomponente 2 (PC2)", fontsize=12)
+ax.grid(True, alpha=0.3)
+fig.tight_layout()
+fig.savefig("embedding_space_german.png", dpi=150)
+print(f"\nSaved: embedding_space_german.png")
+
+# ── Second plot: Phrases including cross-lingual ──────────────────────────
+print("\nComputing phrase embeddings for visualization...")
+
+viz_phrases = {
+    # German CS phrases
+    "Maschinelles Lernen": "Technik (DE)",
+    "Neuronale Netze": "Technik (DE)",
+    "Softwareentwicklung": "Technik (DE)",
+    "Künstliche Intelligenz": "Technik (DE)",
+    # English equivalents
+    "Machine Learning": "Technik (EN)",
+    "Neural Networks": "Technik (EN)",
+    "Software Development": "Technik (EN)",
+    "Artificial Intelligence": "Technik (EN)",
+    # German everyday phrases
+    "Guten Morgen": "Alltag (DE)",
+    "Wie geht es Ihnen": "Alltag (DE)",
+    "Das Wetter ist schön": "Alltag (DE)",
+    "Ich gehe einkaufen": "Alltag (DE)",
+    # English everyday phrases
+    "Good morning": "Alltag (EN)",
+    "How are you": "Alltag (EN)",
+    "The weather is nice": "Alltag (EN)",
+    "I am going shopping": "Alltag (EN)",
+}
+
+phrase_labels = list(viz_phrases.keys())
+phrase_cats = list(viz_phrases.values())
+phrase_vecs = np.stack([embed_text(p).cpu().numpy() for p in phrase_labels])
+
+pca2 = PCA(n_components=2)
+P_2d = pca2.fit_transform(phrase_vecs)
+
+cat_colors = {
+    "Technik (DE)": "#1f77b4",
+    "Technik (EN)": "#aec7e8",
+    "Alltag (DE)":  "#d62728",
+    "Alltag (EN)":  "#ff9896",
+}
+
+fig2, ax2 = plt.subplots(figsize=(12, 9))
+
+for i, (label, cat) in enumerate(zip(phrase_labels, phrase_cats)):
+    x, y = P_2d[i]
+    marker = "o" if "(DE)" in cat else "s"  # circle=German, square=English
+    ax2.scatter(x, y, c=cat_colors[cat], s=140, marker=marker,
+                edgecolors="black", linewidth=0.5, zorder=3)
+    ax2.annotate(label, (x, y), fontsize=8, ha="center", va="bottom",
+                 xytext=(0, 8), textcoords="offset points")
+
+for cat, color in cat_colors.items():
+    marker = "o" if "(DE)" in cat else "s"
+    ax2.scatter([], [], c=color, s=100, marker=marker, label=cat,
+                edgecolors="black", linewidth=0.5)
+
+ax2.legend(loc="best", fontsize=10, title="Kategorie & Sprache",
+           title_fontsize=11, framealpha=0.9)
+
+var2 = pca2.explained_variance_ratio_
+ax2.set_title(
+    "Cross-lingual Embeddings: Deutsche & Englische Phrasen\n"
+    f"PC1: {var2[0]:.1%} Varianz, PC2: {var2[1]:.1%} Varianz",
+    fontsize=14, fontweight="bold"
+)
+ax2.set_xlabel("Hauptkomponente 1 (PC1)", fontsize=12)
+ax2.set_ylabel("Hauptkomponente 2 (PC2)", fontsize=12)
+ax2.grid(True, alpha=0.3)
+fig2.tight_layout()
+fig2.savefig("embedding_space_crosslingual.png", dpi=150)
+print(f"Saved: embedding_space_crosslingual.png")
+
+print(f"""
+{'=' * 70}
+SUMMARY: THE FULL PIPELINE
+{'=' * 70}
+
+  Text           →  Tokens          →  Token IDs       →  Embeddings
+  "Fachhochschule"   [▁Fach, ho,        [28356, 497,       [0.012, -0.34,
+                      ch, schule]        206, 72460]         0.88, ...]
+                                                            (768 dimensions)
+
+  1. TOKENIZATION splits text into statistical sub-word pieces.
+     → Splits are based on frequency, not German morphology.
+     → Each token maps to an integer ID from the vocabulary.
+
+  2. EMBEDDING VECTORS are 768-dimensional representations of meaning.
+     → Computed by the transformer's 12 layers of self-attention.
+     → Similar meanings → nearby vectors (high cosine similarity).
+     → Different meanings → distant vectors (low cosine similarity).
+
+  3. COSINE SIMILARITY measures how "aligned" two vectors are.
+     → 1.0 = identical meaning, 0.0 = unrelated, -1.0 = opposite.
+
+  4. CROSS-LINGUAL EMBEDDINGS map equivalent phrases in different
+     languages to nearby vectors. "Maschinelles Lernen" ≈ "Machine
+     Learning" in embedding space.
+
+  5. The SAME PRINCIPLES apply to CODE EMBEDDINGS (next examples):
+     → Code is tokenized into sub-word pieces
+     → A transformer produces embedding vectors
+     → Similar code has similar vectors
+     → This enables semantic code search, clone detection, and RAG
+
+  Check the two PNG files for visual confirmation:
+    • embedding_space_german.png      — German word clusters
+    • embedding_space_crosslingual.png — DE/EN phrase alignment
+""")
diff --git a/Code embeddings/01_basic_embeddings.py b/Code embeddings/01_basic_embeddings.py
new file mode 100644
index 0000000..1b26399
--- /dev/null
+++ b/Code embeddings/01_basic_embeddings.py	
@@ -0,0 +1,231 @@
+"""
+============================================================================
+Example 1: Computing Code Embeddings and Measuring Similarity
+============================================================================
+AISE501 – AI in Software Engineering I
+Fachhochschule Graubünden
+
+GOAL:
+    Load a pre-trained code embedding model, embed several code snippets,
+    and compute pairwise cosine similarities to see which snippets the
+    model considers semantically similar.
+
+WHAT YOU WILL LEARN:
+    - How to load a code embedding model with PyTorch
+    - How code is tokenized and converted to vectors
+    - How cosine similarity reveals semantic relationships
+    - That similar functionality → high similarity, different purpose → low
+
+HARDWARE:
+    Works on CPU, CUDA (NVIDIA), and MPS (Apple Silicon Mac).
+============================================================================
+"""
+
+import torch
+from transformers import AutoTokenizer, AutoModel
+import torch.nn.functional as F
+
+# ── Device selection ──────────────────────────────────────────────────────
+# PyTorch supports three backends:
+#   - "cuda"  → NVIDIA GPUs (Linux/Windows)
+#   - "mps"   → Apple Silicon GPUs (macOS M1/M2/M3/M4)
+#   - "cpu"   → always available, slower
+def get_device():
+    """Pick the torch backend to run on, preferring cuda, then mps, then cpu."""
+    if torch.cuda.is_available():
+        return torch.device("cuda")
+    backend = "mps" if torch.backends.mps.is_available() else "cpu"
+    return torch.device(backend)
+
+DEVICE = get_device()
+print(f"Using device: {DEVICE}\n")
+
+# ── Load model and tokenizer ─────────────────────────────────────────────
+# We use st-codesearch-distilroberta-base — a DistilRoBERTa model (82M params)
+# specifically fine-tuned on 1.38M code-comment pairs from CodeSearchNet using
+# contrastive learning. It produces 768-dim embeddings optimized for matching
+# natural language descriptions to code, making it ideal for code search and
+# similarity tasks.
+MODEL_NAME = "flax-sentence-embeddings/st-codesearch-distilroberta-base"
+
+print(f"Loading model: {MODEL_NAME} ...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
+model.eval()  # disable dropout — we want deterministic embeddings
+print("Model loaded.\n")
+
+
+# ── Define code snippets to compare ──────────────────────────────────────
+# We intentionally include:
+#   - Three sorting functions (same purpose, very different implementations:
+#     nested loops, recursive list comprehensions, and the built-in sorted())
+#   - Two file-reading functions with a different purpose (JSON and CSV)
+snippets = {
+    "bubble_sort": """
+def bubble_sort(arr):
+    n = len(arr)
+    for i in range(n):
+        for j in range(0, n - i - 1):
+            if arr[j] > arr[j + 1]:
+                arr[j], arr[j + 1] = arr[j + 1], arr[j]
+    return arr
+""",
+    "quick_sort": """
+def quick_sort(arr):
+    if len(arr) <= 1:
+        return arr
+    pivot = arr[len(arr) // 2]
+    left = [x for x in arr if x < pivot]
+    middle = [x for x in arr if x == pivot]
+    right = [x for x in arr if x > pivot]
+    return quick_sort(left) + middle + quick_sort(right)
+""",
+    "sorted_builtin": """
+def sort_list(data):
+    return sorted(data)
+""",
+    "parse_json": """
+import json
+
+def parse_config(filepath):
+    with open(filepath, 'r') as f:
+        config = json.load(f)
+    return config
+""",
+    "read_csv": """
+import csv
+
+def read_csv_file(filepath):
+    rows = []
+    with open(filepath, 'r') as f:
+        reader = csv.reader(f)
+        for row in reader:
+            rows.append(row)
+    return rows
+""",
+}
+
+
+def embed_code(code_text: str) -> torch.Tensor:
+    """
+    Embed one code snippet as a single unit-length vector.
+
+    Pipeline:
+        raw code → token IDs → per-token vectors → pooled vector → unit vector
+
+    The transformer emits one contextual vector per token. To compare whole
+    snippets we need exactly one vector per snippet, so the per-token vectors
+    are averaged (mean pooling) and the result is rescaled to length 1. With
+    unit vectors, a plain dot product equals the cosine similarity.
+
+    Uses the module-level ``tokenizer``, ``model`` and ``DEVICE``.
+
+    Args:
+        code_text: Source code (or any text) to embed.
+
+    Returns:
+        A 1-D unit-norm tensor on DEVICE (768 values for this model).
+    """
+
+    # ── Step 1: Tokenization ──────────────────────────────────────────────
+    # Split the text into sub-word tokens and map each one to its integer ID
+    # in the model's vocabulary; the network only operates on these IDs.
+    #
+    # Besides the IDs, the tokenizer returns an "attention_mask" with 1 at
+    # real-token positions and 0 at padding positions. Step 3 uses it to
+    # keep padding out of the average.
+    #
+    # truncation=True with max_length=512 cuts the input off after 512
+    # tokens, so very long snippets are embedded from their beginning only.
+    encoded = tokenizer(
+        code_text,
+        return_tensors="pt",
+        truncation=True,
+        max_length=512,
+        padding=True,
+    ).to(DEVICE)
+
+    # ── Step 2: Forward pass through the transformer ──────────────────────
+    # Run the token IDs through the model's self-attention layers.
+    #
+    # torch.no_grad() switches off gradient tracking: this is inference
+    # only, so recording the autograd graph would just cost memory and time.
+    #
+    # last_hidden_state has shape [1, seq_len, hidden]:
+    #   → batch of 1, seq_len tokens, one hidden-size vector per token.
+    #
+    # These are CONTEXTUAL vectors, not the static lookup-table embeddings:
+    # after attention, every token's vector also carries information from
+    # the other tokens in the sequence.
+    with torch.no_grad():
+        token_vectors = model(**encoded).last_hidden_state
+
+    # ── Step 3: Mean pooling — many token vectors → one snippet vector ────
+    # Problem: one vector per token, but snippet comparison needs ONE vector.
+    #
+    # Average the token vectors, counting only real tokens:
+    #   1. Multiply each token vector by its mask value (1 real, 0 padding)
+    #      so padding positions contribute nothing to the sum.
+    #   2. Sum over the sequence dimension.
+    #   3. Divide by the number of real tokens, not the padded length.
+    #
+    # Note that the model's outputs at padding positions are generally not
+    # zero by themselves; without the mask they would skew the average.
+    keep = encoded["attention_mask"].unsqueeze(-1)  # [1, seq_len, 1]
+    summed = (token_vectors * keep).sum(dim=1)
+    pooled = summed / keep.sum(dim=1)
+
+    # ── Step 4: L2 normalization — project onto the unit hypersphere ──────
+    # For unit-length vectors cosine similarity reduces to a dot product:
+    #
+    #   cos(θ) = (a · b) / (‖a‖ · ‖b‖)  →  if ‖a‖=‖b‖=1  →  cos(θ) = a · b
+    #
+    # Normalizing once here means callers can compare embeddings with a
+    # plain torch.dot (as the similarity table below does), and differences
+    # in vector magnitude no longer matter — only direction is compared.
+    #
+    # F.normalize(p=2, dim=1) divides each row by its Euclidean norm.
+    unit = F.normalize(pooled, p=2, dim=1)
+
+    # Drop the batch dimension: [1, hidden] → [hidden].
+    return unit.squeeze(0)
+
+
+# ── Compute embeddings for all snippets ───────────────────────────────────
+print("Computing embeddings...")
+embeddings = {}
+for name, code in snippets.items():
+    embeddings[name] = embed_code(code)
+    num_tokens = len(tokenizer.encode(code))
+    print(f"  {name:20s} → {num_tokens:3d} tokens → vector of dim {embeddings[name].shape[0]}")
+
+print()
+
+# ── Compute pairwise cosine similarities ──────────────────────────────────
+# cosine_similarity = dot product of unit vectors (we already normalized above)
+names = list(embeddings)
+print("Pairwise Cosine Similarities:")
+# Header row: a blank corner cell, then one right-aligned column per snippet.
+print(f"{'':22s}" + "".join(f"{n:>16s}" for n in names))
+
+# Move each embedding to the CPU once instead of once per table cell.
+cpu_vectors = {n: embeddings[n].cpu() for n in names}
+for n1 in names:
+    cells = [f"{n1:22s}"]
+    for n2 in names:
+        sim = torch.dot(cpu_vectors[n1], cpu_vectors[n2]).item()
+        cells.append(f"{sim:16.3f}")
+    print("".join(cells))
+
+# ── Interpretation ────────────────────────────────────────────────────────
+print("\n" + "=" * 70)
+print("INTERPRETATION:")
+print("=" * 70)
+print("""
+- bubble_sort, quick_sort, and sorted_builtin should have HIGH similarity
+  (all perform sorting, despite very different implementations).
+- parse_json and read_csv should be similar to each other (both read files)
+  but DISSIMILAR to the sorting functions (different purpose).
+- This demonstrates that code embeddings capture WHAT code does,
+  not just HOW it looks syntactically.
+""")
diff --git a/Code embeddings/02_text_to_code_search.py b/Code embeddings/02_text_to_code_search.py
new file mode 100644
index 0000000..0c3d85d
--- /dev/null
+++ b/Code embeddings/02_text_to_code_search.py	
@@ -0,0 +1,251 @@
+"""
+============================================================================
+Example 2: Text-to-Code Semantic Search
+============================================================================
+AISE501 – AI in Software Engineering I
+Fachhochschule Graubünden
+
+GOAL:
+    Build a mini code search engine: given a natural language query like
+    "sort a list", find the most relevant code snippet from a collection.
+    This is the core mechanism behind semantic code search in tools like
+    Cursor, GitHub Copilot, and code search engines.
+
+WHAT YOU WILL LEARN:
+    - How the SAME embedding model maps both text and code into a shared
+      vector space — this is what makes text-to-code search possible.
+    - How to build a simple search index and query it.
+    - Why embedding-based search beats keyword search for code.
+
+HARDWARE:
+    Works on CPU, CUDA (NVIDIA), and MPS (Apple Silicon Mac).
+============================================================================
+"""
+
+import torch
+from transformers import AutoTokenizer, AutoModel
+import torch.nn.functional as F
+
+# ── Device selection ──────────────────────────────────────────────────────
+def get_device():
+    """Return the preferred torch device: CUDA first, then Apple MPS, else CPU."""
+    if torch.cuda.is_available():
+        return torch.device("cuda")
+    fallback = "mps" if torch.backends.mps.is_available() else "cpu"
+    return torch.device(fallback)
+
+DEVICE = get_device()
+print(f"Using device: {DEVICE}\n")
+
+# ── Load model ────────────────────────────────────────────────────────────
+MODEL_NAME = "flax-sentence-embeddings/st-codesearch-distilroberta-base"
+print(f"Loading model: {MODEL_NAME} ...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
+model.eval()
+print("Model loaded.\n")
+
+# ── Code "database" ──────────────────────────────────────────────────────
+# Imagine these are functions in a large codebase that we want to search.
+code_database = [
+    {
+        "name": "binary_search",
+        "code": """
+def binary_search(arr, target):
+    low, high = 0, len(arr) - 1
+    while low <= high:
+        mid = (low + high) // 2
+        if arr[mid] == target:
+            return mid
+        elif arr[mid] < target:
+            low = mid + 1
+        else:
+            high = mid - 1
+    return -1
+"""
+    },
+    {
+        "name": "merge_sort",
+        "code": """
+def merge_sort(arr):
+    if len(arr) <= 1:
+        return arr
+    mid = len(arr) // 2
+    left = merge_sort(arr[:mid])
+    right = merge_sort(arr[mid:])
+    return merge(left, right)
+"""
+    },
+    {
+        "name": "read_json_file",
+        "code": """
+import json
+def read_json_file(path):
+    with open(path, 'r') as f:
+        return json.load(f)
+"""
+    },
+    {
+        "name": "calculate_average",
+        "code": """
+def calculate_average(numbers):
+    if not numbers:
+        return 0.0
+    return sum(numbers) / len(numbers)
+"""
+    },
+    {
+        "name": "connect_database",
+        "code": """
+import sqlite3
+def connect_database(db_path):
+    conn = sqlite3.connect(db_path)
+    cursor = conn.cursor()
+    return conn, cursor
+"""
+    },
+    {
+        "name": "send_http_request",
+        "code": """
+import requests
+def send_http_request(url, method='GET', data=None):
+    if method == 'GET':
+        response = requests.get(url)
+    else:
+        response = requests.post(url, json=data)
+    return response.json()
+"""
+    },
+    {
+        "name": "flatten_nested_list",
+        "code": """
+def flatten(nested_list):
+    result = []
+    for item in nested_list:
+        if isinstance(item, list):
+            result.extend(flatten(item))
+        else:
+            result.append(item)
+    return result
+"""
+    },
+    {
+        "name": "count_words",
+        "code": """
+def count_words(text):
+    words = text.lower().split()
+    word_count = {}
+    for word in words:
+        word_count[word] = word_count.get(word, 0) + 1
+    return word_count
+"""
+    },
+    {
+        "name": "validate_email",
+        "code": """
+import re
+def validate_email(email):
+    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$'
+    return bool(re.match(pattern, email))
+"""
+    },
+    {
+        "name": "fibonacci",
+        "code": """
+def fibonacci(n):
+    if n <= 1:
+        return n
+    a, b = 0, 1
+    for _ in range(2, n + 1):
+        a, b = b, a + b
+    return b
+"""
+    },
+]
+
+
+def embed_text(text: str) -> torch.Tensor:
+    """Return the unit-length, mean-pooled embedding of one text or code string."""
+    encoded = tokenizer(
+        text, return_tensors="pt", truncation=True, max_length=512, padding=True
+    )
+    encoded = encoded.to(DEVICE)
+    with torch.no_grad():
+        hidden = model(**encoded).last_hidden_state
+    # Average the token vectors, counting only positions the attention mask keeps.
+    keep = encoded["attention_mask"].unsqueeze(-1)
+    pooled = (hidden * keep).sum(dim=1) / keep.sum(dim=1)
+    # L2-normalize each row, then squeeze away dim 0 (size 1 for a single string).
+    return F.normalize(pooled, p=2, dim=1).squeeze(0)
+
+
+# ── Step 1: Index the code database ───────────────────────────────────────
+# In a real system this would be stored in a vector database (ChromaDB,
+# Pinecone, pgvector). Here we keep it simple with a list of tensors.
+print("Indexing code database...")
+code_vectors = []
+for entry in code_database:
+    vec = embed_text(entry["code"])
+    code_vectors.append(vec)
+    print(f"  Indexed: {entry['name']}")
+
+# Stack into a matrix: shape [num_snippets, embedding_dim]
+code_matrix = torch.stack(code_vectors)
+print(f"\nIndex built: {code_matrix.shape[0]} snippets, {code_matrix.shape[1]} dimensions\n")
+
+
+# ── Step 2: Search with natural language queries ──────────────────────────
+queries = [
+    "sort a list of numbers",
+    "find an element in a sorted array",
+    "compute the mean of a list",
+    "make an HTTP API call",
+    "open and read a JSON file",
+    "check if an email address is valid",
+    "count word frequencies in a string",
+    "generate fibonacci numbers",
+    "connect to a SQL database",
+    "flatten a nested list into a single list",
+]
+
+print("=" * 70)
+print("SEMANTIC CODE SEARCH RESULTS")
+print("=" * 70)
+
+for query in queries:
+    # The query goes through the SAME encoder as the indexed snippets.
+    query_vec = embed_text(query)
+    # Unit-length vectors: the matrix-vector product is the cosine similarity
+    # between the query and every row (snippet) of the index.
+    similarities = torch.mv(code_matrix.cpu(), query_vec.cpu())
+    # Indices of the three highest-scoring snippets, best first.
+    top_hits = torch.argsort(similarities, descending=True)[:3].tolist()
+
+    print(f'\nQuery: "{query}"')
+    print("  Rank  Score  Function")
+    print("  ----  -----  --------")
+    for position, hit in enumerate(top_hits, start=1):
+        score = similarities[hit].item()
+        name = code_database[hit]["name"]
+        if position == 1:
+            print(f"  {position:4d}  {score:.3f}  {name} ← best match")
+        else:
+            print(f"  {position:4d}  {score:.3f}  {name}")
+
+print("\n" + "=" * 70)
+print("KEY OBSERVATIONS:")
+print("=" * 70)
+print("""
+1. The model maps NATURAL LANGUAGE queries and CODE into the same vector
+   space. This is why "sort a list" finds merge_sort and "find an element
+   in a sorted array" finds binary_search — even though the queries
+   contain none of the function identifiers.
+
+2. This is fundamentally different from grep/keyword search:
+   - grep "sort" would miss functions named "order" or "arrange"
+   - grep "find element" would miss "binary_search"
+   Embeddings understand MEANING, not just string matching.
+
+3. This is exactly how Cursor, Copilot, and other AI coding tools
+   retrieve relevant code from your project to feed into the LLM.
+""")
diff --git a/Code embeddings/03_cross_language.py b/Code embeddings/03_cross_language.py
new file mode 100644
index 0000000..593a2ed
--- /dev/null
+++ b/Code embeddings/03_cross_language.py	
@@ -0,0 +1,199 @@
+"""
+============================================================================
+Example 3: Cross-Language Code Similarity
+============================================================================
+AISE501 – AI in Software Engineering I
+Fachhochschule Graubünden
+
+GOAL:
+    Demonstrate that code embeddings capture FUNCTIONALITY, not syntax.
+    The same algorithm written in Python, JavaScript, Java, and C++
+    should produce similar embedding vectors — even though the surface
+    syntax is completely different.
+
+WHAT YOU WILL LEARN:
+    - Code embedding models create a language-agnostic semantic space.
+    - Functionally equivalent code clusters together regardless of language.
+    - This enables cross-language code search (e.g., find the Java
+      equivalent of a Python function).
+
+HARDWARE:
+    Works on CPU, CUDA (NVIDIA), and MPS (Apple Silicon Mac).
+============================================================================
+"""
+
+import torch
+from transformers import AutoTokenizer, AutoModel
+import torch.nn.functional as F
+
+# ── Device selection ──────────────────────────────────────────────────────
+def get_device():
+    """Return the preferred torch device: CUDA first, then Apple MPS, else CPU."""
+    if torch.cuda.is_available():
+        return torch.device("cuda")
+    fallback = "mps" if torch.backends.mps.is_available() else "cpu"
+    return torch.device(fallback)
+
+DEVICE = get_device()
+print(f"Using device: {DEVICE}\n")
+
+# ── Load model ────────────────────────────────────────────────────────────
+MODEL_NAME = "flax-sentence-embeddings/st-codesearch-distilroberta-base"
+print(f"Loading model: {MODEL_NAME} ...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
+model.eval()
+print("Model loaded.\n")
+
+# ── Same algorithm in four languages ──────────────────────────────────────
+# Task A: Factorial — a simple recursive/iterative computation
+# Task B: Reverse a string
+# If embeddings are truly semantic, Task A functions should cluster together
+# and Task B functions should cluster together, regardless of language.
+
+code_snippets = {
+    # ── Task A: Factorial ──
+    "factorial_python": """
+def factorial(n):
+    result = 1
+    for i in range(2, n + 1):
+        result *= i
+    return result
+""",
+    "factorial_javascript": """
+function factorial(n) {
+    let result = 1;
+    for (let i = 2; i <= n; i++) {
+        result *= i;
+    }
+    return result;
+}
+""",
+    "factorial_java": """
+public static int factorial(int n) {
+    int result = 1;
+    for (int i = 2; i <= n; i++) {
+        result *= i;
+    }
+    return result;
+}
+""",
+    "factorial_cpp": """
+int factorial(int n) {
+    int result = 1;
+    for (int i = 2; i <= n; i++) {
+        result *= i;
+    }
+    return result;
+}
+""",
+
+    # ── Task B: Reverse a string ──
+    "reverse_python": """
+def reverse_string(s):
+    return s[::-1]
+""",
+    "reverse_javascript": """
+function reverseString(s) {
+    return s.split('').reverse().join('');
+}
+""",
+    "reverse_java": """
+public static String reverseString(String s) {
+    return new StringBuilder(s).reverse().toString();
+}
+""",
+    "reverse_cpp": """
+std::string reverseString(std::string s) {
+    std::reverse(s.begin(), s.end());
+    return s;
+}
+""",
+}
+
+
+def embed_code(code: str) -> torch.Tensor:
+    """Return the L2-normalized, mask-weighted mean of the model's token states."""
+    encoded = tokenizer(
+        code, return_tensors="pt", truncation=True, max_length=512, padding=True
+    ).to(DEVICE)
+    with torch.no_grad():
+        hidden = model(**encoded).last_hidden_state
+    keep = encoded["attention_mask"].unsqueeze(-1)  # broadcast over the hidden dim
+    pooled = (hidden * keep).sum(dim=1) / keep.sum(dim=1)  # mean of kept tokens
+    return F.normalize(pooled, p=2, dim=1).squeeze(0)
+
+
+# ── Compute all embeddings ────────────────────────────────────────────────
+print("Computing embeddings for all snippets...")
+embeddings = {}
+for name, code in code_snippets.items():
+    embeddings[name] = embed_code(code)
+print(f"Done. {len(embeddings)} embeddings computed.\n")
+
+# ── Compute similarity matrix ─────────────────────────────────────────────
+names = list(embeddings.keys())
+n = len(names)
+
+print("=" * 70)
+print("CROSS-LANGUAGE SIMILARITY MATRIX")
+print("=" * 70)
+
+# Print header (abbreviated names for readability)
+short_names = [n.replace("factorial_", "F:").replace("reverse_", "R:") for n in names]
+
+print(f"\n{'':14s}", end="")
+for sn in short_names:
+    print(f"{sn:>10s}", end="")
+print()
+
+for i in range(n):
+    print(f"{short_names[i]:14s}", end="")
+    for j in range(n):
+        sim = torch.dot(embeddings[names[i]].cpu(), embeddings[names[j]].cpu()).item()
+        print(f"{sim:10.3f}", end="")
+    print()
+
+# ── Compute average within-task and across-task similarities ──────────────
+factorial_names = [name for name in names if "factorial" in name]
+reverse_names = [name for name in names if "reverse" in name]
+
+within_factorial, within_reverse, across_tasks = [], [], []
+
+# Walk the strict upper triangle so each unordered pair is scored once and
+# no snippet is compared with itself.
+for i, n1 in enumerate(names):
+    for n2 in names[i + 1:]:
+        sim = torch.dot(embeddings[n1].cpu(), embeddings[n2].cpu()).item()
+        both_factorial = n1 in factorial_names and n2 in factorial_names
+        both_reverse = n1 in reverse_names and n2 in reverse_names
+        if both_factorial:
+            within_factorial.append(sim)
+        elif both_reverse:
+            within_reverse.append(sim)
+        else:
+            across_tasks.append(sim)
+
+print("\n" + "=" * 70)
+print("ANALYSIS")
+print("=" * 70)
+print(f"\nAvg similarity WITHIN factorial (across languages): "
+      f"{sum(within_factorial)/len(within_factorial):.3f}")
+print(f"Avg similarity WITHIN reverse   (across languages): "
+      f"{sum(within_reverse)/len(within_reverse):.3f}")
+print(f"Avg similarity ACROSS tasks     (factorial vs reverse): "
+      f"{sum(across_tasks)/len(across_tasks):.3f}")
+
+print("""
+EXPECTED RESULT:
+  Within-task similarity should be MUCH HIGHER than across-task similarity.
+  This proves that the embedding model groups code by WHAT IT DOES,
+  not by WHAT LANGUAGE it is written in.
+
+  factorial_python ≈ factorial_java ≈ factorial_cpp ≈ factorial_javascript
+  reverse_python   ≈ reverse_java   ≈ reverse_cpp   ≈ reverse_javascript
+  factorial_*      ≠ reverse_*
+
+  This is what enables cross-language code search: you can find a Java
+  implementation by providing a Python query, or vice versa.
+""")
diff --git a/Code embeddings/04_clone_detection.py b/Code embeddings/04_clone_detection.py
new file mode 100644
index 0000000..03a7c3a
--- /dev/null
+++ b/Code embeddings/04_clone_detection.py	
@@ -0,0 +1,237 @@
+"""
+============================================================================
+Example 4: Code Clone Detection
+============================================================================
+AISE501 – AI in Software Engineering I
+Fachhochschule Graubünden
+
+GOAL:
+    Detect code clones (duplicate/similar code) in a collection of
+    functions using embeddings. We simulate a real-world scenario
+    where a codebase contains multiple near-duplicate implementations
+    that should be refactored into a single function.
+
+WHAT YOU WILL LEARN:
+    - The four types of code clones (Type 1–4)
+    - How embeddings detect clones that text-based tools miss
+    - Ranking-based clone detection via cosine similarity
+    - Practical application: finding refactoring opportunities
+
+CLONE TYPES:
+    Type 1: Exact copy (trivial — grep can find these)
+    Type 2: Renamed variables (grep misses these)
+    Type 3: Modified structure (added/removed lines)
+    Type 4: Same functionality, completely different implementation
+
+HARDWARE:
+    Works on CPU, CUDA (NVIDIA), and MPS (Apple Silicon Mac).
+============================================================================
+"""
+
+import torch
+from transformers import AutoTokenizer, AutoModel
+import torch.nn.functional as F
+from itertools import combinations
+
+# ── Device selection ──────────────────────────────────────────────────────
+def get_device():
+    """Return the preferred torch device: CUDA first, then Apple MPS, else CPU."""
+    if torch.cuda.is_available():
+        return torch.device("cuda")
+    fallback = "mps" if torch.backends.mps.is_available() else "cpu"
+    return torch.device(fallback)
+
+DEVICE = get_device()
+print(f"Using device: {DEVICE}\n")
+
+# ── Load model ────────────────────────────────────────────────────────────
+MODEL_NAME = "flax-sentence-embeddings/st-codesearch-distilroberta-base"
+print(f"Loading model: {MODEL_NAME} ...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
+model.eval()
+print("Model loaded.\n")
+
+# ── Simulated codebase ────────────────────────────────────────────────────
+# These functions simulate what you'd find in a messy, real-world codebase
+# where different developers wrote similar functionality independently.
+#
+# IMPORTANT: Apart from one exact copy (Type 1), the clones share no
+# identifiers (only Python keywords). Embeddings must therefore rely on
+# semantics, not surface-level text overlap; grep would only find the copy.
+codebase = {
+    # ── Clone group 1: Computing the maximum of a list ──
+    #    A loop version plus its exact copy (Type 1), and two structurally
+    #    different variants (heapq, sorted) that share no identifiers with it.
+    "utils/find_max.py": """
+def find_max(numbers):
+    result = numbers[0]
+    for candidate in numbers[1:]:
+        if candidate > result:
+            result = candidate
+    return result
+""",
+    "legacy/find_max_old.py": """
+def find_max(numbers):
+    result = numbers[0]
+    for candidate in numbers[1:]:
+        if candidate > result:
+            result = candidate
+    return result
+""",
+    "analytics/top_scorer.py": """
+import heapq
+def fetch_top_element(collection):
+    return heapq.nlargest(1, collection)[0]
+""",
+    "stats/dominant_value.py": """
+def extract_peak(dataset):
+    dataset = sorted(dataset, reverse=True)
+    return dataset[0]
+""",
+
+    # ── Clone group 2: String reversal ──
+    #    Two implementations with zero lexical overlap — slicing vs index-based.
+    "text/flip_text.py": """
+def flip_text(content):
+    return content[::-1]
+""",
+    "helpers/mirror.py": """
+def mirror_characters(phrase):
+    output = []
+    idx = len(phrase) - 1
+    while idx >= 0:
+        output.append(phrase[idx])
+        idx -= 1
+    return ''.join(output)
+""",
+
+    # ── Not a clone: completely different functionality ──
+    # Each uses a different Python construct and domain to ensure
+    # they don't cluster with each other or with the clone groups.
+    "math/square_root.py": """
+def square_root(x):
+    return x ** 0.5
+""",
+    "calendar/leap_year.py": """
+def is_leap_year(year):
+    return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
+""",
+    "formatting/currency.py": """
+def format_currency(amount, symbol="$"):
+    return f"{symbol}{amount:,.2f}"
+""",
+}
+
+
+def embed_code(code: str) -> torch.Tensor:
+    """Return the L2-normalized, mask-weighted mean of the model's token states."""
+    encoded = tokenizer(
+        code, return_tensors="pt", truncation=True, max_length=512, padding=True
+    ).to(DEVICE)
+    with torch.no_grad():
+        hidden = model(**encoded).last_hidden_state
+    keep = encoded["attention_mask"].unsqueeze(-1)  # broadcast over the hidden dim
+    pooled = (hidden * keep).sum(dim=1) / keep.sum(dim=1)  # mean of kept tokens
+    return F.normalize(pooled, p=2, dim=1).squeeze(0)
+
+
+# ── Embed all functions ───────────────────────────────────────────────────
+print("Embedding all functions in the codebase...")
+embeddings = {}
+for path, code in codebase.items():
+    embeddings[path] = embed_code(code)
+    print(f"  {path}")
+print()
+
+# ── Compute pairwise similarity matrix ────────────────────────────────────
+paths = list(embeddings.keys())
+n = len(paths)
+
+def short_name(path):
+    """Return the last '/'-separated component of path with every ".py" removed."""
+    return path.rpartition("/")[2].replace(".py", "")
+
+labels = [short_name(p) for p in paths]
+
+# Dot product of the stored vectors for every ordered (row, column) index pair.
+sim_matrix = {
+    (i, j): torch.dot(embeddings[paths[i]].cpu(), embeddings[paths[j]].cpu()).item()
+    for i in range(n) for j in range(n)
+}
+
+# ── Print similarity matrix ───────────────────────────────────────────────
+col_w = max(len(l) for l in labels) + 2
+header_w = col_w
+
+print("=" * 70)
+print("SIMILARITY MATRIX")
+print("=" * 70)
+
+print(f"\n{'':>{header_w}}", end="")
+for label in labels:
+    print(f"{label:>{col_w}}", end="")
+print()
+
+for i in range(n):
+    print(f"{labels[i]:>{header_w}}", end="")
+    for j in range(n):
+        print(f"{sim_matrix[(i, j)]:>{col_w}.3f}", end="")
+    print()
+
+# ── Most similar match per function ───────────────────────────────────────
+# For each column, find the OTHER function with the highest similarity once
+# and reuse that index for both summary rows (max keeps the first on ties).
+best_matches = []
+for i in range(n):
+    others = [j for j in range(n) if j != i]
+    best_matches.append(max(others, key=lambda j: sim_matrix[(i, j)]))
+
+print()
+print(f"{'BEST MATCH':>{header_w}}", end="")
+for best_j in best_matches:
+    print(f"{labels[best_j]:>{col_w}}", end="")
+print()
+print(f"{'(similarity)':>{header_w}}", end="")
+for i, best_j in enumerate(best_matches):
+    print(f"{sim_matrix[(i, best_j)]:>{col_w}.3f}", end="")
+print()
+
+print(f"""
+{'=' * 70}
+INTERPRETATION:
+{'=' * 70}
+
+HOW TO READ THE TABLE:
+  Each cell shows the cosine similarity between two functions.
+  The BEST MATCH row shows which other function is most similar
+  to each column — these are the clone candidates a developer
+  would investigate.
+
+EXPECTED CLONE GROUPS:
+
+  1. find_max ↔ find_max_old  (Type 1: exact copy)
+     → Similarity ≈ 1.000
+
+  2. find_max / fetch_top_element / extract_peak  (Type 4 clones)
+     → Same purpose (find the largest value), completely different
+       code: for-loop vs heapq.nlargest() vs sorted(reverse=True)
+     → Zero shared identifiers between implementations
+
+  3. flip_text ↔ mirror_characters  (Type 4 clone)
+     → Same purpose (reverse a string), completely different code:
+       slicing ([::-1]) vs while-loop with index
+     → Zero shared identifiers
+
+NON-CLONES:
+  square_root, is_leap_year, format_currency each use a different
+  domain and code structure. Their best matches should have low
+  similarity compared to the clone groups.
+
+KEY INSIGHT:
+  The clone groups share NO common words (besides Python keywords
+  like def/return/if). grep or any text-matching tool would never
+  find these clones. Only semantic understanding — which is what
+  embeddings provide — can detect that these functions do the same
+  thing despite having completely different code.
+""")
diff --git a/Code embeddings/05_visualize_embeddings.py b/Code embeddings/05_visualize_embeddings.py
new file mode 100644
index 0000000..fb8d93a
--- /dev/null
+++ b/Code embeddings/05_visualize_embeddings.py	
@@ -0,0 +1,216 @@
+"""
+============================================================================
+Example 5: Visualizing Code Embeddings with PCA and t-SNE
+============================================================================
+AISE501 – AI in Software Engineering I
+Fachhochschule Graubünden
+
+GOAL:
+    Reduce 768-dimensional code embeddings to 2D and plot them.
+    This makes the embedding space visible: you can SEE that similar
+    code clusters together and different code is far apart.
+
+WHAT YOU WILL LEARN:
+    - How PCA projects high-dimensional vectors to 2D (linear reduction)
+    - How t-SNE creates a non-linear 2D map that preserves neighborhoods
+    - How to interpret embedding space visualizations
+    - That code functionality determines position, not syntax or language
+
+OUTPUT:
+    Saves two PNG plots: code_embeddings_pca.png and code_embeddings_tsne.png
+
+HARDWARE:
+    Works on CPU, CUDA (NVIDIA), and MPS (Apple Silicon Mac).
+============================================================================
+"""
+
+import torch
+import numpy as np
+from transformers import AutoTokenizer, AutoModel
+import torch.nn.functional as F
+from sklearn.decomposition import PCA
+from sklearn.manifold import TSNE
+import matplotlib.pyplot as plt
+import matplotlib
+
+# Use a non-interactive backend so the script works in headless environments
+matplotlib.use("Agg")
+
+# ── Device selection ──────────────────────────────────────────────────────
+def get_device():
+    """Return the preferred torch device: CUDA first, then Apple MPS, else CPU."""
+    if torch.cuda.is_available():
+        return torch.device("cuda")
+    fallback = "mps" if torch.backends.mps.is_available() else "cpu"
+    return torch.device(fallback)
+
+DEVICE = get_device()
+print(f"Using device: {DEVICE}\n")
+
+# ── Load model ────────────────────────────────────────────────────────────
+MODEL_NAME = "flax-sentence-embeddings/st-codesearch-distilroberta-base"
+print(f"Loading model: {MODEL_NAME} ...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
+model.eval()
+print("Model loaded.\n")
+
+# ── Code snippets organized by CATEGORY ───────────────────────────────────
+# Each category represents a type of task. We expect snippets within the
+# same category to cluster together in the embedding space.
+categories = {
+    "Sorting": {
+        "bubble_sort_py": "def bubble_sort(arr):\n    n = len(arr)\n    for i in range(n):\n        for j in range(n-i-1):\n            if arr[j] > arr[j+1]:\n                arr[j], arr[j+1] = arr[j+1], arr[j]\n    return arr",
+        "quick_sort_py": "def quick_sort(a):\n    if len(a) <= 1: return a\n    p = a[0]\n    return quick_sort([x for x in a[1:] if x < p]) + [p] + quick_sort([x for x in a[1:] if x >= p])",
+        "sort_js": "function sortArray(arr) { return arr.sort((a, b) => a - b); }",
+        "insertion_sort": "def insertion_sort(arr):\n    for i in range(1, len(arr)):\n        key = arr[i]\n        j = i - 1\n        while j >= 0 and arr[j] > key:\n            arr[j+1] = arr[j]\n            j -= 1\n        arr[j+1] = key\n    return arr",
+    },
+    "File I/O": {
+        "read_json": "import json\ndef read_json(path):\n    with open(path) as f:\n        return json.load(f)",
+        "write_file": "def write_file(path, content):\n    with open(path, 'w') as f:\n        f.write(content)",
+        "read_csv": "import csv\ndef read_csv(path):\n    with open(path) as f:\n        return list(csv.reader(f))",
+        "read_yaml": "import yaml\ndef read_yaml(path):\n    with open(path) as f:\n        return yaml.safe_load(f)",
+    },
+    "String ops": {
+        "reverse_str": "def reverse(s): return s[::-1]",
+        "capitalize": "def capitalize_words(s): return ' '.join(w.capitalize() for w in s.split())",
+        "count_chars": "def count_chars(s):\n    return {c: s.count(c) for c in set(s)}",
+        "is_palindrome": "def is_palindrome(s): return s == s[::-1]",
+    },
+    "Math": {
+        "factorial": "def factorial(n):\n    r = 1\n    for i in range(2, n+1): r *= i\n    return r",
+        "fibonacci": "def fib(n):\n    a, b = 0, 1\n    for _ in range(n): a, b = b, a+b\n    return a",
+        "gcd": "def gcd(a, b):\n    while b: a, b = b, a % b\n    return a",
+        "is_prime": "def is_prime(n):\n    if n < 2: return False\n    for i in range(2, int(n**0.5)+1):\n        if n % i == 0: return False\n    return True",
+    },
+    "Networking": {
+        "http_get": "import requests\ndef http_get(url): return requests.get(url).json()",
+        "fetch_url": "import urllib.request\ndef fetch(url):\n    with urllib.request.urlopen(url) as r:\n        return r.read().decode()",
+        "post_data": "import requests\ndef post_json(url, data): return requests.post(url, json=data).status_code",
+        "download_file": "import urllib.request\ndef download(url, path): urllib.request.urlretrieve(url, path)",
+    },
+}
+
+
+def embed_code(code: str) -> np.ndarray:
+    """Embed code into a unit-length vector, returned as a NumPy array on the CPU."""
+    inputs = tokenizer(
+        code, return_tensors="pt", truncation=True, max_length=512, padding=True
+    ).to(DEVICE)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    mask = inputs["attention_mask"].unsqueeze(-1)  # broadcast mask over the hidden dim
+    embedding = (outputs.last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1)
+    return F.normalize(embedding, p=2, dim=1).squeeze(0).cpu().numpy()
+
+
+# ── Compute embeddings ────────────────────────────────────────────────────
+print("Computing embeddings...")
+all_embeddings = []
+all_labels = []
+all_categories = []
+
+for category, snippets in categories.items():
+    for label, code in snippets.items():
+        vec = embed_code(code)
+        all_embeddings.append(vec)
+        all_labels.append(label)
+        all_categories.append(category)
+        print(f"  [{category:12s}] {label}")
+
+# Convert to numpy matrix: shape [num_snippets, 768]
+X = np.stack(all_embeddings)
+print(f"\nEmbedding matrix: {X.shape[0]} snippets × {X.shape[1]} dimensions\n")
+
+# ── Color map for categories ──────────────────────────────────────────────
+category_names = list(categories.keys())
+colors = plt.cm.Set1(np.linspace(0, 1, len(category_names)))
+color_map = {cat: colors[i] for i, cat in enumerate(category_names)}
+point_colors = [color_map[cat] for cat in all_categories]
+
+# ── Plot 1: PCA ──────────────────────────────────────────────────────────
+# PCA finds the two directions of maximum variance in the 768-dim space
+# and projects all points onto those two directions.
+print("Computing PCA (2 components)...")
+pca = PCA(n_components=2)
+X_pca = pca.fit_transform(X)
+
+fig, ax = plt.subplots(figsize=(10, 8))
+for i, (x, y) in enumerate(X_pca):
+    ax.scatter(x, y, c=[point_colors[i]], s=100, edgecolors="black", linewidth=0.5, zorder=3)
+    ax.annotate(all_labels[i], (x, y), fontsize=7, ha="center", va="bottom",
+                xytext=(0, 6), textcoords="offset points")
+
+# Legend
+for cat in category_names:
+    ax.scatter([], [], c=[color_map[cat]], s=80, label=cat, edgecolors="black", linewidth=0.5)
+ax.legend(loc="best", fontsize=9, title="Category", title_fontsize=10)
+
+variance_explained = pca.explained_variance_ratio_
+ax.set_title(f"Code Embeddings — PCA Projection\n"
+             f"(PC1: {variance_explained[0]:.1%} variance, PC2: {variance_explained[1]:.1%} variance)",
+             fontsize=13)
+ax.set_xlabel("Principal Component 1", fontsize=11)
+ax.set_ylabel("Principal Component 2", fontsize=11)
+ax.grid(True, alpha=0.3)
+fig.tight_layout()
+fig.savefig("code_embeddings_pca.png", dpi=150)
+print(f"  Saved: code_embeddings_pca.png")
+print(f"  Variance explained: PC1={variance_explained[0]:.1%}, PC2={variance_explained[1]:.1%}\n")
+
+# ── Plot 2: t-SNE ────────────────────────────────────────────────────────
+# t-SNE is a non-linear method that preserves LOCAL neighborhood structure.
+# Points that are close in 768-dim space stay close in 2D.
+# Perplexity controls the balance between local and global structure.
+print("Computing t-SNE (this may take a few seconds)...")
+tsne = TSNE(n_components=2, perplexity=5, random_state=42, max_iter=1000)
+X_tsne = tsne.fit_transform(X)
+
+fig, ax = plt.subplots(figsize=(10, 8))
+for i, (x, y) in enumerate(X_tsne):
+    ax.scatter(x, y, c=[point_colors[i]], s=100, edgecolors="black", linewidth=0.5, zorder=3)
+    ax.annotate(all_labels[i], (x, y), fontsize=7, ha="center", va="bottom",
+                xytext=(0, 6), textcoords="offset points")
+
+for cat in category_names:
+    ax.scatter([], [], c=[color_map[cat]], s=80, label=cat, edgecolors="black", linewidth=0.5)
+ax.legend(loc="best", fontsize=9, title="Category", title_fontsize=10)
+
+ax.set_title("Code Embeddings — t-SNE Projection\n"
+             "(non-linear dimensionality reduction)", fontsize=13)
+ax.set_xlabel("t-SNE Dimension 1", fontsize=11)
+ax.set_ylabel("t-SNE Dimension 2", fontsize=11)
+ax.grid(True, alpha=0.3)
+fig.tight_layout()
+fig.savefig("code_embeddings_tsne.png", dpi=150)
+print(f"  Saved: code_embeddings_tsne.png\n")
+
+print("=" * 70)
+print("INTERPRETATION")
+print("=" * 70)
+print(f"""
+Both plots project {X.shape[1]}-dimensional embedding vectors to 2D:
+
+PCA (Principal Component Analysis):
+  - Linear projection onto the two axes of maximum variance.
+  - Preserves global structure: large distances are meaningful.
+  - Good for seeing overall separation between categories.
+  - The % variance tells you how much information is retained.
+
+t-SNE (t-distributed Stochastic Neighbor Embedding):
+  - Non-linear: distorts distances but preserves neighborhoods.
+  - Points that are close in the original space stay close in 2D.
+  - Better at revealing tight clusters within categories.
+  - Distances BETWEEN clusters are not meaningful.
+
+EXPECTED RESULT:
+  You should see 5 distinct clusters, one per category:
+  - Sorting functions (bubble, quick, insertion, JS sort) cluster together
+  - File I/O functions cluster together
+  - String operations cluster together
+  - Math functions cluster together
+  - Networking functions cluster together
+
+  This visually confirms that code embeddings organize code by
+  PURPOSE, not by surface syntax or programming language.
+""")
diff --git a/Code embeddings/06_pca_denoising.py b/Code embeddings/06_pca_denoising.py
new file mode 100644
index 0000000..88981ae
--- /dev/null
+++ b/Code embeddings/06_pca_denoising.py	
@@ -0,0 +1,716 @@
+"""
+============================================================================
+Example 6: PCA Denoising — Can Fewer Dimensions Improve Similarity?
+============================================================================
+AISE501 – AI in Software Engineering I
+Fachhochschule Graubünden
+
+HYPOTHESIS:
+    Embedding vectors live in a 768-dimensional space, but most of the
+    semantic signal may be concentrated in a small number of principal
+    components.  The remaining dimensions could add "noise" that dilutes
+    cosine similarity.  If true, projecting embeddings onto a small PCA
+    subspace should INCREASE similarity within semantic groups and
+    DECREASE similarity across groups — making code search sharper.
+
+WHAT YOU WILL LEARN:
+    - How PCA decomposes the embedding space into ranked components
+    - How to measure retrieval quality (intra- vs inter-group similarity)
+    - Whether dimensionality reduction helps or hurts in practice
+    - The concept of an "optimal" embedding dimension for a given task
+
+OUTPUT:
+    Saves pca_denoising_analysis.png with six sub-plots (2 × 3 grid).
+
+HARDWARE:
+    Works on CPU, CUDA (NVIDIA), and MPS (Apple Silicon Mac).
+============================================================================
+"""
+
+import torch
+import numpy as np
+from transformers import AutoTokenizer, AutoModel
+import torch.nn.functional as F
+from sklearn.decomposition import PCA
+import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use("Agg")
+
+# ── Device selection ──────────────────────────────────────────────────────
+def get_device():  # returns a torch.device; preference order: CUDA, then MPS, then CPU
+    if torch.cuda.is_available():
+        return torch.device("cuda")
+    elif torch.backends.mps.is_available():
+        return torch.device("mps")
+    return torch.device("cpu")  # fallback when neither accelerator is available
+
+DEVICE = get_device()
+print(f"Using device: {DEVICE}\n")
+
+# ── Load model ────────────────────────────────────────────────────────────
+MODEL_NAME = "flax-sentence-embeddings/st-codesearch-distilroberta-base"
+print(f"Loading model: {MODEL_NAME} ...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
+model.eval()
+print("Model loaded.\n")
+
+# ── Code snippets organized into semantic GROUPS ──────────────────────────
+# We need clear groups so we can measure intra-group vs inter-group similarity.
+groups = {
+    "Sorting": {
+        "bubble_sort": """
+def bubble_sort(arr):
+    n = len(arr)
+    for i in range(n):
+        for j in range(0, n - i - 1):
+            if arr[j] > arr[j + 1]:
+                arr[j], arr[j + 1] = arr[j + 1], arr[j]
+    return arr""",
+        "quick_sort": """
+def quick_sort(arr):
+    if len(arr) <= 1:
+        return arr
+    pivot = arr[len(arr) // 2]
+    left = [x for x in arr if x < pivot]
+    middle = [x for x in arr if x == pivot]
+    right = [x for x in arr if x > pivot]
+    return quick_sort(left) + middle + quick_sort(right)""",
+        "merge_sort": """
+def merge_sort(arr):
+    if len(arr) <= 1:
+        return arr
+    mid = len(arr) // 2
+    left = merge_sort(arr[:mid])
+    right = merge_sort(arr[mid:])
+    merged = []
+    i = j = 0
+    while i < len(left) and j < len(right):
+        if left[i] <= right[j]:
+            merged.append(left[i]); i += 1
+        else:
+            merged.append(right[j]); j += 1
+    return merged + left[i:] + right[j:]""",
+        "insertion_sort": """
+def insertion_sort(arr):
+    for i in range(1, len(arr)):
+        key = arr[i]
+        j = i - 1
+        while j >= 0 and arr[j] > key:
+            arr[j + 1] = arr[j]
+            j -= 1
+        arr[j + 1] = key
+    return arr""",
+        "selection_sort": """
+def selection_sort(arr):
+    for i in range(len(arr)):
+        min_idx = i
+        for j in range(i + 1, len(arr)):
+            if arr[j] < arr[min_idx]:
+                min_idx = j
+        arr[i], arr[min_idx] = arr[min_idx], arr[i]
+    return arr""",
+        "heap_sort": """
+def heap_sort(arr):
+    import heapq
+    heapq.heapify(arr)
+    return [heapq.heappop(arr) for _ in range(len(arr))]""",
+    },
+    "File I/O": {
+        "read_json": """
+import json
+def read_json(path):
+    with open(path, 'r') as f:
+        return json.load(f)""",
+        "write_file": """
+def write_file(path, content):
+    with open(path, 'w') as f:
+        f.write(content)""",
+        "read_csv": """
+import csv
+def read_csv(path):
+    with open(path, 'r') as f:
+        reader = csv.reader(f)
+        return list(reader)""",
+        "read_yaml": """
+import yaml
+def load_yaml(path):
+    with open(path, 'r') as f:
+        return yaml.safe_load(f)""",
+        "write_json": """
+import json
+def write_json(path, data):
+    with open(path, 'w') as f:
+        json.dump(data, f, indent=2)""",
+        "read_lines": """
+def read_lines(path):
+    with open(path, 'r') as f:
+        return f.readlines()""",
+    },
+    "Math": {
+        "factorial": """
+def factorial(n):
+    if n <= 1:
+        return 1
+    return n * factorial(n - 1)""",
+        "fibonacci": """
+def fibonacci(n):
+    a, b = 0, 1
+    for _ in range(n):
+        a, b = b, a + b
+    return a""",
+        "gcd": """
+def gcd(a, b):
+    while b:
+        a, b = b, a % b
+    return a""",
+        "is_prime": """
+def is_prime(n):
+    if n < 2:
+        return False
+    for i in range(2, int(n**0.5) + 1):
+        if n % i == 0:
+            return False
+    return True""",
+        "power": """
+def power(base, exp):
+    if exp == 0:
+        return 1
+    if exp % 2 == 0:
+        half = power(base, exp // 2)
+        return half * half
+    return base * power(base, exp - 1)""",
+        "sum_digits": """
+def sum_digits(n):
+    total = 0
+    while n > 0:
+        total += n % 10
+        n //= 10
+    return total""",
+    },
+    "Networking": {
+        "http_get": """
+import requests
+def http_get(url):
+    response = requests.get(url)
+    return response.json()""",
+        "post_data": """
+import requests
+def post_data(url, payload):
+    response = requests.post(url, json=payload)
+    return response.status_code, response.json()""",
+        "fetch_url": """
+import urllib.request
+def fetch_url(url):
+    with urllib.request.urlopen(url) as resp:
+        return resp.read().decode('utf-8')""",
+        "download_file": """
+import urllib.request
+def download_file(url, dest):
+    urllib.request.urlretrieve(url, dest)
+    return dest""",
+        "http_put": """
+import requests
+def http_put(url, data):
+    response = requests.put(url, json=data)
+    return response.status_code""",
+        "http_delete": """
+import requests
+def http_delete(url):
+    response = requests.delete(url)
+    return response.status_code""",
+    },
+    "String ops": {
+        "reverse_str": """
+def reverse_string(s):
+    return s[::-1]""",
+        "is_palindrome": """
+def is_palindrome(s):
+    s = s.lower().replace(' ', '')
+    return s == s[::-1]""",
+        "count_vowels": """
+def count_vowels(s):
+    return sum(1 for c in s.lower() if c in 'aeiou')""",
+        "capitalize_words": """
+def capitalize_words(s):
+    return ' '.join(w.capitalize() for w in s.split())""",
+        "remove_duplicates": """
+def remove_duplicate_chars(s):
+    seen = set()
+    result = []
+    for c in s:
+        if c not in seen:
+            seen.add(c)
+            result.append(c)
+    return ''.join(result)""",
+        "count_words": """
+def count_words(text):
+    words = text.lower().split()
+    freq = {}
+    for w in words:
+        freq[w] = freq.get(w, 0) + 1
+    return freq""",
+    },
+    "Data structures": {
+        "stack_push_pop": """
+class Stack:
+    def __init__(self):
+        self.items = []
+    def push(self, item):
+        self.items.append(item)
+    def pop(self):
+        return self.items.pop()""",
+        "queue_impl": """
+from collections import deque
+class Queue:
+    def __init__(self):
+        self.items = deque()
+    def enqueue(self, item):
+        self.items.append(item)
+    def dequeue(self):
+        return self.items.popleft()""",
+        "linked_list": """
+class Node:
+    def __init__(self, val):
+        self.val = val
+        self.next = None
+class LinkedList:
+    def __init__(self):
+        self.head = None
+    def append(self, val):
+        node = Node(val)
+        if not self.head:
+            self.head = node
+            return
+        curr = self.head
+        while curr.next:
+            curr = curr.next
+        curr.next = node""",
+        "binary_tree": """
+class TreeNode:
+    def __init__(self, val):
+        self.val = val
+        self.left = None
+        self.right = None
+def inorder(root):
+    if root:
+        yield from inorder(root.left)
+        yield root.val
+        yield from inorder(root.right)""",
+        "hash_map": """
+class HashMap:
+    def __init__(self, size=256):
+        self.buckets = [[] for _ in range(size)]
+    def put(self, key, value):
+        idx = hash(key) % len(self.buckets)
+        for i, (k, v) in enumerate(self.buckets[idx]):
+            if k == key:
+                self.buckets[idx][i] = (key, value)
+                return
+        self.buckets[idx].append((key, value))""",
+        "priority_queue": """
+import heapq
+class PriorityQueue:
+    def __init__(self):
+        self.heap = []
+    def push(self, priority, item):
+        heapq.heappush(self.heap, (priority, item))
+    def pop(self):
+        return heapq.heappop(self.heap)[1]""",
+    },
+}
+
+
+def embed_code(code: str) -> torch.Tensor:
+    """Embed a code or text string: masked mean of the last hidden state, L2-normalized."""
+    inputs = tokenizer(
+        code, return_tensors="pt", truncation=True, max_length=512, padding=True
+    ).to(DEVICE)
+    with torch.no_grad():  # inference only, no gradients needed
+        outputs = model(**inputs)
+    mask = inputs["attention_mask"].unsqueeze(-1)  # add a trailing axis so it broadcasts over the hidden dim
+    embedding = (outputs.last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1)  # mean over tokens the mask keeps
+    return F.normalize(embedding, p=2, dim=1).squeeze(0)  # unit L2 norm; drop the leading batch axis
+
+
+# ── Step 1: Compute all embeddings ────────────────────────────────────────
+print("Computing embeddings...")
+all_names = []
+all_labels = []
+all_vectors = []
+
+for group_name, snippets in groups.items():
+    for snippet_name, code in snippets.items():
+        vec = embed_code(code).cpu().numpy()
+        all_names.append(snippet_name)
+        all_labels.append(group_name)
+        all_vectors.append(vec)
+        print(f"  [{group_name:12s}] {snippet_name}")
+
+X = np.stack(all_vectors)  # shape: [N, 768]
+N, D = X.shape
+print(f"\nEmbedding matrix: {N} snippets × {D} dimensions\n")
+
+# ── Step 2: Define similarity metrics ─────────────────────────────────────
+def cosine_matrix(vectors):
+    """Return the pairwise cosine-similarity matrix of the rows (rows are L2-normalized here)."""
+    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
+    norms = np.maximum(norms, 1e-10)  # floor the norm so an all-zero row cannot divide by zero
+    normed = vectors / norms
+    return normed @ normed.T  # entry [i, j] = cosine similarity of rows i and j
+
+def compute_metrics(sim_matrix, labels):
+    """
+    Return (intra_mean, inter_mean, gap): mean similarity of same-label pairs,
+    mean similarity of different-label pairs, and their difference (intra - inter).
+    """
+    intra_sims = []
+    inter_sims = []
+    n = len(labels)
+    for i in range(n):
+        for j in range(i + 1, n):  # each unordered pair once; the diagonal is skipped
+            if labels[i] == labels[j]:
+                intra_sims.append(sim_matrix[i, j])
+            else:
+                inter_sims.append(sim_matrix[i, j])
+    intra_mean = np.mean(intra_sims)
+    inter_mean = np.mean(inter_sims)
+    gap = intra_mean - inter_mean  # the GAP measures discriminability between groups
+    return intra_mean, inter_mean, gap
+
+
+# ── Step 3: Sweep across PCA dimensions ──────────────────────────────────
+# PCA can have at most min(N, D) components; cap accordingly
+max_components = min(N, D)
+dims_to_test = sorted(set(
+    k for k in [2, 3, 5, 8, 10, 15, 20, 30, 50, 75, 100, 150, 200,
+                300, 400, 500, 600]
+    if k <= max_components and k < D  # D is appended exactly once below (no duplicate baseline)
+))
+dims_to_test.append(D)  # always include full dimensionality as baseline (kept LAST: see results[-1])
+
+print("=" * 70)
+print("PCA DENOISING EXPERIMENT")
+print("=" * 70)
+print(f"\n{'Components':>12s}  {'Intra-Group':>12s}  {'Inter-Group':>12s}  "
+      f"{'Gap':>8s}  {'vs Full':>8s}")
+print("-" * 62)
+
+results = []
+for k in dims_to_test:
+    if k >= D:
+        # Full dimensionality — no PCA (and no mean-centering), just use original vectors
+        X_reduced = X.copy()
+        actual_k = D
+    else:
+        pca = PCA(n_components=k, random_state=42)
+        X_reduced = pca.fit_transform(X)  # NOTE: PCA mean-centers X, so these cosines are on centered data
+        actual_k = k
+
+    sim = cosine_matrix(X_reduced)
+    intra, inter, gap = compute_metrics(sim, all_labels)
+    results.append((actual_k, intra, inter, gap))
+
+# Compute full-dim gap for comparison
+full_intra, full_inter, full_gap = results[-1][1], results[-1][2], results[-1][3]
+
+for k, intra, inter, gap in results:
+    delta = gap - full_gap
+    delta_str = f"{delta:+.4f}" if k < D else "  (base)"
+    print(f"{k:>12d}  {intra:>12.4f}  {inter:>12.4f}  {gap:>8.4f}  {delta_str:>8s}")
+
+# ── Step 4: Find the optimal dimensionality ──────────────────────────────
+dims_arr = np.array([r[0] for r in results])
+gaps_arr = np.array([r[3] for r in results])
+best_idx = np.argmax(gaps_arr)
+best_k, best_gap = int(dims_arr[best_idx]), gaps_arr[best_idx]
+
+print(f"\n{'=' * 70}")
+print(f"BEST DIMENSIONALITY: {best_k} components")
+print(f"  Gap (intra - inter): {best_gap:.4f}  vs  {full_gap:.4f} at full 768-d")
+print(f"  Improvement: {best_gap - full_gap:+.4f}")
+print(f"{'=' * 70}")
+
+# ── Step 5: Show detailed comparison at optimal k vs full ────────────────
+print(f"\n── Detailed Similarity Matrix at k={best_k} vs k={D} ──\n")
+
+if best_k < D:
+    pca_best = PCA(n_components=best_k, random_state=42)
+    X_best = pca_best.fit_transform(X)
+else:
+    X_best = X.copy()
+
+sim_full = cosine_matrix(X)
+sim_best = cosine_matrix(X_best)
+
+# Show a selection of interesting pairs
+print(f"{'Snippet A':>20s}  {'Snippet B':>20s}  {'Full 768d':>10s}  "
+      f"{'PCA {0}d'.format(best_k):>10s}  {'Change':>8s}")
+print("-" * 78)
+
+interesting_pairs = [
+    # Intra-group: should be high
+    ("bubble_sort", "quick_sort"),
+    ("bubble_sort", "merge_sort"),
+    ("read_json", "read_csv"),
+    ("http_get", "fetch_url"),
+    ("factorial", "fibonacci"),
+    ("reverse_str", "is_palindrome"),
+    ("stack_push_pop", "queue_impl"),
+    # Inter-group: should be low
+    ("bubble_sort", "read_json"),
+    ("factorial", "http_get"),
+    ("reverse_str", "download_file"),
+    ("is_prime", "write_file"),
+    ("stack_push_pop", "count_vowels"),
+]
+
+for n1, n2 in interesting_pairs:
+    i = all_names.index(n1)
+    j = all_names.index(n2)
+    s_full = sim_full[i, j]
+    s_best = sim_best[i, j]
+    same = all_labels[i] == all_labels[j]
+    marker = "SAME" if same else "DIFF"
+    change = s_best - s_full
+    print(f"{n1:>20s}  {n2:>20s}  {s_full:>10.4f}  {s_best:>10.4f}  "
+          f"{change:>+8.4f}  [{marker}]")
+
+
+# ── Step 6: Text-to-code search comparison ────────────────────────────────
+print(f"\n── Text-to-Code Search: Full 768d vs PCA {best_k}d ──\n")
+
+search_queries = [
+    ("sort a list of numbers", "Sorting"),
+    ("read a JSON config file", "File I/O"),
+    ("compute factorial recursively", "Math"),
+    ("make an HTTP GET request", "Networking"),
+    ("check if a number is prime", "Math"),
+]
+
+if best_k < D:
+    pca_search = PCA(n_components=best_k, random_state=42)
+    X_search = pca_search.fit_transform(X)
+else:
+    X_search = X.copy()
+    pca_search = None
+
+for query, expected_group in search_queries:
+    q_vec = embed_code(query).cpu().numpy().reshape(1, -1)
+
+    # Full dimension search
+    q_norm = q_vec / np.linalg.norm(q_vec)
+    X_norm = X / np.linalg.norm(X, axis=1, keepdims=True)
+    scores_full = (X_norm @ q_norm.T).flatten()
+
+    # PCA-reduced search
+    if pca_search is not None:
+        q_reduced = pca_search.transform(q_vec)
+    else:
+        q_reduced = q_vec.copy()
+    q_r_norm = q_reduced / np.linalg.norm(q_reduced)
+    X_s_norm = X_search / np.linalg.norm(X_search, axis=1, keepdims=True)
+    scores_pca = (X_s_norm @ q_r_norm.T).flatten()
+
+    top_full = np.argsort(-scores_full)[:3]
+    top_pca = np.argsort(-scores_pca)[:3]
+
+    print(f'  Query: "{query}"')
+    print(f'    Full 768d:  {all_names[top_full[0]]:>16s} ({scores_full[top_full[0]]:.3f})'
+          f'  {all_names[top_full[1]]:>16s} ({scores_full[top_full[1]]:.3f})'
+          f'  {all_names[top_full[2]]:>16s} ({scores_full[top_full[2]]:.3f})')
+    print(f'    PCA {best_k:>3d}d:  {all_names[top_pca[0]]:>16s} ({scores_pca[top_pca[0]]:.3f})'
+          f'  {all_names[top_pca[1]]:>16s} ({scores_pca[top_pca[1]]:.3f})'
+          f'  {all_names[top_pca[2]]:>16s} ({scores_pca[top_pca[2]]:.3f})')
+
+    full_correct = all_labels[top_full[0]] == expected_group
+    pca_correct = all_labels[top_pca[0]] == expected_group
+    print(f'    Full correct: {full_correct}  |  PCA correct: {pca_correct}')
+    print()
+
+
+# ── Step 7: Visualization ─────────────────────────────────────────────────
+# Six-panel figure for a comprehensive visual analysis.
+
+group_colors = {
+    "Sorting": "#1f77b4", "File I/O": "#ff7f0e", "Math": "#2ca02c",
+    "Networking": "#d62728", "String ops": "#9467bd", "Data structures": "#8c564b",
+}
+label_colors = [group_colors[g] for g in all_labels]
+unique_groups = list(dict.fromkeys(all_labels))
+
+fig = plt.figure(figsize=(20, 13))
+fig.suptitle("PCA Denoising Analysis — Can Fewer Dimensions Improve Code Similarity?",
+             fontsize=15, fontweight="bold", y=0.98)
+
+# ── Row 1 ──
+
+# Plot 1: Intra/inter similarity vs number of PCA components
+ax1 = fig.add_subplot(2, 3, 1)
+dims_plot = [r[0] for r in results]
+intra_plot = [r[1] for r in results]
+inter_plot = [r[2] for r in results]
+ax1.fill_between(dims_plot, inter_plot, intra_plot, alpha=0.15, color="tab:green")
+ax1.plot(dims_plot, intra_plot, "o-", color="tab:blue", linewidth=2,
+         label="Intra-group (same category)", markersize=6)
+ax1.plot(dims_plot, inter_plot, "s-", color="tab:red", linewidth=2,
+         label="Inter-group (different category)", markersize=6)
+ax1.axvline(x=best_k, color="green", linestyle="--", alpha=0.7,
+            label=f"Best gap at k={best_k}")
+ax1.set_xlabel("Number of PCA Components", fontsize=10)
+ax1.set_ylabel("Average Cosine Similarity", fontsize=10)
+ax1.set_title("(a) Intra- vs Inter-Group Similarity", fontsize=11, fontweight="bold")
+ax1.legend(fontsize=7, loc="center right")
+ax1.set_xscale("log")
+ax1.grid(True, alpha=0.3)
+
+# Plot 2: Gap (discriminability) vs number of PCA components
+ax2 = fig.add_subplot(2, 3, 2)
+gaps_plot = [r[3] for r in results]
+ax2.plot(dims_plot, gaps_plot, "D-", color="tab:green", linewidth=2, markersize=7)
+ax2.axvline(x=best_k, color="green", linestyle="--", alpha=0.7,
+            label=f"Best k={best_k} (gap={best_gap:.3f})")
+ax2.axhline(y=full_gap, color="gray", linestyle=":", alpha=0.7,
+            label=f"Full 768d (gap={full_gap:.3f})")
+ax2.fill_between(dims_plot, full_gap, gaps_plot, alpha=0.12, color="tab:green",
+                 where=[g > full_gap for g in gaps_plot])
+ax2.set_xlabel("Number of PCA Components", fontsize=10)
+ax2.set_ylabel("Gap (Intra − Inter)", fontsize=10)
+ax2.set_title("(b) Discriminability vs Dimensionality", fontsize=11, fontweight="bold")
+ax2.legend(fontsize=8)
+ax2.set_xscale("log")
+ax2.grid(True, alpha=0.3)
+
+# Plot 3: Cumulative variance explained
+pca_full = PCA(n_components=min(N, D), random_state=42)
+pca_full.fit(X)
+cumvar = np.cumsum(pca_full.explained_variance_ratio_) * 100
+ax3 = fig.add_subplot(2, 3, 3)
+ax3.plot(range(1, len(cumvar) + 1), cumvar, "-", color="tab:purple", linewidth=2)
+ax3.axvline(x=best_k, color="green", linestyle="--", alpha=0.7,
+            label=f"Best k={best_k}")
+for threshold in [90, 95, 99]:
+    k_thresh = np.searchsorted(cumvar, threshold) + 1
+    if k_thresh <= len(cumvar):
+        ax3.axhline(y=threshold, color="gray", linestyle=":", alpha=0.4)
+        ax3.annotate(f"{threshold}% → k={k_thresh}", xy=(k_thresh, threshold),
+                     fontsize=8, color="gray", ha="left",
+                     xytext=(k_thresh + 1, threshold - 2))
+ax3.set_xlabel("Number of PCA Components", fontsize=10)
+ax3.set_ylabel("Cumulative Variance Explained (%)", fontsize=10)
+ax3.set_title("(c) Variance Concentration", fontsize=11, fontweight="bold")
+ax3.legend(fontsize=8)
+ax3.set_xscale("log")
+ax3.grid(True, alpha=0.3)
+
+# ── Row 2 ──
+
+# Plot 4 & 5: Side-by-side heatmaps (full vs PCA-denoised)
+# Sort indices by group for a block-diagonal structure
+sorted_idx = sorted(range(N), key=lambda i: all_labels[i])
+sorted_names = [all_names[i] for i in sorted_idx]
+sorted_labels = [all_labels[i] for i in sorted_idx]
+
+sim_full_sorted = sim_full[np.ix_(sorted_idx, sorted_idx)]
+sim_best_sorted = sim_best[np.ix_(sorted_idx, sorted_idx)]
+
+for panel_idx, (mat, title_str) in enumerate([
+    (sim_full_sorted, f"(d) Similarity Heatmap — Full 768d"),
+    (sim_best_sorted, f"(e) Similarity Heatmap — PCA {best_k}d (Denoised)"),
+]):
+    ax = fig.add_subplot(2, 3, 4 + panel_idx)
+    im = ax.imshow(mat, cmap="RdBu_r", vmin=-1, vmax=1, aspect="auto")
+    ax.set_xticks(range(N))
+    ax.set_yticks(range(N))
+    ax.set_xticklabels(sorted_names, rotation=90, fontsize=5)
+    ax.set_yticklabels(sorted_names, fontsize=5)
+
+    # Draw group boundary lines
+    prev_label = sorted_labels[0]
+    for i, lab in enumerate(sorted_labels):
+        if lab != prev_label:
+            ax.axhline(y=i - 0.5, color="black", linewidth=1)
+            ax.axvline(x=i - 0.5, color="black", linewidth=1)
+            prev_label = lab
+
+    ax.set_title(title_str, fontsize=11, fontweight="bold")
+    plt.colorbar(im, ax=ax, shrink=0.8, label="Cosine Similarity")
+
+# Plot 6: Bar chart comparing specific pairs at full vs PCA
+ax6 = fig.add_subplot(2, 3, 6)
+pair_labels = []
+full_scores = []
+pca_scores = []
+pair_colors = []
+
+for n1, n2 in interesting_pairs:
+    i = all_names.index(n1)
+    j = all_names.index(n2)
+    pair_labels.append(f"{n1}\nvs {n2}")
+    full_scores.append(sim_full[i, j])
+    pca_scores.append(sim_best[i, j])
+    pair_colors.append("#2ca02c" if all_labels[i] == all_labels[j] else "#d62728")
+
+y_pos = np.arange(len(pair_labels))
+bar_h = 0.35
+bars_full = ax6.barh(y_pos + bar_h / 2, full_scores, bar_h, label="Full 768d",
+                     color="tab:blue", alpha=0.7)
+bars_pca = ax6.barh(y_pos - bar_h / 2, pca_scores, bar_h, label=f"PCA {best_k}d",
+                    color="tab:orange", alpha=0.7)
+
+# Color labels by same/different group
+for i, (yl, col) in enumerate(zip(pair_labels, pair_colors)):
+    ax6.annotate("●", xy=(-0.05, y_pos[i]), fontsize=10, color=col,
+                 ha="right", va="center", fontweight="bold",
+                 annotation_clip=False)
+
+ax6.set_yticks(y_pos)
+ax6.set_yticklabels(pair_labels, fontsize=6)
+ax6.set_xlabel("Cosine Similarity", fontsize=10)
+ax6.set_title("(f) Pair Comparison: Full vs PCA Denoised", fontsize=11, fontweight="bold")
+ax6.legend(fontsize=8)
+ax6.axvline(x=0, color="black", linewidth=0.5)
+ax6.set_xlim(-1.1, 1.1)
+ax6.grid(True, axis="x", alpha=0.3)
+ax6.invert_yaxis()
+
+# Custom legend for the dots
+from matplotlib.lines import Line2D
+dot_legend = [Line2D([0], [0], marker="o", color="w", markerfacecolor="#2ca02c",
+                     markersize=8, label="Same group"),
+              Line2D([0], [0], marker="o", color="w", markerfacecolor="#d62728",
+                     markersize=8, label="Different group")]
+ax6.legend(handles=[bars_full, bars_pca] + dot_legend, fontsize=7, loc="lower right")
+
+plt.tight_layout(rect=[0, 0, 1, 0.96])
+plt.savefig("pca_denoising_analysis.png", dpi=150, bbox_inches="tight")
+print(f"\nSaved: pca_denoising_analysis.png")
+
+# ── Summary ───────────────────────────────────────────────────────────────
+print(f"""
+{'=' * 70}
+CONCLUSIONS
+{'=' * 70}
+
+1. VARIANCE CONCENTRATION:
+   The first few PCA components capture a disproportionate amount of
+   variance.  This means the embedding space has low effective
+   dimensionality — most of the 768 dimensions are semi-redundant.
+
+2. DENOISING EFFECT:
+   At k={best_k}, the gap between intra-group and inter-group similarity
+   is {best_gap:.4f} (vs {full_gap:.4f} at full 768d).
+   {'PCA denoising IMPROVED discriminability by removing noisy dimensions.' if best_gap > full_gap else 'Full dimensionality was already optimal for this dataset.'}
+
+3. PRACTICAL IMPLICATIONS:
+   - For retrieval (code search), moderate PCA reduction can sharpen
+     results while also reducing storage and computation.
+   - Too few dimensions (k=2,3) lose important signal.
+   - Too many dimensions may retain noise that dilutes similarity.
+   - The "sweet spot" depends on the dataset and task.
+
+4. TRADE-OFF:
+   PCA denoising is a post-hoc technique.  Newer embedding models are
+   trained with Matryoshka Representation Learning (MRL) that makes
+   the FIRST k dimensions maximally informative by design.
+""")
diff --git a/Code embeddings/README.md b/Code embeddings/README.md
new file mode 100644
index 0000000..121c383
--- /dev/null
+++ b/Code embeddings/README.md	
@@ -0,0 +1,93 @@
+# Code Embeddings — Hands-On Examples
+
+**AISE501 – AI in Software Engineering I**
+Fachhochschule Graubünden — Spring Semester 2026
+
+## Overview
+
+Seven self-contained Python programs that demonstrate how embedding
+models work. Each script loads a pre-trained model, embeds text or code
+snippets, and explores a different capability of embeddings.
+
+| # | Script | What it demonstrates |
+|---|--------|---------------------|
+| 0 | `00_tokens_and_embeddings_intro.py` | Tokenization basics and general text embeddings (German) |
+| 1 | `01_basic_embeddings.py` | Compute code embeddings and pairwise cosine similarity |
+| 2 | `02_text_to_code_search.py` | Semantic search: find code from natural language queries |
+| 3 | `03_cross_language.py` | Same algorithm in 4 languages → similar embeddings |
+| 4 | `04_clone_detection.py` | Detect duplicate/similar code in a simulated codebase |
+| 5 | `05_visualize_embeddings.py` | PCA and t-SNE plots of the embedding space |
+| 6 | `06_pca_denoising.py` | PCA denoising: fewer dimensions can improve similarity |
+
+## Setup
+
+### 1. Create a virtual environment (recommended)
+
+```bash
+python -m venv venv
+
+# macOS / Linux
+source venv/bin/activate
+
+# Windows
+venv\Scripts\activate
+```
+
+### 2. Install dependencies
+
+```bash
+pip install -r requirements.txt
+```
+
+**PyTorch GPU support:**
+
+- **Apple Silicon Mac (M1/M2/M3/M4):** MPS acceleration works
+  out of the box with the standard PyTorch install. No extra steps needed.
+- **NVIDIA GPU (Windows/Linux):** Install the CUDA version of PyTorch.
+  See https://pytorch.org/get-started/locally/ for the correct command
+  for your CUDA version.
+- **CPU only:** Everything works on CPU too, just a bit slower.
+
+### 3. Run any example
+
+```bash
+python 00_tokens_and_embeddings_intro.py
+python 01_basic_embeddings.py
+python 02_text_to_code_search.py
+python 03_cross_language.py
+python 04_clone_detection.py
+python 05_visualize_embeddings.py
+python 06_pca_denoising.py
+```
+
+The first run will download the model (~300 MB). Subsequent runs
+use the cached model.
+
+## Model
+
+All code embedding examples (01–06) use **st-codesearch-distilroberta-base**
+(82M parameters), a DistilRoBERTa model fine-tuned on 1.38 million
+code-comment pairs from CodeSearchNet using contrastive learning
+(MultipleNegativesRankingLoss). It produces 768-dimensional embedding
+vectors optimized for matching natural language descriptions to code,
+making it ideal for semantic code search and similarity tasks.
+
+The introductory example (00) uses **paraphrase-multilingual-mpnet-base-v2**
+for demonstrating general language embeddings with German text.
+
+## Hardware Requirements
+
+- **RAM:** 1 GB free (for the model)
+- **Disk:** ~500 MB (for the downloaded model, cached in `~/.cache/huggingface/`)
+- **GPU:** Optional — all scripts auto-detect and use:
+  - CUDA (NVIDIA GPUs)
+  - MPS (Apple Silicon)
+  - CPU (fallback)
+
+## Expected Output
+
+Each script prints structured output with explanations. Example 5
+saves two PNG images (`code_embeddings_pca.png` and
+`code_embeddings_tsne.png`) showing the embedding space. Example 6
+saves `pca_denoising_analysis.png` with six sub-plots analyzing
+optimal embedding dimensions.
diff --git a/Code embeddings/code_embeddings_pca.png b/Code embeddings/code_embeddings_pca.png
new file mode 100644
index 0000000..88b4b75
Binary files /dev/null and b/Code embeddings/code_embeddings_pca.png differ
diff --git a/Code embeddings/code_embeddings_tsne.png b/Code embeddings/code_embeddings_tsne.png
new file mode 100644
index 0000000..842df8d
Binary files /dev/null and b/Code embeddings/code_embeddings_tsne.png differ
diff --git a/Code embeddings/embedding_space_crosslingual.png b/Code embeddings/embedding_space_crosslingual.png
new file mode 100644
index 0000000..a9c5553
Binary files /dev/null and b/Code embeddings/embedding_space_crosslingual.png differ
diff --git a/Code embeddings/embedding_space_german.png b/Code embeddings/embedding_space_german.png
new file mode 100644
index 0000000..11d84f3
Binary files /dev/null and b/Code embeddings/embedding_space_german.png differ
diff --git a/Code embeddings/pca_denoising_analysis.png b/Code embeddings/pca_denoising_analysis.png
new file mode 100644
index 0000000..8461c6d
Binary files /dev/null and b/Code embeddings/pca_denoising_analysis.png differ
diff --git a/Code embeddings/requirements.txt b/Code embeddings/requirements.txt
new file mode 100644
index 0000000..c903590
--- /dev/null
+++ b/Code embeddings/requirements.txt	
@@ -0,0 +1,6 @@
+torch
+transformers
+sentence-transformers
+scikit-learn
+matplotlib
+numpy
diff --git a/Prompting Exercise/.DS_Store b/Prompting Exercise/.DS_Store
new file mode 100644
index 0000000..5008ddf
Binary files /dev/null and b/Prompting Exercise/.DS_Store differ
diff --git a/Prompting Exercise/analyze_me.py b/Prompting Exercise/analyze_me.py
new file mode 100644
index 0000000..d2b8b83
--- /dev/null
+++ b/Prompting Exercise/analyze_me.py	
@@ -0,0 +1,67 @@
+"""
+analyze_me.py  –  A data-processing script used in Exercise 2
+==============================================================
+This file contains several realistic bugs and style issues.
+Do NOT fix them manually — in Exercise 2 the LLM will help you find them!
+
+Can you spot the issues yourself before asking the LLM?
+"""
+
+
+def calculate_statistics(numbers):
+    total = 0
+    for n in numbers:
+        total = total + n
+    average = total / len(numbers)       # Bug 1: ZeroDivisionError when list is empty
+
+    min_val = numbers[0]                 # Bug 2: IndexError when list is empty
+    max_val = numbers[0]
+    for n in numbers:
+        if n < min_val:
+            min_val = n
+        if n > max_val:
+            max_val = n
+
+    variance = 0
+    for n in numbers:
+        variance = variance + (n - average) ** 2
+    variance = variance / len(numbers)   # Bug 3: population variance (÷N), not sample variance (÷N-1)
+
+    return {
+        "count":    len(numbers),
+        "sum":      total,
+        "average":  average,
+        "min":      min_val,
+        "max":      max_val,
+        "variance": variance,
+    }
+
+
+def process_data(filename):
+    numbers = []
+    f = open(filename)                   # Bug 4: no context manager (file may not be closed on error)
+    for line in f:
+        numbers.append(int(line.strip()))  # Bug 5: int() crashes on floats and blank lines
+    f.close()
+
+    result = calculate_statistics(numbers)
+    print("Statistics:", result)
+    return result
+
+
+def normalize(numbers, method="minmax"):
+    if method == "minmax":
+        mn = min(numbers)
+        mx = max(numbers)
+        return [(x - mn) / mx - mn for x in numbers]  # Bug 6: operator-precedence error
+    elif method == "zscore":
+        stats = calculate_statistics(numbers)
+        std = stats["variance"] ** 0.5
+        return [(x - stats["average"]) / std for x in numbers]
+    else:
+        print("Unknown normalisation method")          # Bug 7: should raise ValueError, not just print
+
+
+if __name__ == "__main__":
+    sample = [4, 8, 15, 16, 23, 42]
+    print(calculate_statistics(sample))
diff --git a/Prompting Exercise/analyze_me_blind.py b/Prompting Exercise/analyze_me_blind.py
new file mode 100644
index 0000000..27cb4b3
--- /dev/null
+++ b/Prompting Exercise/analyze_me_blind.py	
@@ -0,0 +1,67 @@
+"""
+analyze_me.py  –  A data-processing script used in Exercise 2
+==============================================================
+This file contains several realistic bugs and style issues.
+Do NOT fix them manually — in Exercise 2 the LLM will help you find them!
+
+Can you spot the issues yourself before asking the LLM?
+"""
+
+
+def calculate_statistics(numbers):
+    total = 0
+    for n in numbers:
+        total = total + n
+    average = total / len(numbers)
+
+    min_val = numbers[0]
+    max_val = numbers[0]
+    for n in numbers:
+        if n < min_val:
+            min_val = n
+        if n > max_val:
+            max_val = n
+
+    variance = 0
+    for n in numbers:
+        variance = variance + (n - average) ** 2
+    variance = variance / len(numbers)
+
+    return {
+        "count":    len(numbers),
+        "sum":      total,
+        "average":  average,
+        "min":      min_val,
+        "max":      max_val,
+        "variance": variance,
+    }
+
+
+def process_data(filename):
+    numbers = []
+    f = open(filename)
+    for line in f:
+        numbers.append(int(line.strip()))
+    f.close()
+
+    result = calculate_statistics(numbers)
+    print("Statistics:", result)
+    return result
+
+
+def normalize(numbers, method="minmax"):
+    if method == "minmax":
+        mn = min(numbers)
+        mx = max(numbers)
+        return [(x - mn) / mx - mn for x in numbers]
+    elif method == "zscore":
+        stats = calculate_statistics(numbers)
+        std = stats["variance"] ** 0.5
+        return [(x - stats["average"]) / std for x in numbers]
+    else:
+        print("Unknown normalisation method")
+
+
+if __name__ == "__main__":
+    sample = [4, 8, 15, 16, 23, 42]
+    print(calculate_statistics(sample))
diff --git a/Prompting Exercise/analyze_me_blind_fix.py b/Prompting Exercise/analyze_me_blind_fix.py
new file mode 100644
index 0000000..517a90e
--- /dev/null
+++ b/Prompting Exercise/analyze_me_blind_fix.py	
@@ -0,0 +1,89 @@
+import sys
+
+"""
+analyze_me.py  –  A data-processing script used in Exercise 2
+==============================================================
+This file contains several realistic bugs and style issues.
+Do NOT fix them manually — in Exercise 2 the LLM will help you find them!
+
+Can you spot the issues yourself before asking the LLM?
+"""
+
+
+def calculate_statistics(numbers):
+    if not numbers:
+        raise ValueError("Cannot calculate statistics for an empty list.")
+
+    total = 0
+    for n in numbers:
+        total = total + n
+    average = total / len(numbers)
+
+    min_val = numbers[0]
+    max_val = numbers[0]
+    for n in numbers:
+        if n < min_val:
+            min_val = n
+        if n > max_val:
+            max_val = n
+
+    variance = 0
+    for n in numbers:
+        variance = variance + (n - average) ** 2
+    variance = variance / len(numbers)
+
+    return {
+        "count":    len(numbers),
+        "sum":      total,
+        "average":  average,
+        "min":      min_val,
+        "max":      max_val,
+        "variance": variance,
+    }
+
+
+def process_data(filename):
+    numbers = []
+    try:
+        with open(filename, 'r') as file_handle:
+            for line in file_handle:
+                stripped_line = line.strip()
+                if stripped_line:
+                    numbers.append(int(stripped_line))
+    except FileNotFoundError:
+        print(f"Error: File '{filename}' not found.")
+        raise
+    except ValueError as e:
+        print(f"Error: Invalid integer in file: {e}")
+        raise
+
+    result = calculate_statistics(numbers)
+    print("Statistics:", result)
+    return result
+
+
+def normalize(numbers, method="minmax"):
+    """Normalize ``numbers`` by "minmax" or "zscore"; constant input yields 0.0s.
+
+    Raises ValueError for an empty list or an unknown ``method`` (never returns []).
+    """
+    if not numbers:
+        raise ValueError("Cannot normalize an empty list.")
+    if method == "minmax":
+        mn, mx = min(numbers), max(numbers)
+        if mx == mn:
+            return [0.0 for _ in numbers]
+        return [(x - mn) / (mx - mn) for x in numbers]
+    if method == "zscore":
+        stats = calculate_statistics(numbers)
+        std = stats["variance"] ** 0.5
+        if std == 0:
+            return [0.0 for _ in numbers]
+        return [(x - stats["average"]) / std for x in numbers]
+    raise ValueError(f"Unknown normalization method: {method!r}")
+
+
+if __name__ == "__main__":
+    sample = [4, 8, 15, 16, 23, 42]
+    print(calculate_statistics(sample))
+
diff --git a/Prompting Exercise/analyze_me_direct.py b/Prompting Exercise/analyze_me_direct.py
new file mode 100644
index 0000000..9897271
--- /dev/null
+++ b/Prompting Exercise/analyze_me_direct.py	
@@ -0,0 +1,192 @@
+"""
+analyze_me.py – A data-processing script used in Exercise 2
+==============================================================
+This module provides robust functions for calculating statistics,
+processing data files, and normalizing numeric lists.
+
+All functions include PEP-484 type hints and NumPy-style docstrings.
+"""
+
+from typing import List, Dict, Union, Any
+
+
+def calculate_statistics(numbers: List[Union[int, float]]) -> Dict[str, Any]:
+    """
+    Calculate basic statistics for a list of numbers.
+
+    Parameters
+    ----------
+    numbers : List[Union[int, float]]
+        The list of numeric values to analyze.
+
+    Returns
+    -------
+    Dict[str, Any]
+        A dictionary containing count, sum, average, min, max, and variance.
+        If the input list is empty, every field (count included) is zero.
+        A single-element list reports a variance of 0.0.
+
+    Notes
+    -----
+    - Variance is calculated using the sample variance formula (dividing by N-1).
+    - With fewer than two values N-1 is not a usable divisor, so the variance
+      falls back to 0.0 instead of raising ZeroDivisionError.
+    """
+    count = len(numbers)
+
+    if count == 0:
+        return {
+            "count": 0,
+            "sum": 0.0,
+            "average": 0.0,
+            "min": 0.0,
+            "max": 0.0,
+            "variance": 0.0,
+        }
+
+    total = sum(numbers)
+    average = total / count
+
+    min_val = min(numbers)
+    max_val = max(numbers)
+
+    # Sample variance divides by N-1; a lone value has no spread to estimate.
+    variance_sum = sum((n - average) ** 2 for n in numbers)
+    variance = variance_sum / (count - 1) if count > 1 else 0.0
+
+    return {
+        "count": count,
+        "sum": total,
+        "average": average,
+        "min": min_val,
+        "max": max_val,
+        "variance": variance,
+    }
+
+
+def process_data(filename: str) -> Dict[str, Any]:
+    """
+    Read numeric data from a file and calculate statistics.
+
+    Parameters
+    ----------
+    filename : str
+        Path to the input file containing one number per line.
+        Blank lines and non-numeric lines are skipped.
+
+    Returns
+    -------
+    Dict[str, Any]
+        The statistics dictionary returned by calculate_statistics().
+
+    Raises
+    ------
+    FileNotFoundError, IOError
+        If the file does not exist or cannot be read, respectively.
+    ValueError
+        If the file contains no valid numbers.
+    """
+    numbers: List[Union[int, float]] = []
+    
+    try:
+        with open(filename, 'r') as f:
+            for line in f:
+                stripped = line.strip()
+                if not stripped:
+                    continue
+                try:
+                    # Attempt to parse as float to handle both int and float
+                    numbers.append(float(stripped))
+                except ValueError:
+                    # Skip non-numeric lines
+                    continue
+    except FileNotFoundError:
+        raise FileNotFoundError(f"File not found: {filename}")
+    except IOError as e:
+        raise IOError(f"Error reading file {filename}: {e}")
+
+    if not numbers:
+        raise ValueError(f"No valid numeric data found in {filename}")
+
+    result = calculate_statistics(numbers)
+    print("Statistics:", result)
+    return result
+
+
+def normalize(numbers: List[Union[int, float]], method: str = "minmax") -> List[float]:
+    """
+    Normalize a list of numbers using the specified method.
+
+    Parameters
+    ----------
+    numbers : List[Union[int, float]]
+        The list of numeric values to normalize.
+    method : str, optional
+        The normalization method to use. Options are:
+        - "minmax": Min-Max normalization to [0, 1]
+        - "zscore": Z-score normalization (standardization)
+
+    Returns
+    -------
+    List[float]
+        The normalized list of numbers. When the range (minmax) or the
+        standard deviation (zscore) is zero, a list of 0.0 values is
+        returned instead of dividing by zero.
+
+    Raises
+    ------
+    ValueError
+        If an unknown normalization method is provided, or if the list is empty.
+    """
+    if not numbers:
+        raise ValueError("Cannot normalize an empty list.")
+
+    if method == "minmax":
+        mn = min(numbers)
+        mx = max(numbers)
+        range_val = mx - mn
+        if range_val == 0:
+            # If all values are the same, return zeros or handle as needed
+            return [0.0 for _ in numbers]
+        return [(x - mn) / range_val for x in numbers]
+    
+    elif method == "zscore":
+        stats = calculate_statistics(numbers)
+        std = stats["variance"] ** 0.5
+        if std == 0:
+            # If standard deviation is zero, all values are the same
+            return [0.0 for _ in numbers]
+        return [(x - stats["average"]) / std for x in numbers]
+    
+    else:
+        raise ValueError(f"Unknown normalization method: '{method}'. "
+                         f"Supported methods: 'minmax', 'zscore'.")
+
+
+if __name__ == "__main__":
+    # Basic sanity checks
+    sample = [4, 8, 15, 16, 23, 42]
+    
+    print("Testing calculate_statistics:")
+    stats = calculate_statistics(sample)
+    print(stats)
+    
+    print("\nTesting normalize (minmax):")
+    normalized_minmax = normalize(sample, "minmax")
+    print(normalized_minmax)
+    
+    print("\nTesting normalize (zscore):")
+    normalized_zscore = normalize(sample, "zscore")
+    print(normalized_zscore)
+    
+    print("\nTesting empty list handling:")
+    empty_stats = calculate_statistics([])
+    print(empty_stats)
+    
+    print("\nTesting unknown method error:")
+    try:
+        normalize(sample, "unknown")
+    except ValueError as e:
+        print(f"Caught expected error: {e}")
+    
+    print("\nAll sanity checks passed!")
\ No newline at end of file
diff --git a/Prompting Exercise/analyze_me_fix.py b/Prompting Exercise/analyze_me_fix.py
new file mode 100644
index 0000000..cf7df3f
--- /dev/null
+++ b/Prompting Exercise/analyze_me_fix.py	
@@ -0,0 +1,89 @@
+import sys
+
+"""
+analyze_me.py  –  A data-processing script used in Exercise 2
+==============================================================
+This file contains several realistic bugs and style issues.
+Do NOT fix them manually — in Exercise 2 the LLM will help you find them!
+
+Can you spot the issues yourself before asking the LLM?
+"""
+
+
+def calculate_statistics(numbers):
+    if not numbers:
+        return {
+            "count": 0,
+            "sum": 0,
+            "average": 0.0,
+            "min": None,
+            "max": None,
+            "variance": 0.0,
+        }
+
+    total = 0
+    for n in numbers:
+        total = total + n
+    average = total / len(numbers)
+
+    min_val = numbers[0]
+    max_val = numbers[0]
+    for n in numbers:
+        if n < min_val:
+            min_val = n
+        if n > max_val:
+            max_val = n
+
+    variance = 0
+    for n in numbers:
+        variance = variance + (n - average) ** 2
+    variance = variance / (len(numbers) - 1) if len(numbers) > 1 else 0.0
+
+    return {
+        "count": len(numbers),
+        "sum": total,
+        "average": average,
+        "min": min_val,
+        "max": max_val,
+        "variance": variance,
+    }
+
+
+def process_data(filename):
+    numbers = []
+    with open(filename) as file_handle:
+        for line in file_handle:
+            stripped = line.strip()
+            if not stripped:
+                continue
+            try:
+                numbers.append(float(stripped))
+            except ValueError:
+                continue
+
+    result = calculate_statistics(numbers)
+    print("Statistics:", result)
+    return result
+
+
+def normalize(numbers, method="minmax"):
+    if method == "minmax":
+        mn = min(numbers)
+        mx = max(numbers)
+        if mx == mn:
+            return [0.0 for _ in numbers]
+        return [(x - mn) / (mx - mn) for x in numbers]
+    elif method == "zscore":
+        stats = calculate_statistics(numbers)
+        std = stats["variance"] ** 0.5
+        if std == 0:
+            return [0.0 for _ in numbers]
+        return [(x - stats["average"]) / std for x in numbers]
+    else:
+        raise ValueError(f"Unknown normalization method: {method}")
+
+
+if __name__ == "__main__":
+    sample = [4, 8, 15, 16, 23, 42]
+    print(calculate_statistics(sample))
+
diff --git a/Prompting Exercise/analyze_me_fixed.py b/Prompting Exercise/analyze_me_fixed.py
new file mode 100644
index 0000000..ca0fd98
--- /dev/null
+++ b/Prompting Exercise/analyze_me_fixed.py	
@@ -0,0 +1,216 @@
+"""
+analyze_me.py  –  A data-processing script used in Exercise 2
+==============================================================
+This file contains several realistic bugs and style issues.
+Do NOT fix them manually — in Exercise 2 the LLM will help you find them!
+
+Can you spot the issues yourself before asking the LLM?
+"""
+
+
+def calculate_statistics(numbers: list[float]) -> dict[str, float]:
+    """
+    Calculate basic statistical measures for a list of numbers.
+
+    This function computes the count, sum, average, minimum, maximum, and
+    variance (sample variance, N-1 divisor) of the provided list of numbers.
+
+    Parameters
+    ----------
+    numbers : list[float]
+        A list of numeric values to analyze.
+
+    Returns
+    -------
+    dict[str, float]
+        A dictionary containing the following keys:
+        - 'count': The number of elements in the list.
+        - 'sum': The sum of all elements.
+        - 'average': The arithmetic mean of the elements.
+        - 'min': The minimum value in the list.
+        - 'max': The maximum value in the list.
+        - 'variance': The sample variance (N-1 divisor) of the elements.
+
+    Notes
+    -----
+    An empty input list does not raise ZeroDivisionError or IndexError:
+    the guard at the top of the function returns a dictionary whose
+    fields are all zero before any division or ``numbers[0]`` lookup
+    is reached.
+    A single-element list reports a variance of 0.0, because the N-1
+    divisor of the sample variance would be zero.
+    """
+    # Step 2 – Implement empty list handling in calculate_statistics
+    if not numbers:
+        return {
+            "count": 0,
+            "sum": 0.0,
+            "average": 0.0,
+            "min": 0.0,
+            "max": 0.0,
+            "variance": 0.0,
+        }
+
+    total = 0
+    for n in numbers:
+        total = total + n
+    average = total / len(numbers)       # non-empty here: the guard above already returned
+
+    min_val = numbers[0]                 # safe for the same reason: numbers has >= 1 element
+    max_val = numbers[0]
+    for n in numbers:
+        if n < min_val:
+            min_val = n
+        if n > max_val:
+            max_val = n
+
+    variance = 0
+    for n in numbers:
+        variance = variance + (n - average) ** 2
+    
+    # Step 3 – Correct variance calculation to use sample variance
+    count = len(numbers)
+    if count > 1:
+        variance = variance / (count - 1)
+    else:
+        variance = 0.0
+
+    return {
+        "count":    len(numbers),
+        "sum":      total,
+        "average":  average,
+        "min":      min_val,
+        "max":      max_val,
+        "variance": variance,
+    }
+
+
+# Step 4 – Define type hints and docstrings for process_data
+def process_data(filename: str) -> dict[str, float]:
+    """
+    Read numeric data from a file and compute statistics.
+
+    This function opens a text file, parses each non-blank line as a float
+    (skipping lines that are not numeric), and passes the collected values to
+    calculate_statistics to compute, print, and return the statistical summary.
+
+    Parameters
+    ----------
+    filename : str
+        The path to the text file containing one number per line.
+
+    Returns
+    -------
+    dict[str, float]
+        A dictionary containing the statistical measures computed from the file data.
+
+    Raises
+    ------
+    FileNotFoundError
+        If the specified file does not exist.
+    OSError
+        If the file exists but cannot be opened or read.
+    """
+    numbers = []
+    # Step 5 – Implement context manager and robust line parsing in process_data
+    with open(filename) as f:
+        for line in f:
+            stripped = line.strip()
+            if not stripped:
+                continue
+            try:
+                # Attempt to convert to float first to handle both ints and floats
+                value = float(stripped)
+                numbers.append(value)
+            except ValueError:
+                # Skip lines that cannot be converted to a number
+                continue
+
+    result = calculate_statistics(numbers)
+    print("Statistics:", result)
+    return result
+
+
+# Step 6 – Define type hints and docstrings for normalize
+def normalize(numbers: list[float], method: str = "minmax") -> list[float]:
+    """
+    Normalize a list of numbers using the specified method.
+
+    This function applies either 'minmax' scaling or 'zscore' standardization
+    to the input list of numbers.
+
+    Parameters
+    ----------
+    numbers : list[float]
+        A list of numeric values to normalize.
+    method : str, optional
+        The normalization method to use. Options are:
+        - 'minmax': Scales values to the range [0, 1].
+        - 'zscore': Standardizes values to have mean 0 and standard deviation 1.
+        Default is 'minmax'.
+
+    Returns
+    -------
+    list[float]
+        A list of normalized values.
+
+    Raises
+    ------
+    ValueError
+        If an unknown normalization method is provided.
+    ZeroDivisionError
+        If 'minmax' is used on a list where all values are identical (range is 0),
+        or if 'zscore' is used on a list with zero standard deviation.
+
+    Examples
+    --------
+    >>> normalize([1, 2, 3, 4, 5])
+    [0.0, 0.25, 0.5, 0.75, 1.0]
+    """
+    if method == "minmax":
+        mn = min(numbers)
+        mx = max(numbers)
+        # Step 7 – Fix operator precedence bug in minmax normalization
+        return [(x - mn) / (mx - mn) for x in numbers]
+    elif method == "zscore":
+        stats = calculate_statistics(numbers)
+        std = stats["variance"] ** 0.5
+        return [(x - stats["average"]) / std for x in numbers]
+    else:
+        # Step 8 – Replace print statement with ValueError for unknown methods
+        raise ValueError(f"Unknown normalisation method: {method}")
+
+
+if __name__ == "__main__":
+    # Step 9 – Implement and verify main block sanity checks
+    sample = [4, 8, 15, 16, 23, 42]
+    stats = calculate_statistics(sample)
+    
+    # Verify expected values for sample data
+    expected_sum = 4 + 8 + 15 + 16 + 23 + 42
+    expected_count = 6
+    expected_avg = expected_sum / expected_count
+    
+    assert stats["count"] == expected_count, f"Count mismatch: {stats['count']} != {expected_count}"
+    assert stats["sum"] == expected_sum, f"Sum mismatch: {stats['sum']} != {expected_sum}"
+    assert abs(stats["average"] - expected_avg) < 1e-9, f"Average mismatch: {stats['average']} != {expected_avg}"
+    assert stats["min"] == 4, f"Min mismatch: {stats['min']} != 4"
+    assert stats["max"] == 42, f"Max mismatch: {stats['max']} != 42"
+    
+    # Test empty list handling
+    empty_stats = calculate_statistics([])
+    assert empty_stats["count"] == 0, "Empty list count should be 0"
+    assert empty_stats["sum"] == 0.0, "Empty list sum should be 0.0"
+    assert empty_stats["average"] == 0.0, "Empty list average should be 0.0"
+    assert empty_stats["min"] == 0.0, "Empty list min should be 0.0"
+    assert empty_stats["max"] == 0.0, "Empty list max should be 0.0"
+    assert empty_stats["variance"] == 0.0, "Empty list variance should be 0.0"
+    
+    # Test normalization
+    normalized = normalize([1, 2, 3, 4, 5])
+    expected_normalized = [0.0, 0.25, 0.5, 0.75, 1.0]
+    assert len(normalized) == 5, "Normalized list length mismatch"
+    for i, val in enumerate(normalized):
+        assert abs(val - expected_normalized[i]) < 1e-9, f"Normalized value mismatch at index {i}"
+    
+    print("All sanity checks passed!")
\ No newline at end of file
diff --git a/Prompting Exercise/ex01_xml_prompting.py b/Prompting Exercise/ex01_xml_prompting.py
new file mode 100644
index 0000000..0ccc6e8
--- /dev/null
+++ b/Prompting Exercise/ex01_xml_prompting.py	
@@ -0,0 +1,142 @@
+"""
+Exercise 1 – Basic XML Structured Prompting
+============================================
+AISE501 · Prompting in Coding · Spring Semester 2026
+
+Learning goals
+--------------
+* Connect to the local LLM server and send your first prompt.
+* Understand the difference between unstructured and XML-structured prompts.
+* See how structure helps the model parse and prioritise different parts
+  of your request.
+
+Tasks
+-----
+Part A  Run the unstructured prompt (already done for you). Read the response.
+Part B  Complete the XML-structured version of the same request (TODOs 1-3).
+Part C  Add a system prompt to set the response style (TODOs 4-5).
+
+"""
+
+from server_utils import chat, get_client, print_messages, print_separator
+
+client = get_client()
+
+
+# ── Part A: Unstructured (Zero-Shot) Prompt ───────────────────────────────────
+# This section is complete. Run it, read the response, then move on.
+
+print_separator("Part A – Unstructured Prompt")
+
+unstructured_messages = [
+    {
+        "role": "user",
+        "content": (
+            "Explain what a Python list comprehension is, "
+            "give an example that filters even numbers from a list, "
+            "and list two common mistakes beginners make."
+        ),
+    }
+]
+
+# print_messages(unstructured_messages)   # ← always inspect what you send!
+# response_a = chat(client, unstructured_messages)
+# print(response_a)
+
+
+# ── Part B: Structured Prompt with XML Tags ───────────────────────────────────
+# Use XML tags to structure the same request more precisely.
+# Named sections help the model parse and prioritise your intent.
+
+print_separator("Part B – Structured Prompt with XML Tags")
+
+# TODO 1:  Fill in the three XML sections below.
+#          Use the same topic as Part A but make each section specific.
+#
+#   <topic>    – the Python concept to explain
+#   <example>  – what the code example should demonstrate
+#   <focus>    – two or three specific points you want covered in the answer
+#
+# Tip: XML tag names are arbitrary — choose names that make sense to a
+#      human reader and the model will understand them too.
+
+structured_content = """\
+<request>
+  <topic>
+    Python list comprehensions
+  </topic>
+  <example>
+    Filter even numbers from a list
+  </example>
+  <focus>
+    Syntax overview and two common beginner mistakes
+  </focus>
+</request>"""
+
+# TODO 2:  Build the messages list.
+#          Use structured_content as the content of a "user" message.
+#
+# Reminder: messages is a list of dicts with keys "role" and "content".
+#           "role" is one of "system", "user", or "assistant".
+
+structured_messages = [
+    # TODO: add the user message dict here
+    {
+        "role": "user",
+        "content": structured_content,
+    }
+]
+
+# TODO 3:  Call chat() with structured_messages, store the result, print it.
+#          Compare the output with response_a above.
+#          Always call print_messages() before chat() to see the full prompt.
+
+# print_messages(structured_messages)
+# response_b = chat(client, structured_messages)
+# print(response_b)
+
+
+# ── Part C: Adding a System Prompt ────────────────────────────────────────────
+# A system prompt lets you define a persona and global rules for every
+# response in the conversation without repeating yourself each time.
+
+print_separator("Part C – Adding a System Prompt")
+
+# TODO 4:  Write an XML-structured system prompt that defines:
+#            <persona>      – who the LLM should be
+#            <style>        – tone and formatting rules
+#            <constraints>  – length or content limits
+#
+# Example persona: "experienced Python tutor who always shows code first"
+
+system_content = """\
+<request>
+  <persona>You are a master python developer and teacher</persona>
+  <style>You follow the PEP 8 style guide</style>
+  <constraints>Format your response in json</constraints>
+</request>
+"""
+
+# TODO 5:  Build a messages list that puts the system prompt FIRST (role="system"),
+#          followed by the structured user message from Part B.
+#          Call chat() and print the result.
+#
+# Reflection: How did the system prompt change the answer compared to Part B?
+
+messages_c = [
+    {"role": "system", "content": system_content},
+    {"role": "user", "content": structured_content}
+]
+print_messages(messages_c)
+response_c = chat(client, messages_c)
+print(response_c)
+
+
+# ── Reflection Questions ──────────────────────────────────────────────────────
+print_separator("Reflection Questions")
+print(
+    "1. How did XML structure change the format and depth of the response?\n"
+    "2. What happens if you use inconsistent or missing closing tags?\n"
+    "3. When would you NOT bother with XML structure?\n"
+    "4. How does the system prompt interact with the user message?\n"
+)
diff --git a/Prompting Exercise/ex01_xml_prompting_solution.py b/Prompting Exercise/ex01_xml_prompting_solution.py
new file mode 100644
index 0000000..79ad237
--- /dev/null
+++ b/Prompting Exercise/ex01_xml_prompting_solution.py	
@@ -0,0 +1,91 @@
+"""
+Exercise 1 – SOLUTION – Basic XML Structured Prompting
+=======================================================
+AISE501 · Prompting in Coding · Spring Semester 2026
+"""
+
+from server_utils import chat, get_client, print_messages, print_separator
+
+client = get_client()
+temperature_value=0.3
+
+# ── Part A: Unstructured (Zero-Shot) Prompt ───────────────────────────────────
+print_separator("Part A – Unstructured Prompt")
+
+unstructured_messages = [
+    {
+        "role": "user",
+        "content": (
+            "Explain what a Python list comprehension is, "
+            "give an example that filters even numbers from a list, "
+            "and list two common mistakes beginners make."
+        ),
+    }
+]
+
+print_messages(unstructured_messages)
+response_a = chat(client, unstructured_messages)
+print(response_a)
+
+
+# ── Part B: Structured Prompt with XML Tags ───────────────────────────────────
+print_separator("Part B – Structured Prompt with XML Tags")
+
+structured_content = """\
+<request>
+  <topic>
+    Python list comprehensions
+  </topic>
+  <example>
+    A list comprehension that takes a list of integers and returns only
+    the even numbers, using a conditional filter expression.
+  </example>
+  <focus>
+    1. The general syntax: [expression for item in iterable if condition]
+    2. Two common beginner mistakes when writing list comprehensions
+  </focus>
+</request>"""
+
+structured_messages = [
+    {"role": "user", "content": structured_content}
+]
+
+print_messages(structured_messages)
+response_b = chat(client, structured_messages, temperature=temperature_value)
+print(response_b)
+
+
+# ── Part C: Adding a System Prompt ────────────────────────────────────────────
+print_separator("Part C – Adding a System Prompt")
+
+system_content = """\
+<persona>
+  You are an experienced Python tutor. You teach Python to university students
+  who have basic programming knowledge but are new to idiomatic Python.
+</persona>
+<style>
+  Always show a working code snippet first, then explain it step by step.
+  Use plain language. Avoid jargon without defining it. Write python in PEP8 style
+</style>
+<constraints>
+  Keep each answer under 200 words. Use at most one code block per response.
+</constraints>"""
+
+messages_c = [
+    {"role": "system", "content": system_content},
+    {"role": "user",   "content": structured_content},
+]
+
+print_messages(messages_c)
+response_c = chat(client, messages_c,temperature=temperature_value)
+print(response_c)
+
+
+# ── Reflection Questions ──────────────────────────────────────────────────────
+print_separator("Reflection Questions")
+print(
+    "1. How did XML structure change the format and depth of the response?\n"
+    "2. What happens if you use inconsistent or missing closing tags?\n"
+    "3. When would you NOT bother with XML structure?\n"
+    "4. How does the system prompt interact with the user message?\n"
+)
diff --git a/Prompting Exercise/ex02_persona_task_data.py b/Prompting Exercise/ex02_persona_task_data.py
new file mode 100644
index 0000000..8f45be2
--- /dev/null
+++ b/Prompting Exercise/ex02_persona_task_data.py	
@@ -0,0 +1,151 @@
+"""
+Exercise 2 – Persona, Task, and Data in a Structured Prompt
+============================================================
+AISE501 · Prompting in Coding · Spring Semester 2026
+
+Learning goals
+--------------
+* Use XML tags to separate three prompt concerns: WHO the LLM is,
+  WHAT it should do, and the DATA it should work with.
+* Pass a real Python file as context (RAG-style) inside a <code> tag.
+* Iterate on the prompt to extract more specific information.
+
+The file analyze_me.py contains several bugs and style issues.
+You will ask the LLM to find and explain them.
+
+Tasks
+-----
+Part A  Build a structured prompt with <persona>, <task>, and <code> tags
+        and ask the LLM to review analyze_me.py (TODOs 1-4).
+Part B  Refine the prompt to request a prioritised bug list (TODOs 5-6).
+Part C  Ask for a corrected version of one specific function (TODO 7).
+
+"""
+
+from pathlib import Path
+
+from server_utils import chat, get_client, print_messages, print_separator
+
+client = get_client()
+
+# Read the file we want the LLM to analyse
+code_to_review = Path("analyze_me.py").read_text()
+
+
+# ── Part A: Persona + Task + Code ─────────────────────────────────────────────
+print_separator("Part A – Structured Prompt: Persona / Task / Code")
+
+# TODO 1:  Fill in the <persona> tag.
+#          Define a senior Python engineer who is rigorous about correctness
+#          and follows PEP-8 and best practices.
+
+# TODO 2:  Fill in the <task> tag.
+#          Ask the LLM to review the Python code and identify ALL bugs,
+#          listing each one with a short explanation of why it is a bug.
+
+# TODO 3:  The <code> tag already contains the file — do not change it.
+
+# TODO 4:  Build the messages list using only a user message (no system prompt yet).
+#          Call chat() and print the result.
+
+prompt_a = f"""\
+<persona>
+  You are a Python engineer who is rigorous about correctness and follows PEP-8 and best practices.
+</persona>
+
+<task>
+    Review the Python code and identify ALL bugs, listing each one with a short explanation of why it is a bug.
+</task>
+
+<code language="python" filename="analyze_me.py">
+{code_to_review}
+</code>"""
+
+messages_a = [
+    {"role": "user", "content": prompt_a}
+]
+
+# print_messages(messages_a)
+# response_a = chat(client, messages_a)
+# print(response_a)
+
+
+# ── Part B: Refine – Ask for a Prioritised Bug List ───────────────────────────
+print_separator("Part B – Refined Prompt: Prioritised Bug List")
+
+# TODO 5:  Extend the <task> from Part A to ask the LLM to:
+#          - Separate bugs by severity: Critical / Medium / Style
+#          - For each bug: state the line number, the problem, and a one-line fix hint
+#
+# Tip: add a <output_format> tag that describes exactly how you want the answer
+#      structured (plain text for now — we tackle real machine output in Ex 3).
+
+# TODO 6:  Build messages_b with a system prompt that reinforces the persona
+#          and a user message with the refined prompt.
+#          Call chat() and print the result.
+
+system_b = """\
+<request>
+  <persona>You are a master python developer and teacher</persona>
+  <style>You follow the PEP 8 style guide</style>
+  <constraints>Format your response in json</constraints>
+</request>
+"""
+
+prompt_b = f"""\
+<persona>
+  You are a Python engineer who is rigorous about correctness and follows PEP-8 and best practices.
+</persona>
+
+<task>
+    Review the Python code and identify ALL bugs, listing each one with a short explanation of why it is a bug.
+    Separate bugs by severity: Critical / Medium / Style
+    For each bug: state the line number, the problem, and a one-line fix hint
+</task>
+
+<output_format>
+    ...
+</output_format>
+
+<code language="python" filename="analyze_me.py">
+{code_to_review}
+</code>"""
+
+messages_b = [
+    {"role": "system", "content": system_b},
+    {"role": "user",   "content": prompt_b},
+]
+print_messages(messages_b)
+response_b = chat(client, messages_b)
+print(response_b)
+
+
+# ── Part C: Request a Corrected Function ──────────────────────────────────────
+print_separator("Part C – Ask for a Corrected Function")
+
+# TODO 7:  Pick one buggy function from analyze_me.py (e.g. calculate_statistics).
+#          Write a new user message — continuing the SAME conversation as Part B —
+#          that asks the LLM to rewrite that function with all bugs fixed,
+#          including proper type hints and a docstring.
+#
+# Key insight: you can reuse the model's previous response by appending it to
+# the messages list as an "assistant" message, then adding a new "user" message.
+# This is how multi-turn conversations work with the API.
+
+messages_c = messages_b + [
+    {"role": "assistant", "content": response_b},   # LLM's previous answer
+    {"role": "user",      "content": "Fix all bugs, keep the rest as it is"},
+]
+print_messages(messages_c)
+response_c = chat(client, messages_c)
+print(response_c)
+
+
+# ── Reflection Questions ──────────────────────────────────────────────────────
+print_separator("Reflection Questions")
+print(
+    "1. Did the LLM find all 7 bugs? Which did it miss?\n"
+    "2. How did the <output_format> tag change the structure of the answer?\n"
+    "3. What is the advantage of continuing a conversation vs. starting fresh?\n"
+    "4. How would you scale this pattern to a large codebase (many files)?\n"
+)
diff --git a/Prompting Exercise/ex02_persona_task_data_solution.py b/Prompting Exercise/ex02_persona_task_data_solution.py
new file mode 100644
index 0000000..cd3c586
--- /dev/null
+++ b/Prompting Exercise/ex02_persona_task_data_solution.py	
@@ -0,0 +1,122 @@
+"""
+Exercise 2 – SOLUTION – Persona, Task, and Data in a Structured Prompt
+=======================================================================
+AISE501 · Prompting in Coding · Spring Semester 2026
+"""
+
+from pathlib import Path
+
+from server_utils import chat, get_client, print_messages, print_separator
+
+client = get_client()
+
+code_to_review = Path("analyze_me.py").read_text()
+temperature_value=1
+
+# ── Part A: Persona + Task + Code ─────────────────────────────────────────────
+print_separator("Part A – Structured Prompt: Persona / Task / Code")
+
+prompt_a = f"""\
+<persona>
+  You are a senior Python engineer with 10+ years of experience.
+  You are rigorous about correctness, follow PEP-8 strictly, and care
+  deeply about defensive programming and readable code.
+</persona>
+
+<task>
+  Review the Python code provided below.
+  Identify every bug and code-quality issue you can find.
+  For each issue, state what is wrong and why it is a problem.
+</task>
+
+<code language="python" filename="analyze_me.py">
+{code_to_review}
+</code>"""
+
+messages_a = [
+    {"role": "user", "content": prompt_a}
+]
+
+print_messages(messages_a)
+response_a = chat(client, messages_a, temperature=temperature_value)
+print(response_a)
+
+
+# ── Part B: Refine – Ask for a Prioritised Bug List ───────────────────────────
+print_separator("Part B – Refined Prompt: Prioritised Bug List")
+
+system_b = """\
+You are a senior Python engineer performing a thorough code review.
+Be concise, precise, and always refer to line numbers when available.
+"""
+
+prompt_b = f"""\
+<persona>
+  You are a senior Python engineer with 10+ years of experience.
+  You are rigorous about correctness, follow PEP-8, and care about
+  defensive programming and readable code.
+</persona>
+
+<task>
+  Review the Python code below.
+  Identify every bug and code-quality issue.
+  Classify each finding by severity:
+    - Critical  : causes a crash or wrong result under normal use
+    - Medium    : bad practice that will cause problems in production
+    - Style     : violates PEP-8 or reduces readability
+</task>
+
+<output_format>
+  For each finding produce exactly this structure (plain text):
+    [SEVERITY] Line <N>: <one-sentence problem description>
+    Fix hint: <one-sentence suggestion>
+
+  Group findings under headings: ## Critical, ## Medium, ## Style
+</output_format>
+
+<code language="python" filename="analyze_me.py">
+{code_to_review}
+</code>"""
+
+messages_b = [
+    {"role": "system", "content": system_b},
+    {"role": "user",   "content": prompt_b},
+]
+
+print_messages(messages_b)
+response_b = chat(client, messages_b, temperature=temperature_value)
+print(response_b)
+
+
+# ── Part C: Request a Corrected Function ──────────────────────────────────────
+print_separator("Part C – Ask for a Corrected Function")
+
+followup = """\
+<task>
+  Rewrite only the `calculate_statistics` function with all bugs fixed.
+  Requirements:
+    - Handle an empty list gracefully (return None or raise ValueError with a clear message)
+    - Use sample variance (divide by N-1)
+    - Add full PEP-8 type hints
+    - Add a NumPy-style docstring
+  Return only the function code, no surrounding explanation.
+</task>"""
+
+messages_c = messages_b + [
+    {"role": "assistant", "content": response_b},
+    {"role": "user",      "content": followup},
+]
+
+print_messages(messages_c)
+response_c = chat(client, messages_c, temperature=temperature_value)
+print(response_c)
+
+
+# ── Reflection Questions ──────────────────────────────────────────────────────
+print_separator("Reflection Questions")
+print(
+    "1. Did the LLM find all 7 bugs? Which did it miss?\n"
+    "2. How did the <output_format> tag change the structure of the answer?\n"
+    "3. What is the advantage of continuing a conversation vs. starting fresh?\n"
+    "4. How would you scale this pattern to a large codebase (many files)?\n"
+)
diff --git a/Prompting Exercise/ex03_structured_output.py b/Prompting Exercise/ex03_structured_output.py
new file mode 100644
index 0000000..5b87616
--- /dev/null
+++ b/Prompting Exercise/ex03_structured_output.py	
@@ -0,0 +1,231 @@
+"""
+Exercise 3 – Structured Input and Structured Output
+====================================================
+AISE501 · Prompting in Coding · Spring Semester 2026
+
+Learning goals
+--------------
+* Request machine-parseable output (JSON and YAML) from the LLM.
+* Parse the JSON response in Python and use it programmatically.
+* Build a second prompt dynamically from the parsed data.
+* Understand why structured output is essential for LLM pipelines.
+
+Tasks
+-----
+Part A  Ask the LLM to review analyze_me.py and return a JSON report (TODOs 1-4).
+Part B  Parse the JSON response and print a summary table (TODOs 5-6).
+Part C  Use the parsed data to build a follow-up prompt automatically (TODOs 7-8).
+Part D  Repeat Part A but request YAML instead of JSON (TODO 9).
+
+Estimated time: 40-50 minutes
+"""
+
+import json
+from pathlib import Path
+
+from server_utils import chat, chat_json, get_client, print_messages, print_separator
+
+client = get_client()
+
+code_to_review = Path("analyze_me.py").read_text()
+
+
+# ── Part A: Structured Input → JSON Output ────────────────────────────────────
+print_separator("Part A – Request JSON Output")
+
+# TODO 1:  Write a system prompt that instructs the model to ALWAYS respond
+#          with valid JSON and nothing else (no markdown fences, no explanation).
+
+system_a = """\
+<request>
+  <persona>You are a master python tutor</persona>
+  <style>You follow the PEP 8 style guide</style>
+  <constraints>Only respond in a json format following the user provided schema</constraints>
+</request>
+"""
+
+# TODO 2:  Write the user prompt.
+#          Use XML tags for <persona>, <task>, and <code>.
+#
+#          In <task>, specify the exact JSON schema you expect:
+#
+schema = """{
+  "summary": "<one sentence overview>",
+  "bugs": [
+    {
+      "id": 1,
+      "severity": "Critical|Medium|Style",
+      "line": <int or null>,
+      "function": "<function name>",
+      "description": "<what is wrong>",
+      "fix": "<one-sentence fix hint>"
+    },
+    ...
+  ],
+  "overall_quality": "Poor|Fair|Good|Excellent"
+}"""
+#
+# Tip: paste the schema directly inside a <schema> tag in your prompt.
+
+prompt_a = f"""\
+TODO: Write your structured prompt here.
+Include <persona>, <task>, <schema>, and <code> tags.
+
+<persona>
+    You are a Python engineer who is rigorous about correctness and follows PEP-8 and best practices.
+</persona>
+
+<task>
+    Review the Python code and identify ALL bugs.
+    Explain all the bugs you found using the schema provided.
+</task>
+
+<schema>
+{schema}
+</schema>
+
+<code language="python" filename="analyze_me.py">
+{code_to_review}
+</code>"""
+
+messages_a = [
+    # TODO 3: build the messages list (system + user)
+    {"role": "system", "content": system_a},
+    {"role": "user",   "content": prompt_a},
+]
+
+# TODO 4: call chat_json() and store the raw response string in raw_json_a.
+#          chat_json() adds response_format={"type": "json_object"} so the
+#          server guarantees the output is parseable by json.loads().
+print_messages(messages_a)
+raw_json_a = chat_json(client, messages_a)
+print("Raw response:")
+print(raw_json_a)
+
+
+# ── Part B: Parse the JSON and Display a Summary ──────────────────────────────
+print_separator("Part B – Parse JSON and Print Summary")
+
+# TODO 5:  Parse raw_json_a with json.loads(); on malformed JSON, print a helpful error and stop.
+try:
+    report = json.loads(raw_json_a)
+except json.JSONDecodeError as err:
+    raise SystemExit(f"ERROR: model returned malformed JSON: {err}") from err
+
+# TODO 6:  Print a formatted summary table like this:
+#
+#   Overall quality : Fair
+#   Summary         : ...
+#
+#   ID | Severity | Line | Function              | Description
+#   ---+----------+------+-----------------------+---------------------------
+#    1 | Critical |   12 | calculate_statistics  | ZeroDivisionError on ...
+#    2 | ...
+#
+# Hint: use f-strings and ljust() / rjust() for alignment.
+
+print(f"Overall quality : {report['overall_quality']}")
+print(f"Summary         : {report['summary']}\n")
+
+bugs = report.get("bugs", [])
+if bugs:
+    headers = {
+        "id": "ID",
+        "severity": "Severity",
+        "line": "Line",
+        "function": "Function",
+        "description": "Description",
+    }
+
+    # Compute column widths
+    widths = {
+        key: max(len(headers[key]), *(len(str(b[key])) for b in bugs))
+        for key in headers
+    }
+
+    # Header row
+    print(
+        f"{headers['id'].ljust(widths['id'])} | "
+        f"{headers['severity'].ljust(widths['severity'])} | "
+        f"{headers['line'].ljust(widths['line'])} | "
+        f"{headers['function'].ljust(widths['function'])} | "
+        f"{headers['description']}"
+    )
+
+    # Separator row
+    print(
+        f"{'-' * widths['id']}-+-"
+        f"{'-' * widths['severity']}-+-"
+        f"{'-' * widths['line']}-+-"
+        f"{'-' * widths['function']}-+-"
+        f"{'-' * widths['description']}"
+    )
+
+    # Data rows (str() keeps null / non-string fields from crashing ljust)
+    for bug in bugs:
+        print(
+            f"{str(bug['id']).ljust(widths['id'])} | "
+            f"{str(bug['severity']).ljust(widths['severity'])} | "
+            f"{str(bug['line']).ljust(widths['line'])} | "
+            f"{str(bug['function']).ljust(widths['function'])} | "
+            f"{bug['description']}"
+        )
+
+# ── Part C: Use the Parsed Data to Build a Follow-Up Prompt ──────────────────
+print_separator("Part C – Dynamic Follow-Up Prompt from Parsed Data")
+
+# TODO 7:  Select all bugs with severity "Critical" from the parsed report.
+#          Build a new user prompt that:
+#          - Lists each critical bug by ID and description
+#          - Asks the LLM to provide the corrected code for each one
+#          - Requests the output as a JSON OBJECT (not a bare array, because
+#            response_format=json_object requires an object at the top level):
+#            {"fixes": [{"bug_id": 1, "fixed_code": "..."}, ...]}
+#
+# Tip: wrap the schema in a {"fixes": [...]} object so chat_json() works.
+
+critical_bugs = [b for b in report["bugs"] if b["severity"] == "Critical"]
+
+followup_prompt = """\
+TODO: Build the follow-up prompt dynamically using the critical_bugs list.
+      Loop over critical_bugs to embed each bug's description in the prompt.
+"""
+
+# TODO 8:  Continue the conversation (multi-turn) by appending the previous
+#          response and the new prompt, then call chat_json() and parse the result.
+#          Because the schema is {"fixes": [...]}, extract the list with ["fixes"].
+
+# messages_c = messages_a + [
+#     {"role": "assistant", "content": raw_json_a},
+#     {"role": "user",      "content": followup_prompt},
+# ]
+# print_messages(messages_c)
+# raw_json_c = chat_json(client, messages_c)
+# fixes = json.loads(raw_json_c)["fixes"]
+# for fix in fixes:
+#     print(f"\n--- Fix for bug {fix['bug_id']} ---")
+#     print(fix["fixed_code"])
+
+
+# ── Part D: Request YAML Instead of JSON ─────────────────────────────────────
+print_separator("Part D – YAML Output")
+
+# TODO 9:  Repeat Part A but ask for YAML output instead of JSON.
+#          Install PyYAML if needed: pip install pyyaml
+#          Parse the response with yaml.safe_load() and print the result.
+#
+# Question: Which format do you prefer for human-readable reports? For
+#           machine-to-machine pipelines?
+
+# import yaml
+# ...
+
+
+# ── Reflection Questions ──────────────────────────────────────────────────────
+print_separator("Reflection Questions")
+print(
+    "1. What can go wrong when asking an LLM to return JSON?\n"
+    "2. How did the <schema> tag influence the output structure?\n"
+    "3. Why is structured output important for building LLM pipelines?\n"
+    "4. When would you use JSON vs. YAML vs. plain text?\n"
+)
diff --git a/Prompting Exercise/ex03_structured_output_solution.py b/Prompting Exercise/ex03_structured_output_solution.py
new file mode 100644
index 0000000..b2fa154
--- /dev/null
+++ b/Prompting Exercise/ex03_structured_output_solution.py	
@@ -0,0 +1,188 @@
+"""
+Exercise 3 – SOLUTION – Structured Input and Structured Output
+==============================================================
+AISE501 · Prompting in Coding · Spring Semester 2026
+"""
+
+import json
+from pathlib import Path
+
+import yaml  # pip install pyyaml
+
+from server_utils import chat, chat_json, get_client, print_messages, print_separator
+
+client = get_client()
+
+code_to_review = Path("analyze_me.py").read_text()
+
+
+# ── Part A: Structured Input → JSON Output ────────────────────────────────────
+print_separator("Part A – Request JSON Output")
+
+system_a = """\
+You are a code-review assistant. You ALWAYS respond with valid JSON and
+nothing else — no markdown code fences, no introductory text, no trailing
+commentary. Your entire response must be parseable by json.loads().
+"""
+
+prompt_a = f"""\
+<persona>
+  You are a senior Python engineer performing a thorough, structured code review.
+</persona>
+
+<task>
+  Review the Python code below and return your findings as JSON.
+  Follow the schema defined in <schema> exactly.
+</task>
+
+<schema>
+{{
+  "summary": "<one-sentence overview of the code quality>",
+  "bugs": [
+    {{
+      "id": 1,
+      "severity": "Critical|Medium|Style",
+      "line": <integer line number or null if not applicable>,
+      "function": "<name of the affected function>",
+      "description": "<what is wrong and why it matters>",
+      "fix": "<one-sentence fix hint>"
+    }}
+  ],
+  "overall_quality": "Poor|Fair|Good|Excellent"
+}}
+</schema>
+
+<code language="python" filename="analyze_me.py">
+{code_to_review}
+</code>"""
+
+messages_a = [
+    {"role": "system", "content": system_a},
+    {"role": "user",   "content": prompt_a},
+]
+
+print_messages(messages_a)
+raw_json_a = chat_json(client, messages_a)   # response_format=json_object → always valid JSON
+print("Raw response:")
+print(raw_json_a)
+
+
+# ── Part B: Parse the JSON and Display a Summary ──────────────────────────────
+print_separator("Part B – Parse JSON and Print Summary")
+
+report = json.loads(raw_json_a)
+
+
+print(f"Overall quality : {report['overall_quality']}")
+print(f"Summary         : {report['summary']}\n")
+
+col_w = [4, 10, 6, 24, 45]
+header = (
+    f"{'ID':<{col_w[0]}} | {'Severity':<{col_w[1]}} | {'Line':<{col_w[2]}} | "
+    f"{'Function':<{col_w[3]}} | {'Description':<{col_w[4]}}"
+)
+print(header)
+print("-" * len(header))
+
+for bug in report["bugs"]:
+    line_str = str(bug["line"]) if bug["line"] is not None else "—"
+    print(
+        f"{bug['id']:<{col_w[0]}} | "
+        f"{bug['severity']:<{col_w[1]}} | "
+        f"{line_str:<{col_w[2]}} | "
+        f"{bug['function']:<{col_w[3]}} | "
+        f"{bug['description'][:col_w[4]]}"
+    )
+
+
+# ── Part C: Use the Parsed Data to Build a Follow-Up Prompt ──────────────────
+print_separator("Part C – Dynamic Follow-Up Prompt from Parsed Data")
+
+critical_bugs = [b for b in report["bugs"] if b["severity"] == "Critical"]
+
+if not critical_bugs:
+    print("No critical bugs found — nothing to fix.")
+else:
+    lines = []
+    for b in critical_bugs:
+        lines.append(f'  - Bug {b["id"]} (line {b["line"]}): {b["description"]}')
+    bug_list_text = "\n".join(lines)
+
+    followup_prompt = f"""\
+<task>
+  The following critical bugs were found in analyze_me.py:
+
+{bug_list_text}
+
+  For each bug, provide the corrected Python code snippet (the full function
+  is fine). Return your answer as a JSON object with this schema:
+  {{
+    "fixes": [
+      {{"bug_id": <int>, "fixed_code": "<corrected Python code as a string>"}}
+    ]
+  }}
+  No markdown, no explanation — only the JSON object.
+</task>"""
+
+    messages_c = messages_a + [
+        {"role": "assistant", "content": raw_json_a},
+        {"role": "user",      "content": followup_prompt},
+    ]
+
+    print_messages(messages_c)
+    raw_json_c = chat_json(client, messages_c)
+
+    fixes = json.loads(raw_json_c)["fixes"]
+    for fix in fixes:
+        print(f"\n--- Fix for bug {fix['bug_id']} ---")
+        print(fix["fixed_code"])
+
+
+# ── Part D: Request YAML Instead of JSON ─────────────────────────────────────
+print_separator("Part D – YAML Output")
+
+system_d = """\
+You are a code-review assistant. You ALWAYS respond with valid YAML and
+nothing else — no markdown fences, no introductory text.
+"""
+
+prompt_d = f"""\
+<persona>
+  You are a senior Python engineer performing a structured code review.
+</persona>
+
+<task>
+  Review the code below and return your findings as YAML.
+  Use the same fields as before: summary, bugs (with id/severity/line/
+  function/description/fix), and overall_quality.
+</task>
+
+<code language="python" filename="analyze_me.py">
+{code_to_review}
+</code>"""
+
+messages_d = [
+    {"role": "system", "content": system_d},
+    {"role": "user",   "content": prompt_d},
+]
+
+print_messages(messages_d)
+raw_yaml = chat(client, messages_d, temperature=0.2)
+
+try:
+    yaml_report = yaml.safe_load(raw_yaml)
+    print(f"Parsed YAML – overall quality: {yaml_report.get('overall_quality')}")
+    print(f"Number of bugs found: {len(yaml_report.get('bugs', []))}")
+except yaml.YAMLError as e:
+    print(f"ERROR: malformed YAML: {e}")
+    print(raw_yaml)
+
+
+# ── Reflection Questions ──────────────────────────────────────────────────────
+print_separator("Reflection Questions")
+print(
+    "1. What can go wrong when asking an LLM to return JSON?\n"
+    "2. How did the <schema> tag influence the output structure?\n"
+    "3. Why is structured output important for building LLM pipelines?\n"
+    "4. When would you use JSON vs. YAML vs. plain text?\n"
+)
diff --git a/Prompting Exercise/ex04_cot_pipeline.py b/Prompting Exercise/ex04_cot_pipeline.py
new file mode 100644
index 0000000..606515b
--- /dev/null
+++ b/Prompting Exercise/ex04_cot_pipeline.py	
@@ -0,0 +1,300 @@
+"""
+Exercise 4 – Build Your Own Chain-of-Thought Pipeline
+======================================================
+AISE501 · Prompting in Coding · Spring Semester 2026
+
+Learning goals
+--------------
+* Understand that reasoning models (o1, DeepSeek-R1, Qwen3 think mode)
+  generate a hidden "plan" before giving the final answer.
+* Replicate this behaviour manually using multiple LLM calls:
+    Call 1  (Planning)  – structured input  →  structured JSON plan
+    Calls 2…N (Execution) – iterate step-by-step, validating each step
+* See why explicit reasoning steps improve answer quality for complex tasks.
+
+Background
+----------
+When you disable Qwen3's built-in thinking mode (as we do in server_utils),
+you get fast, direct answers — but no explicit reasoning.
+In this exercise you rebuild that reasoning step yourself, step by step,
+so you can inspect and control the thinking process.
+
+The problem
+-----------
+Given the buggy analyze_me.py from earlier exercises, design and implement
+a corrected, production-ready version of the full module.
+
+Tasks
+-----
+Part A  Planning phase: structured input → JSON reasoning plan (TODOs 1-5).
+Part B  Iterative execution: apply each plan step one at a time,
+        validating syntax after each step (TODOs 6-10).
+Part C  Reflection — compare with and without CoT (TODO 11).
+
+Estimated time: 50-60 minutes
+"""
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+from server_utils import (
+    chat, chat_json, get_client, print_messages, print_separator,
+    strip_code_fences,
+)
+
+client = get_client()
+
+code_to_fix = Path("analyze_me.py").read_text()
+
+# ── The Problem Statement ─────────────────────────────────────────────────────
+# We will use this description in both phases so we define it once.
+
+PROBLEM = """\
+Rewrite the Python module analyze_me.py so that it is correct,
+robust, and production-ready.
+
+Requirements:
+  1. calculate_statistics() must handle empty lists without crashing.
+  2. Use sample variance (divide by N-1).
+  3. process_data() must use a context manager and handle non-numeric lines.
+  4. normalize() must fix the operator-precedence bug and raise ValueError
+     for unknown methods.
+  5. All functions must have PEP-484 type hints and NumPy-style docstrings.
+  6. The module must pass basic sanity checks when run as __main__.
+"""
+
+
+# ── Part A: Planning Phase ────────────────────────────────────────────────────
+print_separator("Part A – Planning Phase (CoT Step 1)")
+
+# The goal of this phase is NOT to write the code — it is to produce a
+# structured plan: what steps are needed and in what order?
+
+# TODO 1:  Write a system prompt that instructs the model to act as a
+#          "software architect" whose job is ONLY to produce a plan,
+#          never to write the final code.
+#          IMPORTANT: explicitly forbid code snippets in all fields —
+#          use plain English only. This prevents unescaped quotes from
+#          breaking the JSON output.
+#          Enforce JSON-only output.
+
+system_plan = """\
+TODO: Write a system prompt for the planning phase.
+      The model should only reason and plan, not write code.
+      Enforce JSON-only output.
+"""
+
+# TODO 2:  Write the planning user prompt using XML tags:
+#            <problem>   – embed the PROBLEM string
+#            <code>      – embed the buggy code_to_fix
+#            <task>      – ask for a step-by-step plan
+#            <schema>    – specify the exact JSON schema for the plan:
+#
+#          {
+#            "goal": "<one sentence goal>",
+#            "steps": [
+#              {
+#                "step_id": 1,
+#                "title": "<short title>",
+#                "reasoning": "<why this step is needed>",
+#                "action": "<what to do in this step — plain English, no code>",
+#                "depends_on": []   // list of step_ids this step depends on
+#              },
+#              ...
+#            ]
+#          }
+
+prompt_plan = f"""\
+TODO: Write the planning prompt here.
+Use <problem>, <code>, <task>, and <schema> tags.
+
+<problem>
+{PROBLEM}
+</problem>
+
+<code language="python" filename="analyze_me.py">
+{code_to_fix}
+</code>"""
+
+# TODO 3:  Build messages_plan (system + user) and call chat_json().
+#          Use chat_json() (not chat()) so the server enforces valid JSON via
+#          response_format={"type": "json_object"}.
+#          Use max_tokens=4096 — the plan can be long and would get cut off
+#          with the default 2048, producing truncated (unparseable) JSON.
+
+messages_plan = [
+    # TODO: add system and user messages
+]
+
+# print_messages(messages_plan)
+# raw_plan = chat_json(client, messages_plan, max_tokens=4096)
+# print("Raw plan JSON:")
+# print(raw_plan)
+
+
+# TODO 4:  Parse raw_plan with json.loads().
+#          Print each step in a readable format:
+#            Step 1 – <title>
+#              Reasoning : <reasoning>
+#              Action    : <action>
+
+# plan = json.loads(raw_plan)
+# print(f"\nGoal: {plan['goal']}\n")
+# for step in plan["steps"]:
+#     print(f"Step {step['step_id']} – {step['title']}")
+#     print(f"  Reasoning : {step['reasoning']}")
+#     print(f"  Action    : {step['action']}\n")
+
+
+# TODO 5:  (Optional) Inspect the plan critically.
+#          Does the order of steps make sense?
+#          Are any steps missing?
+#          You can edit the plan dict before passing it to the execution phase.
+
+
+# ── Part B: Iterative Execution Phase ────────────────────────────────────────
+print_separator("Part B – Iterative Execution Phase (CoT Step 2)")
+
+# KEY INSIGHT: Instead of dumping the entire plan into one big prompt
+# (which would just be another one-shot), we iterate through each step
+# individually. After every step we:
+#   1. Feed the model only the CURRENT step + the accumulated code so far
+#   2. Validate the output (syntax check via py_compile)
+#   3. Use the validated output as input for the next step
+#
+# This mirrors how a real developer works: implement one change, verify it
+# compiles, then move on. The model always works with CONCRETE code from
+# the previous step rather than an abstract plan of what it intends to write.
+
+# TODO 6:  Write a system prompt for the execution phase.
+#          The model should act as a developer who receives the current
+#          state of a module plus a single step to implement.
+#          It should apply ONLY that step and return the full updated module.
+
+system_exec = """\
+TODO: Write a system prompt for the step-by-step execution phase.
+      The model should apply ONE step at a time.
+"""
+
+
+# TODO 7:  Complete the validate_syntax() function below.
+#          It should write code to a temp file and run py_compile on it.
+#          Return (True, "") if syntax is valid, (False, error_message) otherwise.
+
+def validate_syntax(code: str) -> tuple[bool, str]:
+    """Write code to a temp file and run py_compile to check syntax."""
+    tmp = Path("_tmp_validate.py")
+    # TODO: write code to tmp, run py_compile, clean up, return result
+    tmp.unlink(missing_ok=True)
+    return True, ""   # placeholder
+
+
+# TODO 8:  Implement the step-by-step execution loop.
+#          Start with current_code = code_to_fix (the original buggy code).
+#          For each step in plan["steps"]:
+#            a) Build a prompt with <current_code>, <step>, and <task> tags
+#            b) Call chat() with the prompt
+#            c) Strip code fences from the response
+#            d) Validate syntax using validate_syntax()
+#            e) If valid: update current_code
+#            f) If invalid: retry ONCE with error feedback
+#            g) Print the code after each step
+
+# current_code = code_to_fix
+#
+# for step in plan["steps"]:
+#     step_id = step["step_id"]
+#     print_separator(f"Executing Step {step_id} – {step['title']}")
+#
+#     prompt_step = f"""\
+# TODO: Build the per-step prompt here.
+# Include <current_code>, <step>, and <task> tags.
+# Tell the model to apply ONLY this step."""
+#
+#     messages_step = [
+#         {"role": "system", "content": system_exec},
+#         {"role": "user",   "content": prompt_step},
+#     ]
+#
+#     print_messages(messages_step)
+#     raw_response = chat(client, messages_step, temperature=0.2, max_tokens=4096)
+#     step_code = strip_code_fences(raw_response)
+#
+#     # Validate syntax
+#     ok, error_msg = validate_syntax(step_code)
+#     if ok:
+#         print(f"  [PASS] Step {step_id} – syntax OK")
+#         current_code = step_code
+#     else:
+#         print(f"  [FAIL] Step {step_id} – syntax error: {error_msg}")
+#         # TODO: retry with error feedback (see TODO 9)
+#
+#     print(f"\n--- Code after Step {step_id} ---")
+#     print(current_code)
+
+
+# TODO 9:  Implement the retry logic for syntax errors.
+#          When a step produces invalid syntax:
+#            a) Build a retry prompt with the <error> and the broken <code>
+#            b) Ask the model to fix the syntax error
+#            c) Validate again
+#            d) If still broken, keep the last valid code and continue
+
+
+# TODO 10: Save the final result and run it as a validation.
+#          - Save current_code to "analyze_me_fixed.py"
+#          - Run it with subprocess and print the output
+
+# Path("analyze_me_fixed.py").write_text(current_code)
+# print("\nSaved iterative CoT result to analyze_me_fixed.py")
+#
+# result = subprocess.run(
+#     [sys.executable, "analyze_me_fixed.py"],
+#     capture_output=True, text=True,
+# )
+# print("STDOUT:", result.stdout)
+# if result.stderr:
+#     print("STDERR:", result.stderr)
+# print(f"Exit code: {result.returncode}")
+
+
+# ── Part C: Compare With and Without CoT ─────────────────────────────────────
+print_separator("Part C – Baseline: Direct Prompt Without CoT")
+
+# TODO 11: Send the same problem to the model in a SINGLE prompt with NO plan.
+#          Compare this response with the iterative CoT version.
+
+direct_prompt = f"""\
+TODO: Write a direct, single-shot prompt asking the model to rewrite
+      analyze_me.py according to the PROBLEM requirements.
+      No plan, no iteration — just ask directly.
+
+<problem>
+{PROBLEM}
+</problem>
+
+<code language="python" filename="analyze_me.py">
+{code_to_fix}
+</code>"""
+
+# messages_direct = [{"role": "user", "content": direct_prompt}]
+# print_messages(messages_direct)
+# direct_response = chat(client, messages_direct, temperature=0.3, max_tokens=4096)
+# print(direct_response)
+
+
+# ── Reflection Questions ──────────────────────────────────────────────────────
+print_separator("Reflection Questions")
+print(
+    "1. How did the iterative CoT output differ from the direct single-shot?\n"
+    "2. Did the validation step catch any syntax errors? How were they fixed?\n"
+    "3. What would happen if you gave the model a deliberately wrong plan?\n"
+    "4. How does this manual CoT pipeline relate to built-in thinking modes\n"
+    "   in models like o1, DeepSeek-R1, and Qwen3 with think mode enabled?\n"
+    "5. What are the trade-offs of step-by-step iteration vs. one-shot?\n"
+    "   (Think: latency, cost, error isolation, debuggability)\n"
+    "6. How could you extend the validation step beyond syntax checking?\n"
+    "   (Hint: unit tests, type checking, linting)\n"
+)
diff --git a/Prompting Exercise/ex04_cot_pipeline_solution.py b/Prompting Exercise/ex04_cot_pipeline_solution.py
new file mode 100644
index 0000000..ea766dd
--- /dev/null
+++ b/Prompting Exercise/ex04_cot_pipeline_solution.py	
@@ -0,0 +1,279 @@
+"""
+Exercise 4 – SOLUTION – Build Your Own Chain-of-Thought Pipeline
+================================================================
+AISE501 · Prompting in Coding · Spring Semester 2026
+"""
+
+import ast
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+from server_utils import (
+    chat, chat_json, get_client, print_messages, print_separator,
+    strip_code_fences,
+)
+
+client = get_client()
+
+code_to_fix = Path("analyze_me.py").read_text()
+
+PROBLEM = """\
+Rewrite the Python module analyze_me.py so that it is correct,
+robust, and production-ready.
+
+Requirements:
+  1. calculate_statistics() must handle empty lists without crashing.
+  2. Use sample variance (divide by N-1).
+  3. process_data() must use a context manager and handle non-numeric lines.
+  4. normalize() must fix the operator-precedence bug and raise ValueError
+     for unknown methods.
+  5. All functions must have PEP-484 type hints and NumPy-style docstrings.
+  6. The module must pass basic sanity checks when run as __main__.
+"""
+
+
+# ── Part A: Planning Phase ────────────────────────────────────────────────────
+print_separator("Part A – Planning Phase (CoT Step 1)")
+
+system_plan = """\
+You are a software architect. Your ONLY job right now is to produce a
+structured reasoning plan. You must NOT write any Python code or code
+snippets anywhere in your response — not in action fields, not in
+reasoning fields, nowhere. Use plain English descriptions only.
+Respond with valid JSON only (no markdown fences, no extra text).
+"""
+
+prompt_plan = f"""\
+<problem>
+{PROBLEM}
+</problem>
+
+<code language="python" filename="analyze_me.py">
+{code_to_fix}
+</code>
+
+<task>
+  Analyse the problem and the buggy code above.
+  Produce a step-by-step plan that a developer can follow to implement
+  the corrected module. Each step must be atomic and self-contained.
+</task>
+
+<schema>
+{{
+  "goal": "<one-sentence goal>",
+  "steps": [
+    {{
+      "step_id": 1,
+      "title": "<short title>",
+      "reasoning": "<why this step is necessary>",
+      "action": "<concrete action to take — plain English only, no code>",
+      "depends_on": []
+    }}
+  ]
+}}
+</schema>"""
+
+messages_plan = [
+    {"role": "system", "content": system_plan},
+    {"role": "user",   "content": prompt_plan},
+]
+
+print_messages(messages_plan)
+raw_plan = chat_json(client, messages_plan, max_tokens=4096)
+print("Raw plan JSON:")
+print(raw_plan)
+
+plan = json.loads(raw_plan)
+
+print(f"\nGoal: {plan['goal']}\n")
+for step in plan["steps"]:
+    print(f"Step {step['step_id']} – {step['title']}")
+    print(f"  Reasoning : {step['reasoning']}")
+    print(f"  Action    : {step['action']}")
+    deps = step.get("depends_on", [])
+    if deps:
+        print(f"  Depends on: steps {deps}")
+    print()
+
+
+# ── Part B: Iterative Execution Phase ────────────────────────────────────────
+print_separator("Part B – Iterative Execution Phase (CoT Step 2)")
+
+# Instead of dumping the entire plan into a single prompt, we iterate through
+# each step individually.  After every step we:
+#   1. Feed the model only the CURRENT step + the accumulated code so far
+#   2. Validate the output (syntax check via py_compile)
+#   3. Use the validated output as input for the next step
+#
+# This mirrors how a real developer works: implement one change, verify it
+# compiles, then move on.  It also means the model always works with CONCRETE
+# code from the previous step rather than an abstract plan of what it intends
+# to write.
+
+system_exec = """\
+You are a senior Python developer. You receive the current state of a
+Python module together with a single step to implement. Apply ONLY the
+requested change. Return the complete updated module — no explanations
+outside the code block.
+"""
+
+
+def validate_syntax_ast(code: str) -> tuple[bool, str]:
+    """Return ``(True, "")`` when ast.parse accepts ``code``, else ``(False, message)``."""
+    try:
+        ast.parse(code)
+    except SyntaxError as exc:
+        return False, str(exc)
+    return True, ""
+
+def validate_syntax(code: str) -> tuple[bool, str]:
+    """Check that ``code`` is valid Python; return ``(ok, error_message)``."""
+    # The temp-file / py_compile variant was left as a stub that always
+    # reported success, so the retry-on-syntax-error path below could never
+    # run. Delegate to the in-memory ast-based check, which needs no cleanup.
+    return validate_syntax_ast(code)
+
+
+current_code = code_to_fix          # start with the original buggy code
+
+for step in plan["steps"]:
+    step_id = step["step_id"]
+    print_separator(f"Executing Step {step_id} – {step['title']}")
+
+    prompt_step = f"""\
+<current_code>
+{current_code}
+</current_code>
+
+<step>
+  Step {step_id}: {step['title']}
+  Action: {step['action']}
+  Reasoning: {step['reasoning']}
+</step>
+
+<task>
+  Apply ONLY this single step to the current code above.
+  Do not skip ahead to other steps.
+  Mark your change with a comment: # Step {step_id} – {step['title']}
+  Return the complete updated Python module.
+  Do not include any explanation outside the code.
+</task>"""
+
+    messages_step = [
+        {"role": "system", "content": system_exec},
+        {"role": "user",   "content": prompt_step},
+    ]
+
+    print_messages(messages_step)
+    raw_response = chat(client, messages_step, temperature=0.2, max_tokens=4096)
+    step_code = strip_code_fences(raw_response)
+
+    # ── Validate: syntax check before moving on ──
+    ok, error_msg = validate_syntax(step_code)
+    if ok:
+        print(f"  [PASS] Step {step_id} – syntax OK")
+        current_code = step_code
+    else:
+        print(f"  [FAIL] Step {step_id} – syntax error:\n{error_msg}")
+        print("  Retrying with error feedback...")
+
+        # Give the model one chance to fix its own syntax error
+        retry_prompt = f"""\
+The code you returned has a syntax error:
+
+<error>
+{error_msg}
+</error>
+
+<code>
+{step_code}
+</code>
+
+<task>
+  Fix the syntax error and return the complete corrected module.
+  Do not include any explanation outside the code.
+</task>"""
+
+        messages_retry = [
+            {"role": "system", "content": system_exec},
+            {"role": "user",   "content": retry_prompt},
+        ]
+
+        print_messages(messages_retry)
+        retry_response = chat(client, messages_retry, temperature=0.1, max_tokens=4096)
+        retry_code = strip_code_fences(retry_response)
+
+        ok2, error_msg2 = validate_syntax(retry_code)
+        if ok2:
+            print(f"  [PASS] Step {step_id} – retry syntax OK")
+            current_code = retry_code
+        else:
+            print(f"  [FAIL] Step {step_id} – retry still has errors: {error_msg2}")
+            print("  Continuing with last valid code.")
+
+    print(f"\n--- Code after Step {step_id} ---")
+    print(current_code)
+    print()
+
+# Save the code accumulated by the step loop (current_code) to disk
+Path("analyze_me_fixed.py").write_text(current_code)
+print("\nSaved iterative CoT result to analyze_me_fixed.py")
+
+# Final validation: run the saved module with the current interpreter and show its output
+print_separator("Final Validation – Running analyze_me_fixed.py")
+result = subprocess.run(
+    [sys.executable, "analyze_me_fixed.py"],
+    capture_output=True, text=True,
+)
+print("STDOUT:", result.stdout)
+if result.stderr:
+    print("STDERR:", result.stderr)
+print(f"Exit code: {result.returncode}")
+
+
+# ── Part C: Baseline – Direct Prompt Without CoT ─────────────────────────────
+print_separator("Part C – Baseline: Direct Prompt Without CoT")
+
+direct_prompt = f"""\
+<problem>
+{PROBLEM}
+</problem>
+
+<code language="python" filename="analyze_me.py">
+{code_to_fix}
+</code>
+
+<task>
+  Rewrite the module so that it satisfies all requirements in <problem>.
+  Return only the corrected Python code.
+</task>"""
+
+messages_direct = [{"role": "user", "content": direct_prompt}]
+print_messages(messages_direct)
+direct_response = chat(client, messages_direct, temperature=0.3, max_tokens=4096)
+print(direct_response)
+
+Path("analyze_me_direct.py").write_text(strip_code_fences(direct_response))
+print("\nSaved direct-prompt result to analyze_me_direct.py")
+
+print(
+    "\nCompare analyze_me_fixed.py (CoT) with analyze_me_direct.py (direct).\n"
+    "Which is more complete? Which follows the requirements more closely?"
+)
+
+
+# ── Reflection Questions ──────────────────────────────────────────────────────
+print_separator("Reflection Questions")
+print(
+    "1. How did the iterative CoT output differ from the direct single-shot?\n"
+    "2. Did the validation step catch any syntax errors? How were they fixed?\n"
+    "3. What would happen if you gave the model a deliberately wrong plan?\n"
+    "4. How does this manual CoT pipeline relate to built-in thinking modes\n"
+    "   in models like o1, DeepSeek-R1, and Qwen3 with think mode enabled?\n"
+    "5. What are the trade-offs of step-by-step iteration vs. one-shot?\n"
+    "   (Think: latency, cost, error isolation, debuggability)\n"
+    "6. How could you extend the validation step beyond syntax checking?\n"
+    "   (Hint: unit tests, type checking, linting)\n"
+)
diff --git a/Prompting Exercise/prompting_exercises.pdf b/Prompting Exercise/prompting_exercises.pdf
new file mode 100644
index 0000000..5fc43f6
Binary files /dev/null and b/Prompting Exercise/prompting_exercises.pdf differ
diff --git a/Prompting Exercise/server_utils.py b/Prompting Exercise/server_utils.py
new file mode 100644
index 0000000..aad3134
--- /dev/null
+++ b/Prompting Exercise/server_utils.py	
@@ -0,0 +1,215 @@
+"""
+server_utils.py  –  Shared utilities for AISE501 Prompting Exercises
+======================================================================
+Connects to the vLLM inference server at silicon.fhgr.ch via the
+OpenAI-compatible API.
+
+This file is complete — no TODOs here.
+"""
+
+from openai import OpenAI
+
+# ── Server configuration ──────────────────────────────────────────────────────
+HOST    = "silicon.fhgr.ch"
+PORT    = 7080
+API_KEY = "EMPTY"
+MODEL   = "qwen3.5-35b-a3b"   # model ID served on silicon.fhgr.ch
+
+
+def get_client() -> OpenAI:
+    """Create an OpenAI-compatible client for the vLLM endpoint at HOST:PORT."""
+    endpoint = "http://{}:{}/v1".format(HOST, PORT)
+    return OpenAI(api_key=API_KEY, base_url=endpoint)
+
+
+def list_models(client: OpenAI) -> list[str]:
+    """Collect the ``id`` of every entry in ``client.models.list().data``."""
+    return [entry.id for entry in client.models.list().data]
+
+
+def chat(
+    client: OpenAI,
+    messages: list[dict],
+    model: str = MODEL,
+    temperature: float = 0.2,
+    max_tokens: int = 2048,
+) -> str:
+    """
+    Run one chat completion and return the text of the first choice.
+
+    ``enable_thinking=False`` is passed via ``extra_body`` so that Qwen3's
+    built-in chain-of-thought "think" mode is off and replies are not
+    wrapped in <think>…</think> blocks.
+
+    Parameters
+    ----------
+    client      : OpenAI client returned by get_client()
+    messages    : List of {"role": ..., "content": ...} dicts
+    model       : Model ID (default: module-level MODEL constant)
+    temperature : Sampling temperature (0 = deterministic, 1 = creative)
+    max_tokens  : Maximum number of tokens in the response
+    """
+    request_options = {
+        "model": model,
+        "messages": messages,
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+        "extra_body": {"chat_template_kwargs": {"enable_thinking": False}},
+    }
+    return client.chat.completions.create(**request_options).choices[0].message.content
+
+
+def chat_json(
+    client: OpenAI,
+    messages: list[dict],
+    model: str = MODEL,
+    temperature: float = 0.2,
+    max_tokens: int = 2048,
+) -> str:
+    """
+    Like chat(), but requests JSON output from the server via
+    response_format={"type": "json_object"}.
+
+    Use this for structured JSON output (Exercises 3 and 4); the reply is
+    meant to be fed straight to json.loads().  NOTE(review): a reply cut off
+    at max_tokens is presumably incomplete JSON — confirm and guard if needed.
+
+    Parameters are the same as chat(); temperature defaults to 0.2 because
+    deterministic output is usually preferable for structured data.
+    """
+    response = client.chat.completions.create(
+        model=model,
+        messages=messages,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        response_format={"type": "json_object"},
+        extra_body={"chat_template_kwargs": {"enable_thinking": False}},
+    )
+    return response.choices[0].message.content
+
+
+def _repair_json_strings(text: str) -> str:
+    """
+    Escape raw newline, carriage-return and tab characters that appear inside
+    double-quoted JSON string values.
+
+    LLMs often emit such characters literally inside long strings, which is
+    invalid JSON.  The text is scanned once, character by character, while
+    tracking whether the cursor is inside a string and whether the previous
+    character was a backslash; everything outside strings, and any character
+    directly following a backslash, is copied through unchanged.
+    """
+    replacements = {'\n': '\\n', '\r': '\\r', '\t': '\\t'}
+    pieces: list[str] = []
+    inside_string = False
+    after_backslash = False
+    for char in text:
+        piece = char
+        if after_backslash:
+            # Escaped character: kept verbatim and never toggles string state.
+            after_backslash = False
+        elif inside_string and char == '\\':
+            # A backslash inside a string starts an escape sequence.
+            after_backslash = True
+        elif char == '"':
+            # An unescaped quote opens or closes a string.
+            inside_string = not inside_string
+        elif inside_string:
+            # Raw control characters get their two-character escape.
+            piece = replacements.get(char, char)
+        pieces.append(piece)
+    return ''.join(pieces)
+
+
+def extract_json(text: str) -> str:
+    """
+    Extract and repair a JSON object or array from an LLM response that may
+    contain extra prose, markdown code fences, or unescaped control characters.
+
+    Strategy:
+      1. Strip markdown ```json ... ``` or ``` ... ``` fences.
+      2. Take whichever of '{' or '[' occurs first and extract up to its
+         matching closing bracket (brackets inside strings are ignored).
+      3. Repair unescaped newlines/tabs inside string values.
+
+    Returns the cleaned JSON string; when no balanced container is found the
+    (fence-stripped) text is returned so json.loads can raise with context.
+    """
+    import re
+
+    # 1. Strip markdown fences, then keep going: prose around the fenced block
+    #    must still be removed by step 2.
+    fenced = re.sub(r"```(?:json)?\s*([\s\S]*?)\s*```", r"\1", text.strip())
+    if fenced != text.strip():
+        text = fenced.strip()
+
+    # 2. Start at the earliest opening bracket. Preferring '{' unconditionally
+    #    would return only the first element of a top-level array of objects.
+    extracted = text
+    starts = [pos for pos in (text.find('{'), text.find('[')) if pos != -1]
+    if starts:
+        idx = min(starts)
+        start_char = text[idx]
+        end_char = '}' if start_char == '{' else ']'
+        depth = 0
+        in_string = False
+        escape = False
+        for i, ch in enumerate(text[idx:], start=idx):
+            if escape:
+                escape = False
+            elif ch == '\\' and in_string:
+                escape = True
+            elif ch == '"':
+                in_string = not in_string
+            elif in_string:
+                continue
+            elif ch == start_char:
+                depth += 1
+            elif ch == end_char:
+                depth -= 1
+                if depth == 0:
+                    extracted = text[idx: i + 1]
+                    break
+
+    # 3. Repair unescaped control characters inside string values
+    return _repair_json_strings(extracted)
+
+
+def strip_code_fences(text: str) -> str:
+    """Strip a leading and a trailing markdown code fence from LLM output.
+
+    An opening fence (three backticks plus an optional language tag) at the
+    very start and a closing fence at the very end are removed; the rest is
+    returned with surrounding whitespace trimmed, ready to write to a .py file.
+    """
+    import re
+    without_opening = re.sub(r"^```\w*\n?", "", text.strip())
+    without_closing = re.sub(r"\n?```\s*$", "", without_opening)
+    return without_closing.strip()
+
+
+def print_messages(messages: list[dict]) -> None:
+    """Show the complete message list that is about to be sent to the LLM.
+
+    Each message is printed as a header line carrying its upper-cased role,
+    followed by its content; the whole dump is framed by double-line rules.
+    """
+    width = 64
+    frame = "═" * width
+    print("\n" + frame)
+    print("  PROMPT SENT TO LLM")
+    print(frame)
+    for message in messages:
+        label = message["role"].upper()
+        print(f"\n── [{label}] " + "─" * max(0, width - len(label) - 6))
+        print(message["content"])
+    print("\n" + frame)
+
+
+def print_separator(title: str = "") -> None:
+    """Print a 64-character rule; a non-empty title adds a heading and a second rule."""
+    rule = "─" * 64
+    print("\n" + rule)
+    if not title:
+        return
+    print(f"  {title}\n{rule}")
diff --git a/Prompting Exercise/test_connection.py b/Prompting Exercise/test_connection.py
new file mode 100644
index 0000000..1404db6
--- /dev/null
+++ b/Prompting Exercise/test_connection.py	
@@ -0,0 +1,23 @@
+"""
+test_connection.py  –  Verify the vLLM server connection
+=========================================================
+Run this script from the directory that contains it (Prompting Exercise/)
+before starting the exercises:
+
+    python test_connection.py
+
+Expected output:
+    Models available: ['qwen3.5-35b-a3b']
+    Connection OK.
+"""
+
+from server_utils import get_client, list_models
+
+client = get_client()
+models = list_models(client)
+print(f"Models available: {models}")
+
+if models:
+    print("Connection OK.")
+else:
+    print("WARNING: no models returned – check server address and port.")
diff --git a/code_embeddings_pca.png b/code_embeddings_pca.png
new file mode 100644
index 0000000..ead9793
Binary files /dev/null and b/code_embeddings_pca.png differ
diff --git a/code_embeddings_tsne.png b/code_embeddings_tsne.png
new file mode 100644
index 0000000..842df8d
Binary files /dev/null and b/code_embeddings_tsne.png differ
diff --git a/pca_denoising_analysis.png b/pca_denoising_analysis.png
new file mode 100644
index 0000000..8461c6d
Binary files /dev/null and b/pca_denoising_analysis.png differ
diff --git a/Übung: Clean Code/Student Grade Calculator.py b/Übung: Clean Code/Student Grade Calculator.py
new file mode 100644
index 0000000..e650a1c
--- /dev/null
+++ b/Übung: Clean Code/Student Grade Calculator.py	
@@ -0,0 +1,72 @@
+"""
+Bad example
+"""
+
+def calc (l) :
+    t =0
+    for i in l:
+        t = t + i
+    a = t / len (l)
+    if a >=90:
+        g = "A"
+    elif a >=80:
+        g = "B"
+    elif a >=70:
+        g = "C"
+    elif a >=60:
+        g = "D"
+    else :
+        g = "F"
+    return g, a
+
+def doeverything (n, s1, s2, s3, s4, s5) :
+    print ("Processing student :"+ n)
+    l = [s1, s2, s3, s4, s5]
+    r = calc (l)
+    print ("Average :"+ str (r [1]))
+    print ("Grade :"+ r [0])
+    if r[1] >= 60:
+        print ("Status : PASSED")
+    else:
+        print ("Status : FAILED")
+    return r
+
+# main program
+x = "John"
+doeverything (x,85,90,78,92,88)
+print ("---")
+y = "Jane"
+doeverything (y,55,60,45,50,58)
+print ("---")
+z = "Bob"
+doeverything (z,70,75,80,72,78)
+
+"""
+[x] Naming conventions (variables, functions, classes)
+[x] Code structure and indentation
+[x] Magic numbers and constants
+[x] Function length and single responsibility
+[ ] DRY principle (Don’t Repeat Yourself)
+[x] Comments and documentation
+[x] Error handling
+[x] Whitespace and formatting
+[ ] Mutable default arguments
+"""
+
+"""
+good example
+"""
+
+def calculate_avg(points: list[int]) -> float:
+    return sum(points) / len(points)  # arithmetic mean; an empty list raises ZeroDivisionError
+
+def calculate_grade(point_avg: float) -> str:
+    """Map an average score to a letter grade (A >= 90, B >= 80, C >= 70, D >= 60, else F)."""
+    # Thresholds are checked from highest to lowest; the first one reached wins.
+    # (The former dict keyed by lambdas was looked up with the average itself,
+    # which never matched a key, so the function always returned None.)
+    grade_thresholds = ((90, "A"), (80, "B"), (70, "C"), (60, "D"))
+    for min_avg, grade in grade_thresholds:
+        if point_avg >= min_avg:
+            return grade
+    return "F"
\ No newline at end of file