Add lessons week 1-5

This commit is contained in:
Michael Schären 2026-03-19 16:52:19 +01:00
parent 7b95472101
commit 88f2d74d53
77 changed files with 8049 additions and 0 deletions

172
.gitignore vendored Normal file
View File

@ -0,0 +1,172 @@
# ---> Python
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
**/.env
**/.venv
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

10
.idea/.gitignore generated vendored Normal file
View File

@ -0,0 +1,10 @@
# Default ignored files
/shelf/
/workspace.xml
# Ignored default folder with query files
/queries/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/

14
.idea/aise-501_aise_in_se_i.iml generated Normal file
View File

@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.12 (aise-501_aise_in_se_i)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
</module>

12
.idea/dataSources.xml generated Normal file
View File

@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="DataSourceManagerImpl" format="xml" multifile-model="true">
<data-source source="LOCAL" name="postgres@localhost" uuid="be9eece5-a8ff-447a-a6a9-4660fffe89da">
<driver-ref>postgresql</driver-ref>
<synchronize>true</synchronize>
<jdbc-driver>org.postgresql.Driver</jdbc-driver>
<jdbc-url>jdbc:postgresql://localhost:5432/postgres</jdbc-url>
<working-dir>$ProjectFileDir$</working-dir>
</data-source>
</component>
</project>

6
.idea/data_source_mapping.xml generated Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="DataSourcePerFileMappings">
<file url="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.sql" value="be9eece5-a8ff-447a-a6a9-4660fffe89da" />
</component>
</project>

View File

@ -0,0 +1,7 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="PROJECT_PROFILE" value="Default" />
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

7
.idea/misc.xml generated Normal file
View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.12" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (aise-501_aise_in_se_i)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml generated Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/aise-501_aise_in_se_i.iml" filepath="$PROJECT_DIR$/.idea/aise-501_aise_in_se_i.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml generated Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

View File

@ -0,0 +1,272 @@
# Student Guide — Qwen3.5 Inference Server
## Overview
A **Qwen3.5** large language model is running on our GPU server. Two models
may be available at different times (your instructor will let you know which
one is active):
| Model | Params | Best for |
|-------|--------|----------|
| `qwen3.5-35b-a3b` | 35B (3B active) | Fast responses, everyday tasks |
| `qwen3.5-122b-a10b-fp8` | 122B (10B active) | Complex reasoning, coding, research |
There are **three ways** to interact with the model:
1. **Open WebUI** — ChatGPT-like interface in your browser (easiest)
2. **Streamlit App** — Local app with chat, file editor, and code execution
3. **Python SDK / curl** — Programmatic access via the OpenAI-compatible API
> **Note**: You must be on the fhgr network or VPN to reach the server.
## Connection Details
| Parameter | Value |
|------------------|---------------------------------------------|
| **Open WebUI** | `http://silicon.fhgr.ch:7081` |
| **API Base URL** | `http://silicon.fhgr.ch:7080/v1` |
| **Model** | *(check Open WebUI model selector or ask your instructor)* |
| **API Key** | *(ask your instructor — may be `EMPTY`)* |
> **Tip**: In Open WebUI, the model dropdown at the top automatically shows
> whichever model is currently running. For the API, use
> `curl http://silicon.fhgr.ch:7080/v1/models` to check.
---
## Option 1: Open WebUI (Recommended)
The easiest way to chat with the model — no installation required.
### Getting Started
1. Make sure you are connected to the **university network** (or VPN).
2. Open your browser and go to **http://silicon.fhgr.ch:7081**
3. Click **"Sign Up"** to create a new account:
- Enter your **name** (e.g. your first and last name)
- Enter your **email** (use your university email)
- Choose a **password**
- Click **"Create Account"**
4. After signing up you are logged in automatically.
5. Select the model **qwen3.5-35b-a3b** from the model dropdown at the top.
6. Type a message and press Enter — you're chatting with the LLM.
### Returning Later
- Go to **http://silicon.fhgr.ch:7081** and click **"Sign In"**.
- Enter the email and password you used during sign-up.
- All your previous chats are still there.
### Features
- **Chat history** — all conversations are saved on the server and persist across sessions
- **Markdown rendering** with syntax-highlighted code blocks
- **Model selector** — auto-discovers available models from the server
- **Conversation branching** — edit previous messages and explore alternative responses
- **File upload** — attach files to your messages for the model to analyze
- **Search** — search across all your past conversations
### Tips
- Your account and chat history are stored on the server. You can log in
from any device on the university network.
- If you forget your password, ask your instructor to reset it via the
Admin Panel.
- The model works best when you provide clear, specific instructions.
- For code tasks, mention the programming language explicitly (e.g.
"Write a Python function that...").
- Long conversations use more context. Start a **New Chat** (top-left
button) when switching topics to get faster, more focused responses.
---
## Option 2: Streamlit App (Chat + File Editor)
A local app with chat, file editing, and Python/LaTeX execution.
See the [Streamlit section below](#streamlit-chat--file-editor-app) for setup.
---
## Option 3: Python SDK / curl
For programmatic access and scripting.
### Quick Start with Python
#### 1. Install the OpenAI SDK
```bash
pip install openai
```
#### 2. Simple Chat
```python
from openai import OpenAI
client = OpenAI(
base_url="http://silicon.fhgr.ch:7080/v1",
api_key="EMPTY", # replace if your instructor set a key
)
response = client.chat.completions.create(
model="qwen3.5-35b-a3b",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Explain gradient descent in simple terms."},
],
max_tokens=1024,
temperature=0.7,
)
print(response.choices[0].message.content)
```
#### 3. Streaming Responses
```python
stream = client.chat.completions.create(
model="qwen3.5-35b-a3b",
messages=[
{"role": "user", "content": "Write a haiku about machine learning."},
],
max_tokens=256,
stream=True,
)
for chunk in stream:
if chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="", flush=True)
print()
```
---
### Quick Start with curl
```bash
curl http://silicon.fhgr.ch:7080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "qwen3.5-35b-a3b",
"messages": [
{"role": "user", "content": "What is the capital of Switzerland?"}
],
"max_tokens": 256,
"temperature": 0.7
}'
```
---
## Recommended Parameters
| Parameter | Recommended | Notes |
|-----------------|-------------|----------------------------------------------|
| `temperature` | 0.7 | Lower = more deterministic, higher = creative |
| `max_tokens` | 1024–4096 | Increase for long-form output |
| `top_p` | 0.95 | Nucleus sampling |
| `stream` | `true` | Better UX for interactive use |
---
## Tips & Etiquette
- **Be mindful of context length**: Avoid excessively long prompts (>8K tokens) unless necessary.
- **Use streaming**: Makes responses feel faster and reduces perceived latency.
- **Don't spam requests**: The server is shared among ~15 students.
- **Check the model name**: Always use `qwen3.5-35b-a3b` as the model parameter.
---
## Streamlit Chat & File Editor App
A web UI is included for chatting with the model and editing files. It runs
on your own machine and connects to the GPU server.
### Setup
```bash
# Clone the repository
git clone https://gitea.fhgr.ch/herzogfloria/LLM_Inferenz_Server_1.git
cd LLM_Inferenz_Server_1
# Create a virtual environment and install dependencies
python3 -m venv .venv
source .venv/bin/activate # macOS / Linux
# .venv\Scripts\activate # Windows
pip install -r requirements.txt
```
### Run
```bash
streamlit run app.py
```
Opens at `http://localhost:8501` in your browser.
### Features
**Chat Tab**
- Conversational interface with streaming responses
- "Save code" button extracts code from the LLM response and saves it to a
workspace file (strips markdown formatting automatically)
**File Editor Tab**
- Create and edit `.py`, `.tex`, `.html`, or any text file
- Syntax-highlighted preview of file content
- "Generate with LLM" button: describe a change in natural language and the
model rewrites the file (e.g. "add error handling", "fix the LaTeX formatting",
"translate comments to German")
**Sidebar Controls**
- **Connection**: API Base URL and API Key
- **LLM Parameters**: Adjustable for each request
| Parameter | Default | What it does |
|-----------|---------|--------------|
| Thinking Mode | Off | Toggle chain-of-thought reasoning (better for complex tasks, slower) |
| Temperature | 0.7 | Lower = predictable, higher = creative |
| Max Tokens | 4096 | Maximum response length |
| Top P | 0.95 | Nucleus sampling threshold |
| Presence Penalty | 0.0 | Encourage diverse topics |
- **File Manager**: Create new files and switch between them
All generated files are stored in a `workspace/` folder next to `app.py`.
> **Tip**: The app runs entirely on your local machine. Only the LLM requests
> go to the server — your files stay local.
---
## Thinking Mode
By default, the model "thinks" before answering (internal chain-of-thought).
This is great for complex reasoning but adds latency for simple questions.
To disable thinking and get faster direct responses, add this to your API call:
```python
response = client.chat.completions.create(
model="qwen3.5-35b-a3b",
messages=[...],
max_tokens=1024,
extra_body={"chat_template_kwargs": {"enable_thinking": False}},
)
```
---
## Troubleshooting
| Issue | Solution |
|-----------------------------|-----------------------------------------------------|
| Connection refused | Check you're on the university network / VPN |
| Model not found | Use model name `qwen3.5-35b-a3b` exactly |
| Slow responses | The model is shared — peak times may be slower |
| `401 Unauthorized` | Ask your instructor for the API key |
| Response cut off | Increase `max_tokens` in your request |
| Open WebUI login fails | Make sure you created an account first (Sign Up) |
| Open WebUI shows no models | The vLLM server may still be loading — wait a few minutes |

346
AISE501 LLM Zugang/app.py Normal file
View File

@ -0,0 +1,346 @@
"""
Streamlit Chat & File Editor for Qwen3.5
A minimal interface to:
1. Chat with the local LLM (OpenAI-compatible API)
2. Edit, save, and generate code / LaTeX files
Usage:
pip install streamlit openai
streamlit run app.py
"""
import re
import subprocess
import sys
from pathlib import Path

import streamlit as st
from openai import OpenAI
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Connection settings are sidebar widgets so students can repoint the app at
# a different server or paste an API key without editing the code.
st.sidebar.header("Connection")
API_BASE = st.sidebar.text_input("API Base URL", "http://silicon.fhgr.ch:7080/v1")
# "EMPTY" is accepted by servers that enforce no authentication.
API_KEY = st.sidebar.text_input("API Key", "EMPTY", type="password")

# All files the app creates or edits live in ./workspace next to app.py.
WORKSPACE = Path("workspace")
WORKSPACE.mkdir(exist_ok=True)

# OpenAI-compatible client pointed at the vLLM server.
client = OpenAI(base_url=API_BASE, api_key=API_KEY)
@st.cache_data(ttl=30)
def fetch_models(base_url: str, api_key: str) -> list[str]:
    """Return the model IDs advertised by the inference server.

    Results are cached for 30 s via ``st.cache_data`` so the sidebar does
    not query the server on every Streamlit rerun. On any failure (server
    down, bad URL, auth error) an empty list is returned and the caller
    falls back to manual model entry.
    """
    try:
        probe = OpenAI(base_url=base_url, api_key=api_key)
        listing = probe.models.list()
    except Exception:
        return []
    return [entry.id for entry in listing.data]
# Populate the model dropdown from the server; fall back to a free-text
# field pre-filled with the default model name when the server is down.
available_models = fetch_models(API_BASE, API_KEY)
if available_models:
    MODEL = st.sidebar.selectbox("Model", available_models)
else:
    MODEL = st.sidebar.text_input("Model (server unreachable)", "qwen3.5-35b-a3b")
    st.sidebar.warning("Could not fetch models from server.")
# ---------------------------------------------------------------------------
# Sidebar — LLM Parameters
# ---------------------------------------------------------------------------
st.sidebar.markdown("---")
st.sidebar.header("LLM Parameters")

# These widgets are re-read on every request, so changes take effect on the
# next message without restarting the app.
thinking_mode = st.sidebar.toggle("Thinking Mode", value=False,
    help="Enable chain-of-thought reasoning. Better for complex tasks, slower for simple ones.")
temperature = st.sidebar.slider("Temperature", 0.0, 2.0, 0.7, 0.05,
    help="Lower = deterministic, higher = creative.")
max_tokens = st.sidebar.slider("Max Tokens", 256, 16384, 4096, 256,
    help="Maximum length of the response.")
top_p = st.sidebar.slider("Top P", 0.0, 1.0, 0.95, 0.05,
    help="Nucleus sampling: only consider tokens within this cumulative probability.")
presence_penalty = st.sidebar.slider("Presence Penalty", 0.0, 2.0, 0.0, 0.1,
    help="Penalize repeated topics. Higher values encourage the model to talk about new topics.")

# Map file suffix -> language tag, used both for syntax highlighting in the
# editor preview and for picking the right fenced block in extract_code().
LANG_MAP = {
    ".py": "python", ".tex": "latex", ".js": "javascript",
    ".html": "html", ".css": "css", ".sh": "bash",
    ".json": "json", ".yaml": "yaml", ".yml": "yaml",
}
# Approximate context window of the served model, in tokens.
MAX_CONTEXT = 32768


def extract_code(text: str, lang: str = "") -> str:
    """Pull the most plausible code block out of a markdown answer.

    Preference order: the longest fenced block tagged with *lang*
    (e.g. ```python), then the longest fenced block of any language, and
    finally the raw text itself when no fenced block is present.
    """
    fenced = re.findall(r"```(\w*)\n(.*?)```", text, re.DOTALL)
    if not fenced:
        return text.strip()
    wanted = lang.lower()
    preferred = [body for tag, body in fenced if tag.lower() == wanted]
    candidates = preferred if preferred else [body for _, body in fenced]
    return max(candidates, key=len).strip()


def estimate_tokens(messages: list[dict]) -> int:
    """Crude token count for a chat history (~4 characters per token)."""
    total_chars = sum(len(msg["content"]) for msg in messages)
    return total_chars // 4


def trim_history(messages: list[dict], reserved: int) -> list[dict]:
    """Drop the oldest messages in place until the history fits the budget.

    *reserved* is the number of tokens set aside for the model's reply.
    The most recent message is never removed.
    """
    allowed = MAX_CONTEXT - reserved
    while len(messages) > 1 and estimate_tokens(messages) > allowed:
        del messages[0]
    return messages
# File types the editor can execute, and the per-run wall-clock limit.
RUNNABLE_EXTENSIONS = {".py", ".tex"}
RUN_TIMEOUT = 30


def run_file(file_path: Path) -> dict:
    """Execute a .py or .tex file and capture its output.

    Args:
        file_path: Workspace file to run (``.py``) or compile (``.tex``).

    Returns:
        dict with keys ``stdout``, ``stderr`` and ``rc`` — the process
        return code, or 1 for unsupported file types and -1 on timeout or
        when the interpreter/compiler binary is missing.
    """
    suffix = file_path.suffix
    cwd = file_path.parent.resolve()
    if suffix == ".py":
        # Use the interpreter running this app rather than a hard-coded
        # "python3": works on Windows (no "python3" launcher) and keeps
        # the subprocess inside the active virtualenv.
        cmd = [sys.executable, file_path.name]
    elif suffix == ".tex":
        cmd = [
            "pdflatex",
            "-interaction=nonstopmode",  # never block waiting for input on errors
            f"-output-directory={cwd}",
            file_path.name,
        ]
    else:
        return {"stdout": "", "stderr": f"Unsupported file type: {suffix}", "rc": 1}
    try:
        proc = subprocess.run(
            cmd,
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=RUN_TIMEOUT,
        )
        return {"stdout": proc.stdout, "stderr": proc.stderr, "rc": proc.returncode}
    except subprocess.TimeoutExpired:
        return {"stdout": "", "stderr": f"Timed out after {RUN_TIMEOUT}s", "rc": -1}
    except FileNotFoundError as e:
        # The interpreter/compiler itself is not installed (e.g. no pdflatex).
        return {"stdout": "", "stderr": str(e), "rc": -1}
# ---------------------------------------------------------------------------
# Sidebar — File Manager
# ---------------------------------------------------------------------------
st.sidebar.markdown("---")
st.sidebar.header("File Manager")

new_filename = st.sidebar.text_input("New file name", placeholder="main.tex")
if st.sidebar.button("Create File") and new_filename:
    # touch() creates an empty file; the rerun refreshes the file list below.
    (WORKSPACE / new_filename).touch()
    st.sidebar.success(f"Created {new_filename}")
    st.rerun()

files = sorted(WORKSPACE.iterdir()) if WORKSPACE.exists() else []
file_names = [f.name for f in files if f.is_file()]
# "(no files)" is a sentinel entry; both tabs check for it before file access.
selected_file = st.sidebar.selectbox("Open file", file_names if file_names else ["(no files)"])

# ---------------------------------------------------------------------------
# Main Layout — Two Tabs
# ---------------------------------------------------------------------------
tab_chat, tab_editor = st.tabs(["Chat", "File Editor"])
# ---------------------------------------------------------------------------
# Tab 1: Chat
# ---------------------------------------------------------------------------
with tab_chat:
    st.header(f"Chat with {MODEL}")

    # Chat history lives in session state so it survives Streamlit reruns.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the conversation so far.
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    if prompt := st.chat_input("Ask anything..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # Reserve room for the reply; oldest messages are dropped first.
        st.session_state.messages = trim_history(
            st.session_state.messages, reserved=max_tokens
        )

        with st.chat_message("assistant"):
            placeholder = st.empty()
            full_response = ""
            stream = client.chat.completions.create(
                model=MODEL,
                messages=st.session_state.messages,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                presence_penalty=presence_penalty,
                stream=True,
                # vLLM pass-through switch for Qwen's chain-of-thought mode.
                extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
            )
            # Render incrementally as tokens arrive. (Fixed: the loop body
            # rendered `full_response + ""` — a dead no-op concatenation.)
            for chunk in stream:
                delta = chunk.choices[0].delta.content or ""
                full_response += delta
                placeholder.markdown(full_response)
            placeholder.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})

    # Context-usage meter with an early warning before trimming kicks in.
    if st.session_state.messages:
        used = estimate_tokens(st.session_state.messages)
        pct = min(used / MAX_CONTEXT, 1.0)
        label = f"Context: ~{used:,} / {MAX_CONTEXT:,} tokens"
        if pct > 0.8:
            label += " ⚠️ nearing limit — older messages will be trimmed"
        st.progress(pct, text=label)

    col_clear, col_save = st.columns([1, 3])
    with col_clear:
        if st.button("Clear Chat"):
            st.session_state.messages = []
            st.rerun()
    with col_save:
        # Extract code from the latest reply and write it to the file
        # currently selected in the sidebar, stripping markdown fences.
        if selected_file and selected_file != "(no files)":
            if st.button(f"Save code → {selected_file}"):
                last = st.session_state.messages[-1]["content"]
                suffix = Path(selected_file).suffix
                lang = LANG_MAP.get(suffix, "")
                code = extract_code(last, lang)
                (WORKSPACE / selected_file).write_text(code)
                st.success(f"Extracted code saved to workspace/{selected_file}")
# ---------------------------------------------------------------------------
# Tab 2: File Editor
# ---------------------------------------------------------------------------
with tab_editor:
    st.header("File Editor")
    if selected_file and selected_file != "(no files)":
        file_path = WORKSPACE / selected_file
        content = file_path.read_text() if file_path.exists() else ""
        suffix = file_path.suffix
        lang = LANG_MAP.get(suffix, "text")
        runnable = suffix in RUNNABLE_EXTENSIONS

        # Runnable files (.py/.tex) get a second column with a terminal pane.
        if runnable:
            col_edit, col_term = st.columns([3, 2])
        else:
            col_edit = st.container()

        with col_edit:
            # Read-only highlighted preview above the editable text area.
            st.code(content, language=lang if lang != "text" else None, line_numbers=True)
            # The widget key includes a hash of the content so the editor
            # resets when the file changes on disk (e.g. after an LLM rewrite).
            edited = st.text_area(
                "Edit below:",
                value=content,
                height=400,
                key=f"editor_{selected_file}_{hash(content)}",
            )
            col_save, col_gen = st.columns(2)
            with col_save:
                if st.button("Save File"):
                    file_path.write_text(edited)
                    st.success(f"Saved {selected_file}")
                    st.rerun()
            with col_gen:
                gen_prompt = st.text_input(
                    "Generation instruction",
                    placeholder="e.g. Add error handling / Fix the LaTeX formatting",
                    key="gen_prompt",
                )
                if st.button("Generate with LLM") and gen_prompt:
                    with st.spinner("Generating..."):
                        # Non-streaming: the whole rewritten file is needed
                        # before it can be extracted and written back.
                        response = client.chat.completions.create(
                            model=MODEL,
                            messages=[
                                {"role": "system", "content": (
                                    f"You are a coding assistant. The user has a {lang} file. "
                                    "Return ONLY the raw file content inside a single code block. "
                                    "No explanations, no comments about changes."
                                )},
                                {"role": "user", "content": (
                                    f"Here is my {lang} file:\n\n```\n{edited}\n```\n\n"
                                    f"Instruction: {gen_prompt}"
                                )},
                            ],
                            max_tokens=max_tokens,
                            temperature=temperature,
                            top_p=top_p,
                            extra_body={"chat_template_kwargs": {"enable_thinking": thinking_mode}},
                        )
                        result = response.choices[0].message.content
                        # Strip the markdown fence before saving to disk.
                        code = extract_code(result, lang)
                        file_path.write_text(code)
                        st.success("File updated by LLM")
                        st.rerun()

        if runnable:
            with col_term:
                run_label = "Compile LaTeX" if suffix == ".tex" else "Run Python"
                st.subheader("Terminal Output")
                if st.button(run_label, type="primary"):
                    # Persist unsaved edits first so the run reflects them.
                    file_path.write_text(edited)
                    with st.spinner(f"{'Compiling' if suffix == '.tex' else 'Running'}..."):
                        result = run_file(file_path)
                    # Keep the last result so it survives widget-triggered reruns.
                    st.session_state["last_run"] = result

                result = st.session_state.get("last_run")
                if result:
                    if result["rc"] == 0:
                        st.success(f"Exit code: {result['rc']}")
                    else:
                        st.error(f"Exit code: {result['rc']}")
                    if result["stdout"]:
                        st.text_area(
                            "stdout",
                            value=result["stdout"],
                            height=300,
                            disabled=True,
                            key="run_stdout",
                        )
                    if result["stderr"]:
                        st.text_area(
                            "stderr",
                            value=result["stderr"],
                            height=200,
                            disabled=True,
                            key="run_stderr",
                        )
                    if not result["stdout"] and not result["stderr"]:
                        st.info("No output produced.")
                else:
                    st.caption(
                        f"Click **{run_label}** to execute the file "
                        f"(timeout: {RUN_TIMEOUT}s)."
                    )
    else:
        st.info("Create a file in the sidebar to start editing.")

View File

@ -0,0 +1,2 @@
streamlit
openai

View File

@ -0,0 +1,70 @@
"""
Quick test script to verify the vLLM server is running and responding.
Usage:
pip install openai
python test_server.py [--host HOST] [--port PORT] [--api-key KEY]
"""
import argparse
import sys
from openai import OpenAI
def main():
    """Connect to the vLLM server, list models, and run two tiny completions.

    Exits with status 1 if the server is unreachable. This is a smoke test:
    each request is deliberately small so the whole script finishes in
    seconds. (Previously the first test asked for a complete LaTeX document
    with max_tokens=16384, which took minutes on a shared server.)
    """
    parser = argparse.ArgumentParser(description="Test vLLM inference server")
    parser.add_argument("--host", default="localhost", help="Server hostname")
    parser.add_argument("--port", default=7080, type=int, help="Server port")
    parser.add_argument("--api-key", default="EMPTY", help="API key")
    args = parser.parse_args()

    base_url = f"http://{args.host}:{args.port}/v1"
    model = "qwen3.5-35b-a3b"
    client = OpenAI(base_url=base_url, api_key=args.api_key)
    print(f"Connecting to {base_url} ...")

    # Listing models doubles as the connectivity check.
    print("\n--- Available Models ---")
    try:
        models = client.models.list()
        for m in models.data:
            print(f"  {m.id}")
    except Exception as e:
        print(f"ERROR: Cannot connect to server: {e}")
        sys.exit(1)

    print("\n--- Test Chat Completion ---")
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": "Reply with exactly one short sentence: what is 2 + 2?"}
        ],
        max_tokens=64,
        temperature=0.7,
    )
    print(f"  Response: {response.choices[0].message.content}")
    print(f"  Tokens: prompt={response.usage.prompt_tokens}, "
          f"completion={response.usage.completion_tokens}")

    print("\n--- Test Streaming ---")
    stream = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": "Count from 1 to 5."}
        ],
        max_tokens=256,
        temperature=0.7,
        stream=True,
    )
    print("  Response: ", end="")
    for chunk in stream:
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
    print("\n")
    print("All tests passed!")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,18 @@
\relax
\providecommand \babel@aux [2]{\global \let \babel@toc \@gobbletwo }
\@nameuse{bbl@beforestart}
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\babel@aux{english}{}
\@writefile{toc}{\contentsline {section}{\numberline {1}Overview}{2}{section.1}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2}Violation 1: Unused and Poorly Formatted Imports}{2}{section.2}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {3}Violation 2: No Module Docstring or Documentation}{2}{section.3}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {4}Violation 3: Poor Naming Conventions}{3}{section.4}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {5}Violation 4: Formatting and Whitespace}{4}{section.5}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {6}Violation 5: Error Handling}{5}{section.6}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {7}Violation 6: Function Structure and Single Responsibility}{6}{section.7}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {8}Violation 7: Missing \texttt {\_\_main\_\_} Guard}{7}{section.8}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {9}Violation 8: String Concatenation Instead of f-Strings}{7}{section.9}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {10}Summary of Violations}{8}{section.10}\protected@file@percent }
\gdef \@abspage@last{8}

View File

@ -0,0 +1,10 @@
\BOOKMARK [1][-]{section.1}{\376\377\000O\000v\000e\000r\000v\000i\000e\000w}{}% 1
\BOOKMARK [1][-]{section.2}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0001\000:\000\040\000U\000n\000u\000s\000e\000d\000\040\000a\000n\000d\000\040\000P\000o\000o\000r\000l\000y\000\040\000F\000o\000r\000m\000a\000t\000t\000e\000d\000\040\000I\000m\000p\000o\000r\000t\000s}{}% 2
\BOOKMARK [1][-]{section.3}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0002\000:\000\040\000N\000o\000\040\000M\000o\000d\000u\000l\000e\000\040\000D\000o\000c\000s\000t\000r\000i\000n\000g\000\040\000o\000r\000\040\000D\000o\000c\000u\000m\000e\000n\000t\000a\000t\000i\000o\000n}{}% 3
\BOOKMARK [1][-]{section.4}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0003\000:\000\040\000P\000o\000o\000r\000\040\000N\000a\000m\000i\000n\000g\000\040\000C\000o\000n\000v\000e\000n\000t\000i\000o\000n\000s}{}% 4
\BOOKMARK [1][-]{section.5}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0004\000:\000\040\000F\000o\000r\000m\000a\000t\000t\000i\000n\000g\000\040\000a\000n\000d\000\040\000W\000h\000i\000t\000e\000s\000p\000a\000c\000e}{}% 5
\BOOKMARK [1][-]{section.6}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0005\000:\000\040\000E\000r\000r\000o\000r\000\040\000H\000a\000n\000d\000l\000i\000n\000g}{}% 6
\BOOKMARK [1][-]{section.7}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0006\000:\000\040\000F\000u\000n\000c\000t\000i\000o\000n\000\040\000S\000t\000r\000u\000c\000t\000u\000r\000e\000\040\000a\000n\000d\000\040\000S\000i\000n\000g\000l\000e\000\040\000R\000e\000s\000p\000o\000n\000s\000i\000b\000i\000l\000i\000t\000y}{}% 7
\BOOKMARK [1][-]{section.8}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0007\000:\000\040\000M\000i\000s\000s\000i\000n\000g\000\040\000\137\000\137\000m\000a\000i\000n\000\137\000\137\000\040\000G\000u\000a\000r\000d}{}% 8
\BOOKMARK [1][-]{section.9}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0008\000:\000\040\000S\000t\000r\000i\000n\000g\000\040\000C\000o\000n\000c\000a\000t\000e\000n\000a\000t\000i\000o\000n\000\040\000I\000n\000s\000t\000e\000a\000d\000\040\000o\000f\000\040\000f\000-\000S\000t\000r\000i\000n\000g\000s}{}% 9
\BOOKMARK [1][-]{section.10}{\376\377\000S\000u\000m\000m\000a\000r\000y\000\040\000o\000f\000\040\000V\000i\000o\000l\000a\000t\000i\000o\000n\000s}{}% 10

View File

@ -0,0 +1,415 @@
\documentclass[12pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}
\usepackage{geometry}
\geometry{margin=2.5cm}
\usepackage{xcolor}
\usepackage{tcolorbox}
\usepackage{booktabs}
\usepackage{hyperref}
\usepackage{listings}
\usepackage{enumitem}
\definecolor{seblue}{rgb}{0.0,0.28,0.67}
\definecolor{segreen}{rgb}{0.13,0.55,0.13}
\definecolor{sered}{rgb}{0.7,0.13,0.13}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\lstdefinestyle{pystyle}{
backgroundcolor=\color{backcolour},
commentstyle=\color{codegreen},
keywordstyle=\color{blue},
stringstyle=\color{codepurple},
basicstyle=\ttfamily\footnotesize,
breaklines=true,
keepspaces=true,
showstringspaces=false,
tabsize=4,
language=Python
}
\lstset{style=pystyle}
\newtcolorbox{badbox}{
colback=red!5!white,
colframe=sered,
title=Bad Code,
fonttitle=\bfseries\small,
boxrule=0.8pt, arc=2pt,
top=2pt, bottom=2pt, left=4pt, right=4pt
}
\newtcolorbox{goodbox}{
colback=green!5!white,
colframe=segreen,
title=Clean Code,
fonttitle=\bfseries\small,
boxrule=0.8pt, arc=2pt,
top=2pt, bottom=2pt, left=4pt, right=4pt
}
\newtcolorbox{principlebox}[1][]{
colback=blue!5!white,
colframe=seblue,
title=#1,
fonttitle=\bfseries\small,
boxrule=0.8pt, arc=2pt,
top=2pt, bottom=2pt, left=4pt, right=4pt
}
\title{\textcolor{seblue}{Code Analysis: Arithmetic Expression Calculator}\\[0.3em]
\large What Makes Code Bad and How to Fix It\\[0.3em]
\normalsize AISE501 -- AI in Software Engineering I}
\author{Dr.\ Florian Herzog}
\date{Spring Semester 2026}
\begin{document}
\maketitle
\tableofcontents
\newpage
% ============================================
\section{Overview}
% ============================================
This document analyses two implementations of the same program --- an arithmetic expression calculator that parses and evaluates strings like \texttt{"3 + 5 * 2"} without using Python's \texttt{eval()}.
Both produce correct results, but the first version (\texttt{calculator\_bad.py}) violates numerous PEP\,8 and clean code principles, while the second (\texttt{calculator\_good.py}) follows them consistently.
The analysis is structured by violation category, with side-by-side comparisons of the bad and good code and references to the specific PEP\,8 rules or clean code principles that apply.
% ============================================
\section{Violation 1: Unused and Poorly Formatted Imports}
% ============================================
\begin{badbox}
\begin{lstlisting}
import sys,os,re;from typing import *
\end{lstlisting}
\end{badbox}
\textbf{What is wrong:}
\begin{itemize}
\item \texttt{sys}, \texttt{os}, and \texttt{re} are imported but \textbf{never used} anywhere in the code.
\item Multiple imports are crammed onto \textbf{one line separated by commas}, violating PEP\,8's rule that imports should be on separate lines.
\item A \textbf{semicolon} joins two import statements on one line.
\item \texttt{from typing import *} is a \textbf{wildcard import} that pollutes the namespace.
\end{itemize}
\begin{goodbox}
The good version has \textbf{no imports at all} --- the calculator uses only built-in Python features.
\end{goodbox}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,8 -- Imports}: ``Imports should usually be on separate lines.'' Wildcard imports (\texttt{from X import *}) should be avoided.
\item \textbf{KISS}: Unused imports add unnecessary complexity.
\item \textbf{Clean Code}: Dead code (unused imports) confuses readers about dependencies.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 2: No Module Docstring or Documentation}
% ============================================
\begin{badbox}
\begin{lstlisting}
# calculator program
def scicalc(s):
\end{lstlisting}
The only ``documentation'' is a single vague comment. No module docstring, no function docstrings.
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
"""Simple arithmetic expression calculator with a recursive-descent parser.
Supported operations: +, -, *, / and parentheses.
Does NOT use Python's eval().
Grammar:
expression = term (('+' | '-') term)*
term = factor (('*' | '/') factor)*
factor = NUMBER | '(' expression ')'
"""
\end{lstlisting}
The good version opens with a module docstring that explains the purpose, supported operations, and even the formal grammar. Every function also has a docstring.
\end{goodbox}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,257}: All public modules, functions, classes, and methods should have docstrings.
\item \textbf{Clean Code -- Documentation}: Good documentation helps current and future developers understand the intent.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 3: Poor Naming Conventions}
% ============================================
\begin{badbox}
\begin{lstlisting}
def scicalc(s): # What does "scicalc" mean?
def doPlusMinus(s,a,b):# camelCase, not snake_case
def doMulDiv(s,a,b): # "do" is vague
def getNum(s, a,b): # inconsistent spacing
t=s[a:b] # "t" for what?
c=t[i] # "c" for what?
L=doPlusMinus(...) # uppercase "L" for a local variable
R=doMulDiv(...) # uppercase "R" for a local variable
r=doPlusMinus(...) # "r" for result?
\end{lstlisting}
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
def tokenize(expression_text):
def parse_expression(tokens, position):
def parse_term(tokens, position):
def parse_factor(tokens, position):
def calculate(expression_text):
character = expression_text[position]
operator = tokens[position]
right_value, position = parse_term(tokens, position)
result, final_position = parse_expression(tokens, 0)
\end{lstlisting}
\end{goodbox}
\textbf{What is wrong in the bad version:}
\begin{itemize}
\item Function names use \textbf{camelCase} (\texttt{doPlusMinus}) instead of \textbf{snake\_case}.
\item Variable names are \textbf{single letters} (\texttt{s}, \texttt{a}, \texttt{b}, \texttt{t}, \texttt{c}, \texttt{r}) --- impossible to understand without reading every line.
\item \texttt{L} and \texttt{R} use \textbf{uppercase} for local variables, which PEP\,8 reserves for constants.
\item Names like \texttt{scicalc} are \textbf{abbreviations} that are not pronounceable or self-explanatory.
\item The list of test data is called \texttt{Data} (capitalised like a class) and results \texttt{Res}.
\end{itemize}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,8 -- Naming}: Functions and variables use \texttt{lower\_case\_with\_underscores}. Constants use \texttt{UPPER\_CASE}.
\item \textbf{Clean Code -- Descriptive Names}: Names should reveal intent. A reader should know what a variable holds without tracing its assignment.
\item \textbf{Clean Code -- Pronounceable Names}: \texttt{scicalc} is not a word anyone would say in a conversation.
\item \textbf{Clean Code -- No Abbreviations}: \texttt{doPlusMinus} is better than \texttt{dPM}, but \texttt{parse\_expression} communicates the actual operation.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 4: Formatting and Whitespace}
% ============================================
\begin{badbox}
\begin{lstlisting}
def scicalc(s):
s=s.replace(' ','') # 2-space indent
if s=='':return 0 # no spaces around ==
r=doPlusMinus(s,0,len(s))
return r
def doPlusMinus(s,a,b):
t=s[a:b]; level=0; i=len(t)-1 # 4-space indent, semicolons
while i>=0: # no space around >=
if level==0 and(c=='*' or c=='/'): # missing space before (
L = doMulDiv(s,a,a+i); R = getNum(s,a+i+1,b)
\end{lstlisting}
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
def parse_expression(tokens, position):
result, position = parse_term(tokens, position)
while position < len(tokens) and tokens[position] in ("+", "-"):
operator = tokens[position]
position += 1
right_value, position = parse_term(tokens, position)
\end{lstlisting}
\end{goodbox}
\textbf{What is wrong:}
\begin{itemize}
\item \textbf{Inconsistent indentation}: \texttt{scicalc} uses 2 spaces, other functions use 4 spaces. PEP\,8 requires 4 spaces consistently.
\item \textbf{Semicolons} to put multiple statements on one line (\texttt{t=s[a:b]; level=0; i=len(t)-1}).
\item \textbf{Missing whitespace} around operators: \texttt{s=s.replace}, \texttt{i>=0}, \texttt{level==0 and(c==...}.
\item \textbf{No blank lines} between logical sections within functions or between function definitions. PEP\,8 requires two blank lines before and after top-level functions.
\item \texttt{return} and assignment statements placed \textbf{on the same line} as the \texttt{if} condition, e.g.\ \texttt{if s=='':return 0}.
\end{itemize}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,8 -- Indentation}: Use 4 spaces per indentation level.
\item \textbf{PEP\,8 -- Whitespace}: Surround binary operators with single spaces. Avoid compound statements on one line.
\item \textbf{PEP\,8 -- Blank Lines}: Two blank lines around top-level definitions.
\item \textbf{Zen of Python}: ``Sparse is better than dense.''
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 5: Error Handling}
% ============================================
\begin{badbox}
\begin{lstlisting}
if R==0:print("ERROR division by zero!!!") ;return 0
\end{lstlisting}
\begin{lstlisting}
try:
x = float(t)
except:
print("bad number: "+t);x=0
return x
\end{lstlisting}
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
if right_value == 0:
raise ZeroDivisionError("Division by zero")
\end{lstlisting}
\begin{lstlisting}
try:
tokens = tokenize(expression_text)
result, final_position = parse_expression(tokens, 0)
...
except (ValueError, ZeroDivisionError) as error:
return f"Error: {error}"
\end{lstlisting}
\end{goodbox}
\textbf{What is wrong in the bad version:}
\begin{itemize}
\item \textbf{Bare \texttt{except}} catches every exception including \texttt{KeyboardInterrupt} and \texttt{SystemExit} --- masking real bugs.
\item Errors are handled by \textbf{printing and returning a dummy value} (0), which silently produces wrong results. The caller has no way to know an error occurred.
\item The error message style is inconsistent: \texttt{"ERROR division by zero!!!"} vs.\ \texttt{"bad number: ..."}.
\end{itemize}
\textbf{What the good version does:}
\begin{itemize}
\item Errors \textbf{raise specific exceptions} (\texttt{ValueError}, \texttt{ZeroDivisionError}) at the point of detection.
\item The top-level \texttt{calculate()} function catches \textbf{only expected exceptions} and returns a formatted error string.
\item Errors \textbf{propagate} rather than being silently swallowed.
\end{itemize}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,8 -- Exceptions}: Catch specific exceptions, never use bare \texttt{except}.
\item \textbf{Zen of Python}: ``Errors should never pass silently. Unless explicitly silenced.''
\item \textbf{Clean Code -- Error Handling}: Anticipate errors and handle them gracefully. Returning magic values (0 for an error) is an anti-pattern.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 6: Function Structure and Single Responsibility}
% ============================================
\begin{badbox}
The bad version has three intertwined functions (\texttt{doPlusMinus}, \texttt{doMulDiv}, \texttt{getNum}) that each take the \textbf{entire string plus two index parameters} and internally slice the string. Parsing, tokenisation, and evaluation are all mixed together.
\begin{lstlisting}
def doPlusMinus(s,a,b):
t=s[a:b]; level=0; i=len(t)-1
while i>=0:
...
L=doPlusMinus(s,a,a+i);R=doMulDiv(s,a+i+1,b)
...
return doMulDiv(s,a,b)
\end{lstlisting}
\end{badbox}
\begin{goodbox}
The good version separates \textbf{tokenisation} from \textbf{parsing}:
\begin{lstlisting}
tokens = tokenize(expression_text) # Step 1: tokenise
result, position = parse_expression(tokens, 0) # Step 2: parse
\end{lstlisting}
Each parser function has a single, clear responsibility:
\begin{itemize}[nosep]
\item \texttt{tokenize()} -- converts text to tokens
\item \texttt{parse\_expression()} -- handles \texttt{+} and \texttt{-}
\item \texttt{parse\_term()} -- handles \texttt{*} and \texttt{/}
\item \texttt{parse\_factor()} -- handles numbers and parentheses
\item \texttt{calculate()} -- orchestrates the pipeline and error handling
\end{itemize}
\end{goodbox}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{SRP (Single Responsibility Principle)}: Each function should do one thing.
\item \textbf{SoC (Separation of Concerns)}: Tokenisation and parsing are different concerns.
\item \textbf{Clean Code -- Short Functions}: If a function takes more than a few minutes to comprehend, it should be refactored.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 7: Missing \texttt{\_\_main\_\_} Guard}
% ============================================
\begin{badbox}
\begin{lstlisting}
main()
\end{lstlisting}
The bad version calls \texttt{main()} at the module level. If another script imports this file, the calculator runs immediately as a side effect.
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
if __name__ == "__main__":
main()
\end{lstlisting}
The good version uses the standard \texttt{\_\_main\_\_} guard, so the module can be safely imported without executing the calculator.
\end{goodbox}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{Clean Code -- Avoid Side Effects}: Importing a module should not trigger execution.
\item \textbf{Python Best Practice}: The \texttt{if \_\_name\_\_ == "\_\_main\_\_"} guard is standard for all runnable scripts.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 8: String Concatenation Instead of f-Strings}
% ============================================
\begin{badbox}
\begin{lstlisting}
print(d+" = "+str(Res))
\end{lstlisting}
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
print(f"{display_expr} = {result}")
\end{lstlisting}
\end{goodbox}
String concatenation with \texttt{+} and manual \texttt{str()} calls is harder to read than f-strings, which are the idiomatic Python 3.6+ way to format output.
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{Pythonic Code}: Use f-strings for string formatting (readable, efficient).
\item \textbf{Clean Code -- Readability}: f-strings make the output format immediately visible.
\end{itemize}
\end{principlebox}
% ============================================
\section{Summary of Violations}
% ============================================
\begin{center}
\small
\begin{tabular}{@{}rp{5cm}p{5.5cm}@{}}
\toprule
\textbf{\#} & \textbf{Violation} & \textbf{Principle / PEP\,8 Rule} \\
\midrule
1 & Unused imports, wildcard import, one-line imports & PEP\,8 Imports, KISS \\
2 & No docstrings or documentation & PEP\,257, Clean Code Documentation \\
3 & camelCase names, single-letter variables, abbreviations & PEP\,8 Naming, Descriptive Names \\
4 & Inconsistent indent, semicolons, missing whitespace & PEP\,8 Indentation \& Whitespace \\
5 & Bare except, silent error swallowing & PEP\,8 Exceptions, Zen of Python \\
6 & Mixed concerns, long tangled functions & SRP, SoC, Short Functions \\
7 & No \texttt{\_\_main\_\_} guard & Avoid Side Effects \\
8 & String concatenation instead of f-strings & Pythonic Code, Readability \\
\bottomrule
\end{tabular}
\end{center}
\end{document}

View File

@ -0,0 +1,11 @@
\babel@toc {english}{}\relax
\contentsline {section}{\numberline {1}Overview}{2}{section.1}%
\contentsline {section}{\numberline {2}Violation 1: Unused and Poorly Formatted Imports}{2}{section.2}%
\contentsline {section}{\numberline {3}Violation 2: No Module Docstring or Documentation}{2}{section.3}%
\contentsline {section}{\numberline {4}Violation 3: Poor Naming Conventions}{3}{section.4}%
\contentsline {section}{\numberline {5}Violation 4: Formatting and Whitespace}{4}{section.5}%
\contentsline {section}{\numberline {6}Violation 5: Error Handling}{5}{section.6}%
\contentsline {section}{\numberline {7}Violation 6: Function Structure and Single Responsibility}{6}{section.7}%
\contentsline {section}{\numberline {8}Violation 7: Missing \texttt {\_\_main\_\_} Guard}{7}{section.8}%
\contentsline {section}{\numberline {9}Violation 8: String Concatenation Instead of f-Strings}{7}{section.9}%
\contentsline {section}{\numberline {10}Summary of Violations}{8}{section.10}%

View File

@ -0,0 +1,64 @@
import sys,os,re;from typing import *
# calculator program
# NOTE(review): this file is deliberately non-PEP8 example code for the
# clean-code exercise; it is quoted verbatim in the analysis document, so
# the code itself must remain unchanged.
def scicalc(s):
  # Entry point: strip spaces from the expression string s and evaluate
  # it; an empty string evaluates to 0.
  s=s.replace(' ','')
  if s=='':return 0
  r=doPlusMinus(s,0,len(s))
  return r
def doPlusMinus(s,a,b):
    # Evaluate the slice s[a:b] at '+'/'-' precedence: scan right-to-left,
    # tracking parenthesis depth in `level`, and split at the first
    # top-level '+' or '-'; if none is found, delegate to doMulDiv.
    # NOTE(review): semicolons and single-letter names are deliberate
    # violations kept for the teaching exercise.
    t=s[a:b]; level=0; i=len(t)-1
    while i>=0:
        c=t[i]
        if c==')':level=level+1
        if c=='(':level=level-1
        if level==0 and (c=='+' or c=='-'):
            L=doPlusMinus(s,a,a+i);R=doMulDiv(s,a+i+1,b)
            if c=='+': return L+R
            else: return L-R
        i=i-1
    return doMulDiv(s,a,b)
def doMulDiv(s,a,b):
    # Evaluate the slice s[a:b] at '*'/'/' precedence, same right-to-left
    # top-level scan as doPlusMinus; falls through to getNum when no
    # operator is found.
    # NOTE(review): division by zero is "handled" by printing and
    # returning 0 — a silent-error anti-pattern kept on purpose for the
    # exercise.
    t=s[a:b];level=0;i=len(t)-1
    while i >= 0:
        c=t[i]
        if c==')':level+=1
        if c=='(':level-=1
        if level==0 and(c=='*' or c=='/'):
            L = doMulDiv(s,a,a+i); R = getNum(s,a+i+1,b)
            if c=='*':return L*R
            else:
                if R==0:print("ERROR division by zero!!!") ;return 0
                return L/R
        i -= 1
    return getNum(s,a,b)
def getNum(s, a,b):
    # Convert the slice s[a:b] to a float; if the slice is wrapped in
    # parentheses, recurse back into doPlusMinus for the inner expression.
    # NOTE(review): the bare except that prints and substitutes 0 is a
    # deliberate violation kept for the teaching exercise.
    t = s[a:b]
    if t[0]=='(' and t[-1]==')':
        return doPlusMinus(s,a+1,b-1)
    try:
        x = float(t)
    except:
        print("bad number: "+t);x=0
    return x
def main():
    # Run scicalc over a fixed list of test expressions (including the
    # error cases: empty input, division by zero, invalid characters) and
    # print each expression with its result.
    Data = [
        "3 + 5",
        "10 - 2 * 3",
        "( 4 + 6 ) * 2",
        "100 / ( 5 * 2 )",
        "3.5 + 2.5 * 4",
        "( 1 + 2 ) * ( 3 + 4 )",
        "",
        "10 / 0",
        "abc + 1",
    ]
    for d in Data:
        Res=scicalc(d)
        print(d+" = "+str(Res))
# NOTE(review): unguarded module-level call — running on import is one of
# the deliberate violations discussed in the analysis document.
main()

View File

@ -0,0 +1,153 @@
"""Simple arithmetic expression calculator with a recursive-descent parser.
Supported operations: +, -, *, / and parentheses.
Does NOT use Python's eval().
Grammar:
expression = term (('+' | '-') term)*
term = factor (('*' | '/') factor)*
factor = NUMBER | '(' expression ')'
"""
def tokenize(expression_text):
    """Convert an expression string into a list of tokens.

    Tokens are either numbers (float) or single-character operators /
    parentheses.

    Args:
        expression_text: The raw expression, e.g. "3 + 5 * 2".

    Returns:
        A list of tokens (floats and one-character strings).

    Raises:
        ValueError: For characters that are not part of a valid
            expression, or for malformed number literals such as "1..2".
    """
    tokens = []
    position = 0
    while position < len(expression_text):
        character = expression_text[position]
        if character.isspace():
            position += 1
            continue
        if character in "+-*/()":
            tokens.append(character)
            position += 1
            continue
        if character.isdigit() or character == ".":
            start = position
            while position < len(expression_text) and (
                expression_text[position].isdigit()
                or expression_text[position] == "."
            ):
                position += 1
            number_text = expression_text[start:position]
            try:
                tokens.append(float(number_text))
            except ValueError:
                # float() rejects strings like "1..2" or a lone "." with a
                # generic message; re-raise with the offending literal and
                # its position so calculate() can report it usefully.
                raise ValueError(
                    f"Invalid number '{number_text}' at position {start}"
                ) from None
            continue
        raise ValueError(
            f"Unexpected character '{character}' at position {position}"
        )
    return tokens
def parse_expression(tokens, position):
    """Parse an expression: term (('+' | '-') term)*.

    Returns a (value, next_position) tuple.
    """
    value, position = parse_term(tokens, position)
    token_count = len(tokens)
    while position < token_count and tokens[position] in ("+", "-"):
        is_addition = tokens[position] == "+"
        operand, position = parse_term(tokens, position + 1)
        # Left-associative fold over the sequence of terms.
        value = value + operand if is_addition else value - operand
    return value, position
def parse_term(tokens, position):
    """Parse a term: factor (('*' | '/') factor)*.

    Returns a (value, next_position) tuple.

    Raises:
        ZeroDivisionError: When a division's right-hand factor is zero.
    """
    value, position = parse_factor(tokens, position)
    while position < len(tokens):
        symbol = tokens[position]
        if symbol not in ("*", "/"):
            break
        operand, position = parse_factor(tokens, position + 1)
        if symbol == "*":
            value = value * operand
        else:
            # Raise explicitly so the caller gets a consistent message.
            if operand == 0:
                raise ZeroDivisionError("Division by zero")
            value = value / operand
    return value, position
def parse_factor(tokens, position):
    """Parse a factor: NUMBER | '(' expression ')'.

    Returns a (value, next_position) tuple.

    Raises:
        ValueError: On premature end of input, a missing ')', or an
            unexpected token.
    """
    if position >= len(tokens):
        raise ValueError("Unexpected end of expression")
    token = tokens[position]
    if isinstance(token, float):
        # A bare number consumes exactly one token.
        return token, position + 1
    if token == "(":
        inner_value, position = parse_expression(tokens, position + 1)
        closing_found = position < len(tokens) and tokens[position] == ")"
        if not closing_found:
            raise ValueError("Missing closing parenthesis")
        return inner_value, position + 1
    raise ValueError(f"Unexpected token: {token}")
def calculate(expression_text):
    """Evaluate an arithmetic expression string and return the result.

    Returns the numeric result (an int when the value is whole, otherwise
    a float rounded to 10 decimal places) or an error message string.
    """
    if not expression_text.strip():
        return "Error: empty expression"
    try:
        tokens = tokenize(expression_text)
        value, consumed = parse_expression(tokens, 0)
        # The whole token stream must be consumed; leftovers mean a
        # malformed expression such as "1 2".
        if consumed != len(tokens):
            return f"Error: unexpected token '{tokens[consumed]}'"
        is_whole = value == int(value)
        return int(value) if is_whole else round(value, 10)
    except (ValueError, ZeroDivisionError) as error:
        return f"Error: {error}"
def main():
    """Run the calculator on a set of test expressions."""
    test_expressions = [
        "3 + 5",
        "10 - 2 * 3",
        "(4 + 6) * 2",
        "100 / (5 * 2)",
        "3.5 + 2.5 * 4",
        "(1 + 2) * (3 + 4)",
        "",
        "10 / 0",
        "abc + 1",
    ]
    for expression in test_expressions:
        # Show a placeholder label for the empty-input test case.
        label = expression if expression else "(empty)"
        print(f"{label} = {calculate(expression)}")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,11 @@
\relax
\providecommand \babel@aux [2]{\global \let \babel@toc \@gobbletwo }
\@nameuse{bbl@beforestart}
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\babel@aux{english}{}
\@writefile{toc}{\contentsline {section}{\numberline {1}Use Case}{1}{section.1}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2}Example Input / Output}{1}{section.2}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {3}Exercise}{1}{section.3}\protected@file@percent }
\gdef \@abspage@last{2}

View File

@ -0,0 +1,3 @@
\BOOKMARK [1][-]{section.1}{\376\377\000U\000s\000e\000\040\000C\000a\000s\000e}{}% 1
\BOOKMARK [1][-]{section.2}{\376\377\000E\000x\000a\000m\000p\000l\000e\000\040\000I\000n\000p\000u\000t\000\040\000/\000\040\000O\000u\000t\000p\000u\000t}{}% 2
\BOOKMARK [1][-]{section.3}{\376\377\000E\000x\000e\000r\000c\000i\000s\000e}{}% 3

View File

@ -0,0 +1,90 @@
\documentclass[12pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}
\usepackage{geometry}
\geometry{margin=2.5cm}
\usepackage{xcolor}
\usepackage{tcolorbox}
\usepackage{booktabs}
\usepackage{hyperref}
\definecolor{seblue}{rgb}{0.0,0.28,0.67}
\title{\textcolor{seblue}{Exercise 1: Arithmetic Expression Calculator}\\[0.3em]
\large AISE501 -- AI in Software Engineering I}
\author{Dr.\ Florian Herzog}
\date{Spring Semester 2026}
\begin{document}
\maketitle
\section{Use Case}
A user enters an arithmetic expression as a text string, for example \texttt{"3 + 5 * 2"}.
The program evaluates the expression and prints the result.
The calculator must:
\begin{itemize}
\item Support the four basic operations: \texttt{+}, \texttt{-}, \texttt{*}, \texttt{/}
\item Respect standard operator precedence (\texttt{*} and \texttt{/} bind more tightly than \texttt{+} and \texttt{-})
\item Support parentheses for grouping, e.g.\ \texttt{"(4 + 6) * 2"}
\item Support decimal numbers, e.g.\ \texttt{"3.5 + 2.5"}
\item Handle errors gracefully (division by zero, invalid characters, empty input)
\item \textbf{Not} use Python's built-in \texttt{eval()} function
\end{itemize}
\section{Example Input / Output}
\begin{center}
\begin{tabular}{ll}
\toprule
\textbf{Input Expression} & \textbf{Expected Output} \\
\midrule
\texttt{3 + 5} & \texttt{8} \\
\texttt{10 - 2 * 3} & \texttt{4} \\
\texttt{(4 + 6) * 2} & \texttt{20} \\
\texttt{100 / (5 * 2)} & \texttt{10} \\
\texttt{3.5 + 2.5 * 4} & \texttt{13.5} \\
\texttt{(1 + 2) * (3 + 4)} & \texttt{21} \\
\texttt{(empty)} & Error message \\
\texttt{10 / 0} & Error message \\
\texttt{abc + 1} & Error message \\
\bottomrule
\end{tabular}
\end{center}
\section{Exercise}
Two implementations are provided:
\begin{enumerate}
\item \textbf{\texttt{calculator\_bad.py}} -- A working but poorly written version that violates many clean code and PEP\,8 principles.
\item \textbf{\texttt{calculator\_good.py}} -- A clean, well-structured version following PEP\,8 and clean code best practices.
\end{enumerate}
\subsection*{Tasks}
\begin{enumerate}
\item Run both programs and verify they produce the same results.
\item Read the bad version and list all clean code / PEP\,8 violations you can find.
\item For each violation, explain which principle is broken and why it makes the code harder to read or maintain.
\item Compare your list with the good version to see how each issue was resolved.
\end{enumerate}
\subsection*{Violations to Look For}
\begin{itemize}
\item Unused imports
\item Missing or misleading comments and docstrings
\item Poor variable and function names (abbreviations, single letters)
\item Inconsistent indentation and spacing
\item Multiple statements on one line (semicolons)
\item Missing whitespace around operators
\item No proper error handling (bare \texttt{except}, printing instead of raising)
\item Magic numbers and unclear logic flow
\item Missing \texttt{if \_\_name\_\_ == "\_\_main\_\_"} guard
\item No type clarity in function signatures
\end{itemize}
\end{document}

View File

@ -0,0 +1,25 @@
{
"accounts": [
{
"account_id": "ACC-001",
"holder": "Alice Mueller",
"balance": 5000.00,
"currency": "CHF",
"status": "active"
},
{
"account_id": "ACC-002",
"holder": "Bob Schneider",
"balance": 1200.50,
"currency": "CHF",
"status": "active"
},
{
"account_id": "ACC-003",
"holder": "Clara Brunner",
"balance": 300.00,
"currency": "CHF",
"status": "frozen"
}
]
}

View File

@ -0,0 +1,25 @@
{
"accounts": [
{
"account_id": "ACC-001",
"holder": "Alice Mueller",
"balance": 4550.0,
"currency": "CHF",
"status": "active"
},
{
"account_id": "ACC-002",
"holder": "Bob Schneider",
"balance": 1950.5,
"currency": "CHF",
"status": "active"
},
{
"account_id": "ACC-003",
"holder": "Clara Brunner",
"balance": 300.0,
"currency": "CHF",
"status": "frozen"
}
]
}

View File

@ -0,0 +1,25 @@
{
"accounts": [
{
"account_id": "ACC-001",
"holder": "Alice Mueller",
"balance": 4550.0,
"currency": "CHF",
"status": "active"
},
{
"account_id": "ACC-002",
"holder": "Bob Schneider",
"balance": 1950.5,
"currency": "CHF",
"status": "active"
},
{
"account_id": "ACC-003",
"holder": "Clara Brunner",
"balance": 300.0,
"currency": "CHF",
"status": "frozen"
}
]
}

View File

@ -0,0 +1,20 @@
\relax
\providecommand \babel@aux [2]{\global \let \babel@toc \@gobbletwo }
\@nameuse{bbl@beforestart}
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\babel@aux{english}{}
\@writefile{toc}{\contentsline {section}{\numberline {1}Overview}{2}{section.1}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2}Violation 1: Unused Imports and Import Formatting}{2}{section.2}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {3}Violation 2: No Documentation or Docstrings}{2}{section.3}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {4}Violation 3: Implicit Data Model}{3}{section.4}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {5}Violation 4: Poor Naming}{4}{section.5}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {6}Violation 5: Formatting -- Semicolons and Dense Lines}{5}{section.6}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {7}Violation 6: No Context Managers for File I/O}{6}{section.7}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {8}Violation 7: God Function -- Single Responsibility Violation}{7}{section.8}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {9}Violation 8: Magic Strings Instead of Constants}{8}{section.9}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {10}Violation 9: Comparison with \texttt {None}}{8}{section.10}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {11}Violation 10: Missing \texttt {\_\_main\_\_} Guard and String Formatting}{9}{section.11}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {12}Summary of Violations}{10}{section.12}\protected@file@percent }
\gdef \@abspage@last{10}

View File

@ -0,0 +1,12 @@
\BOOKMARK [1][-]{section.1}{\376\377\000O\000v\000e\000r\000v\000i\000e\000w}{}% 1
\BOOKMARK [1][-]{section.2}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0001\000:\000\040\000U\000n\000u\000s\000e\000d\000\040\000I\000m\000p\000o\000r\000t\000s\000\040\000a\000n\000d\000\040\000I\000m\000p\000o\000r\000t\000\040\000F\000o\000r\000m\000a\000t\000t\000i\000n\000g}{}% 2
\BOOKMARK [1][-]{section.3}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0002\000:\000\040\000N\000o\000\040\000D\000o\000c\000u\000m\000e\000n\000t\000a\000t\000i\000o\000n\000\040\000o\000r\000\040\000D\000o\000c\000s\000t\000r\000i\000n\000g\000s}{}% 3
\BOOKMARK [1][-]{section.4}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0003\000:\000\040\000I\000m\000p\000l\000i\000c\000i\000t\000\040\000D\000a\000t\000a\000\040\000M\000o\000d\000e\000l}{}% 4
\BOOKMARK [1][-]{section.5}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0004\000:\000\040\000P\000o\000o\000r\000\040\000N\000a\000m\000i\000n\000g}{}% 5
\BOOKMARK [1][-]{section.6}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0005\000:\000\040\000F\000o\000r\000m\000a\000t\000t\000i\000n\000g\000\040\040\023\000\040\000S\000e\000m\000i\000c\000o\000l\000o\000n\000s\000\040\000a\000n\000d\000\040\000D\000e\000n\000s\000e\000\040\000L\000i\000n\000e\000s}{}% 6
\BOOKMARK [1][-]{section.7}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0006\000:\000\040\000N\000o\000\040\000C\000o\000n\000t\000e\000x\000t\000\040\000M\000a\000n\000a\000g\000e\000r\000s\000\040\000f\000o\000r\000\040\000F\000i\000l\000e\000\040\000I\000/\000O}{}% 7
\BOOKMARK [1][-]{section.8}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0007\000:\000\040\000G\000o\000d\000\040\000F\000u\000n\000c\000t\000i\000o\000n\000\040\040\023\000\040\000S\000i\000n\000g\000l\000e\000\040\000R\000e\000s\000p\000o\000n\000s\000i\000b\000i\000l\000i\000t\000y\000\040\000V\000i\000o\000l\000a\000t\000i\000o\000n}{}% 8
\BOOKMARK [1][-]{section.9}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0008\000:\000\040\000M\000a\000g\000i\000c\000\040\000S\000t\000r\000i\000n\000g\000s\000\040\000I\000n\000s\000t\000e\000a\000d\000\040\000o\000f\000\040\000C\000o\000n\000s\000t\000a\000n\000t\000s}{}% 9
\BOOKMARK [1][-]{section.10}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0009\000:\000\040\000C\000o\000m\000p\000a\000r\000i\000s\000o\000n\000\040\000w\000i\000t\000h\000\040\000N\000o\000n\000e}{}% 10
\BOOKMARK [1][-]{section.11}{\376\377\000V\000i\000o\000l\000a\000t\000i\000o\000n\000\040\0001\0000\000:\000\040\000M\000i\000s\000s\000i\000n\000g\000\040\000\137\000\137\000m\000a\000i\000n\000\137\000\137\000\040\000G\000u\000a\000r\000d\000\040\000a\000n\000d\000\040\000S\000t\000r\000i\000n\000g\000\040\000F\000o\000r\000m\000a\000t\000t\000i\000n\000g}{}% 11
\BOOKMARK [1][-]{section.12}{\376\377\000S\000u\000m\000m\000a\000r\000y\000\040\000o\000f\000\040\000V\000i\000o\000l\000a\000t\000i\000o\000n\000s}{}% 12

Binary file not shown.

View File

@ -0,0 +1,526 @@
\documentclass[12pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}
\usepackage{geometry}
\geometry{margin=2.5cm}
\usepackage{xcolor}
\usepackage{tcolorbox}
\usepackage{booktabs}
\usepackage{hyperref}
\usepackage{listings}
\usepackage{enumitem}
\definecolor{seblue}{rgb}{0.0,0.28,0.67}
\definecolor{segreen}{rgb}{0.13,0.55,0.13}
\definecolor{sered}{rgb}{0.7,0.13,0.13}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\lstdefinestyle{pystyle}{
backgroundcolor=\color{backcolour},
commentstyle=\color{codegreen},
keywordstyle=\color{blue},
stringstyle=\color{codepurple},
basicstyle=\ttfamily\footnotesize,
breaklines=true,
keepspaces=true,
showstringspaces=false,
tabsize=4,
language=Python
}
\lstset{style=pystyle}
\newtcolorbox{badbox}{
colback=red!5!white,
colframe=sered,
title=Bad Code,
fonttitle=\bfseries\small,
boxrule=0.8pt, arc=2pt,
top=2pt, bottom=2pt, left=4pt, right=4pt
}
\newtcolorbox{goodbox}{
colback=green!5!white,
colframe=segreen,
title=Clean Code,
fonttitle=\bfseries\small,
boxrule=0.8pt, arc=2pt,
top=2pt, bottom=2pt, left=4pt, right=4pt
}
\newtcolorbox{principlebox}[1][]{
colback=blue!5!white,
colframe=seblue,
title=#1,
fonttitle=\bfseries\small,
boxrule=0.8pt, arc=2pt,
top=2pt, bottom=2pt, left=4pt, right=4pt
}
\title{\textcolor{seblue}{Code Analysis: Bank Account Transaction Processor}\\[0.3em]
\large What Makes Code Bad and How to Fix It\\[0.3em]
\normalsize AISE501 -- AI in Software Engineering I}
\author{Dr.\ Florian Herzog}
\date{Spring Semester 2026}
\begin{document}
\maketitle
\tableofcontents
\newpage
% ============================================
\section{Overview}
% ============================================
This document analyses two implementations of a bank account transaction processor.
Both read account state and transactions from JSON files, validate each transaction, apply valid ones, reject invalid ones, and write results.
Both produce identical output, but \texttt{bank\_bad.py} violates many PEP\,8 and clean code principles, while \texttt{bank\_good.py} follows them consistently.
% ============================================
\section{Violation 1: Unused Imports and Import Formatting}
% ============================================
\begin{badbox}
\begin{lstlisting}
import json,sys,os,copy;from datetime import datetime
\end{lstlisting}
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
import json
from typing import TypedDict, Optional
\end{lstlisting}
\end{goodbox}
\textbf{What is wrong:}
\begin{itemize}
\item \texttt{sys}, \texttt{os}, \texttt{copy}, and \texttt{datetime} are imported but \textbf{never used}.
\item All imports are \textbf{on a single line} separated by commas, with a semicolon joining two import statements.
\item PEP\,8 requires each import on its own line and groups separated by blank lines (standard library, third-party, local).
\end{itemize}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,8 -- Imports}: Imports should be on separate lines. Remove unused imports.
\item \textbf{KISS}: Unused imports add noise and suggest false dependencies.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 2: No Documentation or Docstrings}
% ============================================
\begin{badbox}
The file has \textbf{no module docstring} and \textbf{no function docstrings}. The only comment in the entire file is:
\begin{lstlisting}
# find account
...
# print results
\end{lstlisting}
These comments describe \textit{what} the next line does (which is already obvious from the code), not \textit{why}.
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
"""Bank account transaction processor.
Reads account state and a list of transactions from JSON files,
validates and applies each transaction, then writes updated account
state and a transaction log (accepted / rejected) to output files.
"""
\end{lstlisting}
Every function has a docstring:
\begin{lstlisting}
def validate_common(
account: Optional[Account],
amount: float,
) -> Optional[str]:
"""Run validations shared by all transaction types.
Returns an error message string, or None if valid.
"""
\end{lstlisting}
\end{goodbox}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,257}: All public modules and functions should have docstrings.
\item \textbf{Clean Code -- Comments}: Don't add noise comments that just restate the code. Comments should explain \textit{why}, not \textit{what}.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 3: Implicit Data Model}
% ============================================
\begin{badbox}
The bad version operates on raw dictionaries with no type declarations.
A reader must trace through the JSON file and every dictionary access to understand the data shape:
\begin{lstlisting}
def proc(accs,txns):
for t in txns:
tp=t['type'];aid=t['account_id'];amt=t['amount'];tid=t['id']
a=None
for x in accs:
if x['account_id']==aid:a=x
\end{lstlisting}
What fields does \texttt{t} have? What fields does \texttt{a} have? There is no way to know without reading the JSON file.
\end{badbox}
\begin{goodbox}
The good version defines explicit data types:
\begin{lstlisting}
class Account(TypedDict):
"""A bank account with its current state."""
account_id: str
holder: str
balance: float
currency: str
status: str # "active" or "frozen"
class Transaction(TypedDict, total=False):
"""A financial transaction to be processed."""
id: str
type: str # "deposit", "withdrawal", or "transfer"
account_id: str
amount: float
description: str
to_account_id: str # only for transfers
status: str # added after processing
reason: str # added on rejection
\end{lstlisting}
All function signatures carry type annotations:
\begin{lstlisting}
def find_account(accounts: list[Account], account_id: str) -> Optional[Account]:
\end{lstlisting}
\end{goodbox}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{Zen of Python}: ``Explicit is better than implicit.''
\item \textbf{Clean Code -- Readability}: A reader should understand the data contract without tracing through runtime data.
\item \textbf{PEP\,484 / PEP\,589}: Use type hints and \texttt{TypedDict} to document the structure of dictionary-based data.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 4: Poor Naming}
% ============================================
\begin{badbox}
\begin{lstlisting}
def loadJ(p): # "J" for JSON? "p" for path?
def saveJ(p,d): # "d" for data?
def proc(accs,txns): # "proc" does what exactly?
ok=[];bad=[]          # accepted vs. rejected
tp=t['type'] # "tp" is unpronounceable
aid=t['account_id']   # "aid" reads like the English word "aid" (help)
amt=t['amount'] # "amt" -- abbreviation
tid=t['id'] # "tid" -- never used again!
a=None # "a" for account
ta=None # "ta" for target account
for x in accs: # "x" for what?
D=loadJ(...) # capital "D" for a local variable
T=loadJ(...) # capital "T" for a local variable
\end{lstlisting}
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
def load_json(file_path):
def save_json(file_path, data):
def find_account(accounts, account_id):
def validate_common(account, amount):
def process_deposit(accounts, transaction):
def process_withdrawal(accounts, transaction):
def process_transfer(accounts, transaction):
def process_all_transactions(accounts, transactions):
def print_results(accounts, accepted, rejected):
\end{lstlisting}
\end{goodbox}
\textbf{What is wrong:}
\begin{itemize}
\item Function names use \textbf{abbreviations} (\texttt{loadJ}, \texttt{saveJ}, \texttt{proc}) instead of descriptive snake\_case names.
\item Variable names are \textbf{single letters or short abbreviations} (\texttt{a}, \texttt{t}, \texttt{x}, \texttt{tp}, \texttt{aid}, \texttt{amt}, \texttt{ta}).
\item \texttt{tid} is assigned but \textbf{never used} --- dead code.
\item \texttt{D} and \texttt{T} use \textbf{uppercase}, suggesting constants, but they are local variables.
\item The name \texttt{ok} for accepted transactions and \texttt{bad} for rejected ones is \textbf{imprecise}.
\end{itemize}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,8 -- Naming}: Functions and variables use \texttt{lower\_case\_with\_underscores}. Constants use \texttt{UPPER\_CASE}.
\item \textbf{Clean Code -- Descriptive Names}: ``Other developers should figure out what a variable stores just by reading its name.''
\item \textbf{Clean Code -- Consistent Vocabulary}: Don't mix \texttt{ok}/\texttt{bad} with \texttt{accepted}/\texttt{rejected}.
\item \textbf{Clean Code -- No Abbreviations}: \texttt{amt}, \texttt{tp}, \texttt{tid} are not words.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 5: Formatting -- Semicolons and Dense Lines}
% ============================================
\begin{badbox}
\begin{lstlisting}
f=open(p,'r');d=json.load(f);f.close();return d
\end{lstlisting}
\begin{lstlisting}
tp=t['type'];aid=t['account_id'];amt=t['amount'];tid=t['id']
\end{lstlisting}
\begin{lstlisting}
a['balance']=a['balance']+amt;t['status']='accepted';ok.append(t)
\end{lstlisting}
\begin{lstlisting}
if a==None:
t['reason']='account not found';bad.append(t);continue
\end{lstlisting}
\end{badbox}
\begin{goodbox}
Every statement is on its own line with proper whitespace:
\begin{lstlisting}
account = find_account(accounts, transaction["account_id"])
error = validate_common(account, transaction["amount"])
if error:
return False, error
account["balance"] += transaction["amount"]
return True, "accepted"
\end{lstlisting}
\end{goodbox}
\textbf{What is wrong:}
\begin{itemize}
\item \textbf{Semicolons} pack 3--4 statements onto one line, making it nearly impossible to follow the logic.
\item \textbf{No whitespace} around \texttt{=} and after commas.
\item Control flow (\texttt{continue}) is \textbf{hidden at the end of a dense line}.
\item PEP\,8 explicitly states: ``Compound statements (multiple statements on the same line) are generally discouraged.''
\end{itemize}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,8 -- Compound Statements}: Generally discouraged. Each statement on its own line.
\item \textbf{PEP\,8 -- Whitespace}: Surround operators with spaces. Space after commas.
\item \textbf{Zen of Python}: ``Readability counts.'' ``Sparse is better than dense.''
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 6: No Context Managers for File I/O}
% ============================================
\begin{badbox}
\begin{lstlisting}
def loadJ(p):
f=open(p,'r');d=json.load(f);f.close();return d
def saveJ(p,d):
f=open(p,'w');json.dump(d,f,indent=2);f.close()
\end{lstlisting}
If \texttt{json.load(f)} raises an exception, the file is \textbf{never closed} because \texttt{f.close()} is skipped. This is a resource leak.
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
def load_json(file_path: str) -> dict:
"""Read and parse a JSON file, returning the parsed data."""
with open(file_path, "r", encoding="utf-8") as file_handle:
return json.load(file_handle)
\end{lstlisting}
The \texttt{with} statement guarantees the file is closed even if an exception occurs.
\end{goodbox}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{Pythonic Code}: Always use context managers (\texttt{with}) for resource management.
\item \textbf{Clean Code -- Error Handling}: Code should be robust against exceptions. Manual \texttt{open}/\texttt{close} is error-prone.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 7: God Function -- Single Responsibility Violation}
% ============================================
\begin{badbox}
The function \texttt{proc()} is 38 lines long and handles \textbf{all of the following} in a single function:
\begin{itemize}[nosep]
\item Finding accounts by ID
\item Validating account status
\item Validating amounts
\item Processing deposits
\item Processing withdrawals
\item Processing transfers (including finding the target account)
\item Handling unknown transaction types
\item Building accepted and rejected lists
\end{itemize}
\begin{lstlisting}
def proc(accs,txns):
ok=[];bad=[]
for t in txns:
... # 35 lines of nested if/elif/else with continue
return accs,ok,bad
\end{lstlisting}
\end{badbox}
\begin{goodbox}
The good version splits this into \textbf{seven focused functions}:
\begin{lstlisting}
def find_account(accounts, account_id): # lookup
def validate_common(account, amount): # shared validation
def process_deposit(accounts, transaction): # deposit logic
def process_withdrawal(accounts, transaction):# withdrawal logic
def process_transfer(accounts, transaction): # transfer logic
def process_all_transactions(accounts, transactions): # orchestration
def print_results(accounts, accepted, rejected): # output
\end{lstlisting}
A dispatch dictionary replaces the \texttt{if/elif} chain:
\begin{lstlisting}
TRANSACTION_HANDLERS = {
"deposit": process_deposit,
"withdrawal": process_withdrawal,
"transfer": process_transfer,
}
\end{lstlisting}
\end{goodbox}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{SRP (Single Responsibility Principle)}: Each function should have one reason to change.
\item \textbf{DRY (Don't Repeat Yourself)}: The amount validation (\texttt{amt<=0}) is duplicated for deposits and transfers in the bad version; \texttt{validate\_common()} eliminates this.
\item \textbf{Clean Code -- Short Functions}: Functions should be comprehensible in a few minutes.
\item \textbf{Open-Closed Principle}: Adding a new transaction type in the bad version requires modifying the \texttt{proc()} function. In the good version, you add a new handler function and register it in the dictionary.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 8: Magic Strings Instead of Constants}
% ============================================
\begin{badbox}
\begin{lstlisting}
if a['status']!='active': # magic string
...
if tp=='deposit': # magic string
...
\end{lstlisting}
The strings \texttt{'active'}, \texttt{'deposit'}, \texttt{'withdrawal'}, and \texttt{'transfer'} appear throughout the code as \textbf{literals}. If the status name ever changed, every occurrence would need to be found and updated.
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
ACTIVE_STATUS = "active"
...
if account["status"] != ACTIVE_STATUS:
\end{lstlisting}
Transaction types are handled via the \texttt{TRANSACTION\_HANDLERS} dictionary, so the string literals appear only \textbf{once} in the handler registration.
\end{goodbox}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{Clean Code -- No Magic Numbers/Strings}: Use named constants for values that carry domain meaning.
\item \textbf{DRY}: The same literal repeated in multiple places is a maintenance risk.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 9: Comparison with \texttt{None}}
% ============================================
\begin{badbox}
\begin{lstlisting}
if a==None:
...
if ta==None:
...
\end{lstlisting}
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
if account is None:
...
if target is None:
...
\end{lstlisting}
\end{goodbox}
PEP\,8 explicitly states: ``Comparisons to singletons like \texttt{None} should always be done with \texttt{is} or \texttt{is not}, never the equality operators.''
The \texttt{is} operator checks \textbf{identity} (the correct test for \texttt{None}), while \texttt{==} checks \textbf{equality} and can be overridden by custom \texttt{\_\_eq\_\_} methods.
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,8 -- Programming Recommendations}: Use \texttt{is None}, not \texttt{== None}.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 10: Missing \texttt{\_\_main\_\_} Guard and String Formatting}
% ============================================
\begin{badbox}
\begin{lstlisting}
main()
\end{lstlisting}
\begin{lstlisting}
print(" "+a['account_id']+" "+a['holder']+": "+str(a['balance'])
+" "+a['currency']+" ("+a['status']+")")
\end{lstlisting}
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
if __name__ == "__main__":
main()
\end{lstlisting}
\begin{lstlisting}
print(
f" {account['account_id']} {account['holder']}: "
f"{account['balance']:.2f} {account['currency']} "
f"({account['status']})"
)
\end{lstlisting}
\end{goodbox}
\textbf{What is wrong:}
\begin{itemize}
\item No \texttt{\_\_main\_\_} guard means importing the module triggers execution.
\item String concatenation with \texttt{+} and \texttt{str()} is harder to read than f-strings.
\item The bad version does not format numbers (\texttt{str(5000.0)} vs.\ \texttt{5000.00}).
\end{itemize}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{Clean Code -- Avoid Side Effects}: Importing should not trigger execution.
\item \textbf{Pythonic Code}: Use f-strings for string formatting.
\end{itemize}
\end{principlebox}
% ============================================
\section{Summary of Violations}
% ============================================
\begin{center}
\small
\begin{tabular}{@{}rp{4.5cm}p{5.5cm}@{}}
\toprule
\textbf{\#} & \textbf{Violation} & \textbf{Principle / PEP\,8 Rule} \\
\midrule
1 & Unused imports, one-line format & PEP\,8 Imports, KISS \\
2 & No docstrings, noise comments & PEP\,257, Clean Code Documentation \\
3 & Implicit data model (raw dicts) & Explicit $>$ Implicit, PEP\,484/589 \\
4 & Abbreviations, single-letter names & PEP\,8 Naming, Descriptive Names \\
5 & Semicolons, dense lines, no whitespace & PEP\,8 Whitespace, Zen of Python \\
6 & Manual file open/close & Pythonic Code, Context Managers \\
7 & God function (38-line \texttt{proc}) & SRP, DRY, Open-Closed Principle \\
8 & Magic strings & No Magic Numbers, DRY \\
9 & \texttt{== None} instead of \texttt{is None} & PEP\,8 Programming Recommendations \\
10 & No \texttt{\_\_main\_\_} guard, string concat & Side Effects, Pythonic Code \\
\bottomrule
\end{tabular}
\end{center}
\end{document}

View File

@ -0,0 +1,13 @@
\babel@toc {english}{}\relax
\contentsline {section}{\numberline {1}Overview}{2}{section.1}%
\contentsline {section}{\numberline {2}Violation 1: Unused Imports and Import Formatting}{2}{section.2}%
\contentsline {section}{\numberline {3}Violation 2: No Documentation or Docstrings}{2}{section.3}%
\contentsline {section}{\numberline {4}Violation 3: Implicit Data Model}{3}{section.4}%
\contentsline {section}{\numberline {5}Violation 4: Poor Naming}{4}{section.5}%
\contentsline {section}{\numberline {6}Violation 5: Formatting -- Semicolons and Dense Lines}{5}{section.6}%
\contentsline {section}{\numberline {7}Violation 6: No Context Managers for File I/O}{6}{section.7}%
\contentsline {section}{\numberline {8}Violation 7: God Function -- Single Responsibility Violation}{7}{section.8}%
\contentsline {section}{\numberline {9}Violation 8: Magic Strings Instead of Constants}{8}{section.9}%
\contentsline {section}{\numberline {10}Violation 9: Comparison with \texttt {None}}{8}{section.10}%
\contentsline {section}{\numberline {11}Violation 10: Missing \texttt {\_\_main\_\_} Guard and String Formatting}{9}{section.11}%
\contentsline {section}{\numberline {12}Summary of Violations}{10}{section.12}%

View File

@ -0,0 +1,62 @@
# NOTE(review): this file is the deliberately *bad* example paired with
# the clean implementation (bank_good.py); its PEP 8 / clean-code
# violations are catalogued in the accompanying analysis document.
# Do not "clean up" this file, or the lesson material referencing it
# will no longer match.  Comments below only describe behaviour.
# sys, os, copy and datetime are imported but never used (violation 1).
import json,sys,os,copy;from datetime import datetime
# Load and parse a JSON file.  The handle is closed manually, so it
# leaks if json.load raises (violation 6: no context manager).
def loadJ(p):
    f=open(p,'r');d=json.load(f);f.close();return d
# Serialise d to p as indented JSON; same manual-close leak as loadJ.
def saveJ(p,d):
    f=open(p,'w');json.dump(d,f,indent=2);f.close()
# Validate and apply every transaction in txns against accs, mutating
# balances in place.  Returns (accs, accepted, rejected).
# NOTE(review): only the two "insufficient funds" paths below set
# t['status']='rejected'; every other rejection records a 'reason' but
# leaves 'status' unset on the transaction dict.
def proc(accs,txns):
    ok=[];bad=[]
    for t in txns:
        # tid is assigned but never used again (dead code, violation 4).
        tp=t['type'];aid=t['account_id'];amt=t['amount'];tid=t['id']
        # find account
        a=None
        for x in accs:
            if x['account_id']==aid:a=x
        if a==None:
            t['reason']='account not found';bad.append(t);continue
        if a['status']!='active':
            t['reason']='account not active';bad.append(t);continue
        # The amt<=0 check is duplicated per type (DRY violation 7).
        if amt<=0 and tp!='withdrawal':
            if tp=='deposit':t['reason']='invalid amount';bad.append(t);continue
            if tp=='transfer':t['reason']='invalid amount';bad.append(t);continue
        if amt<=0 and tp=='withdrawal':
            t['reason']='invalid amount';bad.append(t);continue
        if tp=='deposit':
            a['balance']=a['balance']+amt;t['status']='accepted';ok.append(t)
        elif tp=='withdrawal':
            if a['balance']>=amt:
                a['balance']=a['balance']-amt;t['status']='accepted';ok.append(t)
            else:
                t['reason']='insufficient funds';t['status']='rejected';bad.append(t)
        elif tp=='transfer':
            ta=None
            for x in accs:
                if x['account_id']==t.get('to_account_id',''):ta=x
            if ta==None:t['reason']='target account not found';bad.append(t);continue
            if ta['status']!='active':t['reason']='target account not active';bad.append(t);continue
            if a['balance']>=amt:
                a['balance']=a['balance']-amt;ta['balance']=ta['balance']+amt
                t['status']='accepted';ok.append(t)
            else:
                t['reason']='insufficient funds';t['status']='rejected';bad.append(t)
        else:
            t['reason']='unknown type';bad.append(t)
    return accs,ok,bad
# Load inputs, process all transactions, print a summary, and write the
# two output JSON files.  D and T are uppercase locals (violation 4).
def main():
    D=loadJ('accounts.json');T=loadJ('transactions.json')
    accs=D['accounts'];txns=T['transactions']
    accs,ok,bad=proc(accs,txns)
    # print results
    print("=== UPDATED ACCOUNTS ===")
    for a in accs:print(" "+a['account_id']+" "+a['holder']+": "+str(a['balance'])+" "+a['currency']+" ("+a['status']+")")
    print("\n=== ACCEPTED ("+str(len(ok))+") ===")
    for t in ok:print(" "+t['id']+" "+t['type']+" "+str(t['amount'])+" -> "+t.get('description',''))
    print("\n=== REJECTED ("+str(len(bad))+") ===")
    for t in bad:print(" "+t['id']+" "+t['type']+" "+str(t['amount'])+" -> "+t.get('reason','unknown'))
    saveJ('accounts_updated_bad.json',{"accounts":accs})
    saveJ('transaction_log_bad.json',{"accepted":ok,"rejected":bad})
# Runs on import: there is no __main__ guard (violation 10).
main()

View File

@ -0,0 +1,280 @@
"""Bank account transaction processor.
Reads account state and a list of transactions from JSON files,
validates and applies each transaction, then writes updated account
state and a transaction log (accepted / rejected) to output files.
"""
import json
from typing import TypedDict, Optional
# ---------------------------------------------------------------------------
# Explicit data model -- defines the exact shape of every data structure
# ---------------------------------------------------------------------------
class Account(TypedDict):
    """A bank account with its current state.

    Balances are floats and are mutated in place by the transaction
    handlers (deposit / withdrawal / transfer).
    """
    account_id: str  # unique ID used for lookups by find_account
    holder: str  # name of the account holder
    balance: float  # current balance; printed with two decimals
    currency: str  # currency label, printed after the balance
    status: str  # "active" or "frozen"
class Transaction(TypedDict, total=False):
    """A financial transaction to be processed.

    total=False makes every key optional at the type level.  In
    practice, to_account_id is present only for transfers, while
    status and reason are attached during processing.
    """
    id: str
    type: str  # "deposit", "withdrawal", or "transfer"
    account_id: str  # source account of the transaction
    amount: float  # must be positive to pass validate_common
    description: str
    to_account_id: str  # only for transfers
    status: str  # added after processing: "accepted" / "rejected"
    reason: str  # added on rejection
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Input / output file locations, relative to the working directory.
ACCOUNTS_INPUT = "accounts.json"
TRANSACTIONS_INPUT = "transactions.json"
ACCOUNTS_OUTPUT = "accounts_updated_good.json"
TRANSACTION_LOG_OUTPUT = "transaction_log_good.json"
# Only accounts in this status may send or receive money.
ACTIVE_STATUS = "active"
# ---------------------------------------------------------------------------
# File I/O
# ---------------------------------------------------------------------------
def load_json(file_path: str) -> dict:
    """Parse the JSON document stored at *file_path* and return it."""
    with open(file_path, "r", encoding="utf-8") as source:
        parsed = json.load(source)
    return parsed
def save_json(file_path: str, data: dict) -> None:
    """Serialise *data* as human-readable JSON into *file_path*."""
    rendered = json.dumps(data, indent=2, ensure_ascii=False)
    with open(file_path, "w", encoding="utf-8") as sink:
        sink.write(rendered)
def load_accounts(file_path: str) -> list[Account]:
    """Return the list of accounts stored in the JSON file at *file_path*."""
    return load_json(file_path)["accounts"]
def load_transactions(file_path: str) -> list[Transaction]:
    """Return the list of transactions stored in the JSON file at *file_path*."""
    return load_json(file_path)["transactions"]
# ---------------------------------------------------------------------------
# Account lookup
# ---------------------------------------------------------------------------
def find_account(accounts: list[Account], account_id: str) -> Optional[Account]:
    """Return the first account whose ID equals *account_id*, or None."""
    matches = (entry for entry in accounts if entry["account_id"] == account_id)
    return next(matches, None)
# ---------------------------------------------------------------------------
# Validation
# ---------------------------------------------------------------------------
def validate_common(
    account: Optional[Account],
    amount: float,
) -> Optional[str]:
    """Apply the checks shared by every transaction type.

    Returns a human-readable rejection reason, or None when the
    transaction may proceed.
    """
    if account is None:
        return "account not found"
    status = account["status"]
    if status != ACTIVE_STATUS:
        return f"account is {status}"
    amount_is_positive = amount is not None and amount > 0
    return None if amount_is_positive else "amount must be positive"
# ---------------------------------------------------------------------------
# Transaction handlers -- one function per transaction type
# ---------------------------------------------------------------------------
def process_deposit(
    accounts: list[Account],
    transaction: Transaction,
) -> tuple[bool, str]:
    """Credit the transaction amount to its account.

    Returns (success, reason); on success the balance is increased
    in place.
    """
    amount = transaction["amount"]
    account = find_account(accounts, transaction["account_id"])
    problem = validate_common(account, amount)
    if problem is not None:
        return False, problem
    account["balance"] = account["balance"] + amount
    return True, "accepted"
def process_withdrawal(
    accounts: list[Account],
    transaction: Transaction,
) -> tuple[bool, str]:
    """Debit the transaction amount from its account.

    Returns (success, reason); rejects when the balance does not
    cover the amount.
    """
    amount = transaction["amount"]
    account = find_account(accounts, transaction["account_id"])
    problem = validate_common(account, amount)
    if problem is not None:
        return False, problem
    if amount > account["balance"]:
        return False, "insufficient funds"
    account["balance"] = account["balance"] - amount
    return True, "accepted"
def process_transfer(
    accounts: list[Account],
    transaction: Transaction,
) -> tuple[bool, str]:
    """Move the transaction amount from the source to the target account.

    Returns (success, reason).  Validates the source via
    validate_common, then checks the target exists, is active, and
    that the source balance covers the amount.
    """
    amount = transaction["amount"]
    source = find_account(accounts, transaction["account_id"])
    problem = validate_common(source, amount)
    if problem is not None:
        return False, f"source: {problem}"
    target = find_account(accounts, transaction.get("to_account_id", ""))
    if target is None:
        return False, "target account not found"
    if target["status"] != ACTIVE_STATUS:
        return False, f"target account is {target['status']}"
    if amount > source["balance"]:
        return False, "insufficient funds"
    source["balance"] = source["balance"] - amount
    target["balance"] = target["balance"] + amount
    return True, "accepted"
# Dispatch table mapping a transaction's "type" string to the handler
# that applies it.  Supporting a new transaction type only requires
# writing a handler and registering it here (open-closed principle).
TRANSACTION_HANDLERS = {
    "deposit": process_deposit,
    "withdrawal": process_withdrawal,
    "transfer": process_transfer,
}
# ---------------------------------------------------------------------------
# Processing
# ---------------------------------------------------------------------------
def process_all_transactions(
    accounts: list[Account],
    transactions: list[Transaction],
) -> tuple[list[Transaction], list[Transaction]]:
    """Apply every transaction to *accounts*, mutating balances in place.

    Each transaction gains a 'status' field ("accepted" / "rejected")
    and, on rejection, a 'reason' field.  Returns the two lists
    (accepted, rejected).
    """
    accepted: list[Transaction] = []
    rejected: list[Transaction] = []

    def _reject(txn: Transaction, reason: str) -> None:
        # Mark the transaction rejected and record why.
        txn["status"] = "rejected"
        txn["reason"] = reason
        rejected.append(txn)

    for txn in transactions:
        kind = txn.get("type", "")
        try:
            handler = TRANSACTION_HANDLERS[kind]
        except KeyError:
            _reject(txn, f"unknown transaction type '{kind}'")
            continue
        success, reason = handler(accounts, txn)
        if success:
            txn["status"] = "accepted"
            accepted.append(txn)
        else:
            _reject(txn, reason)
    return accepted, rejected
# ---------------------------------------------------------------------------
# Output
# ---------------------------------------------------------------------------
def print_results(
    accounts: list[Account],
    accepted: list[Transaction],
    rejected: list[Transaction],
) -> None:
    """Print the updated balances and the transaction log to stdout."""
    print("=== UPDATED ACCOUNTS ===")
    for account in accounts:
        summary = (
            f" {account['account_id']} {account['holder']}: "
            f"{account['balance']:.2f} {account['currency']} "
            f"({account['status']})"
        )
        print(summary)
    print(f"\n=== ACCEPTED TRANSACTIONS ({len(accepted)}) ===")
    for entry in accepted:
        line = (
            f" {entry['id']} {entry['type']:12s} {entry['amount']:>10.2f} "
            f"{entry.get('description', '')}"
        )
        print(line)
    print(f"\n=== REJECTED TRANSACTIONS ({len(rejected)}) ===")
    for entry in rejected:
        line = (
            f" {entry['id']} {entry['type']:12s} {entry['amount']:>10.2f} "
            f"Reason: {entry.get('reason', 'unknown')}"
        )
        print(line)
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> None:
    """Entry point: load inputs, process transactions, report, persist."""
    accounts: list[Account] = load_accounts(ACCOUNTS_INPUT)
    transactions: list[Transaction] = load_transactions(TRANSACTIONS_INPUT)
    accepted, rejected = process_all_transactions(accounts, transactions)
    print_results(accounts, accepted, rejected)
    save_json(ACCOUNTS_OUTPUT, {"accounts": accounts})
    transaction_log = {"accepted": accepted, "rejected": rejected}
    save_json(TRANSACTION_LOG_OUTPUT, transaction_log)
    print(f"\nOutput written to {ACCOUNTS_OUTPUT} and {TRANSACTION_LOG_OUTPUT}")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,16 @@
\relax
\providecommand \babel@aux [2]{\global \let \babel@toc \@gobbletwo }
\@nameuse{bbl@beforestart}
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\babel@aux{english}{}
\@writefile{toc}{\contentsline {section}{\numberline {1}Use Case}{1}{section.1}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2}Input Files}{1}{section.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Account State (\texttt {accounts.json})}{1}{subsection.2.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Transactions (\texttt {transactions.json})}{1}{subsection.2.2}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {3}Validation Rules}{1}{section.3}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {4}Output}{2}{section.4}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {5}Expected Results}{2}{section.5}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {6}Exercise}{2}{section.6}\protected@file@percent }
\gdef \@abspage@last{3}

View File

@ -0,0 +1,8 @@
\BOOKMARK [1][-]{section.1}{\376\377\000U\000s\000e\000\040\000C\000a\000s\000e}{}% 1
\BOOKMARK [1][-]{section.2}{\376\377\000I\000n\000p\000u\000t\000\040\000F\000i\000l\000e\000s}{}% 2
\BOOKMARK [2][-]{subsection.2.1}{\376\377\000A\000c\000c\000o\000u\000n\000t\000\040\000S\000t\000a\000t\000e\000\040\000\050\000a\000c\000c\000o\000u\000n\000t\000s\000.\000j\000s\000o\000n\000\051}{section.2}% 3
\BOOKMARK [2][-]{subsection.2.2}{\376\377\000T\000r\000a\000n\000s\000a\000c\000t\000i\000o\000n\000s\000\040\000\050\000t\000r\000a\000n\000s\000a\000c\000t\000i\000o\000n\000s\000.\000j\000s\000o\000n\000\051}{section.2}% 4
\BOOKMARK [1][-]{section.3}{\376\377\000V\000a\000l\000i\000d\000a\000t\000i\000o\000n\000\040\000R\000u\000l\000e\000s}{}% 5
\BOOKMARK [1][-]{section.4}{\376\377\000O\000u\000t\000p\000u\000t}{}% 6
\BOOKMARK [1][-]{section.5}{\376\377\000E\000x\000p\000e\000c\000t\000e\000d\000\040\000R\000e\000s\000u\000l\000t\000s}{}% 7
\BOOKMARK [1][-]{section.6}{\376\377\000E\000x\000e\000r\000c\000i\000s\000e}{}% 8

Binary file not shown.

View File

@ -0,0 +1,152 @@
\documentclass[12pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}
\usepackage{geometry}
\geometry{margin=2.5cm}
\usepackage{xcolor}
\usepackage{tcolorbox}
\usepackage{booktabs}
\usepackage{hyperref}
\usepackage{listings}
\definecolor{seblue}{rgb}{0.0,0.28,0.67}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
\lstdefinestyle{json}{
backgroundcolor=\color{backcolour},
basicstyle=\ttfamily\small,
breaklines=true,
showstringspaces=false,
tabsize=2
}
\title{\textcolor{seblue}{Exercise 2: Bank Account Transaction Processor}\\[0.3em]
\large AISE501 -- AI in Software Engineering I}
\author{Dr.\ Florian Herzog}
\date{Spring Semester 2026}
\begin{document}
\maketitle
\section{Use Case}
A simple bank system maintains a set of customer accounts, each with a balance, currency, and status (\texttt{active} or \texttt{frozen}).
A series of transactions is submitted for processing.
The program must validate each transaction, apply valid ones, reject invalid ones, and produce output files recording the results.
\section{Input Files}
\subsection{Account State (\texttt{accounts.json})}
A JSON file containing an array of account objects:
\begin{lstlisting}[style=json]
{
"accounts": [
{
"account_id": "ACC-001",
"holder": "Alice Mueller",
"balance": 5000.00,
"currency": "CHF",
"status": "active"
},
...
]
}
\end{lstlisting}
\subsection{Transactions (\texttt{transactions.json})}
A JSON file containing an array of transaction objects.
Each transaction has a \texttt{type} (\texttt{deposit}, \texttt{withdrawal}, or \texttt{transfer}), an \texttt{account\_id}, an \texttt{amount}, and a \texttt{description}.
Transfers additionally have a \texttt{to\_account\_id}.
\section{Validation Rules}
A transaction is \textbf{rejected} if any of these conditions apply:
\begin{center}
\begin{tabular}{ll}
\toprule
\textbf{Condition} & \textbf{Applies to} \\
\midrule
Account ID does not exist & All types \\
Account status is not \texttt{active} & All types \\
Amount is zero or negative & All types \\
Balance is less than withdrawal amount & Withdrawal, Transfer \\
Target account does not exist & Transfer \\
Target account is not \texttt{active} & Transfer \\
Unknown transaction type & -- \\
\bottomrule
\end{tabular}
\end{center}
\section{Output}
The program produces:
\begin{enumerate}
\item \textbf{Console output} -- A summary of updated account balances, accepted transactions, and rejected transactions with reasons.
\item \textbf{Updated account state} (\texttt{accounts\_updated.json}) -- The accounts JSON with balances modified by accepted transactions.
\item \textbf{Transaction log} (\texttt{transaction\_log.json}) -- Two arrays: \texttt{accepted} and \texttt{rejected}, each transaction annotated with its \texttt{status} and (for rejections) a \texttt{reason}.
\end{enumerate}
\section{Expected Results}
Given the provided input files, the expected outcome is:
\begin{center}
\small
\begin{tabular}{lllp{5cm}}
\toprule
\textbf{TXN ID} & \textbf{Type} & \textbf{Result} & \textbf{Reason (if rejected)} \\
\midrule
TXN-001 & deposit & Accepted & -- \\
TXN-002 & withdrawal & Accepted & -- \\
TXN-003 & withdrawal & Rejected & Insufficient funds \\
TXN-004 & deposit & Rejected & Negative amount \\
TXN-005 & deposit & Rejected & Account is frozen \\
TXN-006 & transfer & Accepted & -- \\
TXN-007 & withdrawal & Rejected & Account not found \\
TXN-008 & deposit & Rejected & Zero amount \\
\bottomrule
\end{tabular}
\end{center}
\section{Exercise}
Two implementations are provided:
\begin{enumerate}
\item \textbf{\texttt{bank\_bad.py}} -- A working but poorly written version that violates many clean code and PEP\,8 principles.
\item \textbf{\texttt{bank\_good.py}} -- A clean, well-structured version following PEP\,8 and clean code best practices.
\end{enumerate}
\subsection*{Tasks}
\begin{enumerate}
\item Run both programs and verify they produce the same results.
\item Read the bad version and list all clean code / PEP\,8 violations you can find.
\item For each violation, explain which principle is broken and why it makes the code harder to read or maintain.
\item Compare your list with the good version to see how each issue was resolved.
\end{enumerate}
\subsection*{Violations to Look For}
\begin{itemize}
\item Unused imports (\texttt{sys}, \texttt{os}, \texttt{copy}, \texttt{datetime})
\item No docstrings or module documentation
\item Single-letter and abbreviated variable names (\texttt{a}, \texttt{t}, \texttt{d}, \texttt{tp}, \texttt{tid})
\item Multiple statements per line (semicolons)
\item No whitespace around operators and after commas
\item Manual file open/close instead of context managers (\texttt{with})
\item One giant function doing all validation (violates Single Responsibility)
\item Duplicated validation logic for deposit/transfer amount checks
\item No constants for file paths
\item Missing \texttt{if \_\_name\_\_ == "\_\_main\_\_"} guard
\item Inconsistent error handling and status assignment
\item Hard-to-follow control flow with nested \texttt{if}/\texttt{elif}/\texttt{continue}
\end{itemize}
\end{document}

View File

@ -0,0 +1,72 @@
{
"accepted": [
{
"id": "TXN-001",
"type": "deposit",
"account_id": "ACC-001",
"amount": 500.0,
"description": "Salary payment",
"status": "accepted"
},
{
"id": "TXN-002",
"type": "withdrawal",
"account_id": "ACC-001",
"amount": 200.0,
"description": "ATM withdrawal",
"status": "accepted"
},
{
"id": "TXN-006",
"type": "transfer",
"account_id": "ACC-001",
"to_account_id": "ACC-002",
"amount": 750.0,
"description": "Transfer to Bob",
"status": "accepted"
}
],
"rejected": [
{
"id": "TXN-003",
"type": "withdrawal",
"account_id": "ACC-002",
"amount": 1500.0,
"description": "Rent payment - exceeds balance",
"reason": "insufficient funds",
"status": "rejected"
},
{
"id": "TXN-004",
"type": "deposit",
"account_id": "ACC-002",
"amount": -100.0,
"description": "Invalid negative deposit",
"reason": "invalid amount"
},
{
"id": "TXN-005",
"type": "deposit",
"account_id": "ACC-003",
"amount": 1000.0,
"description": "Deposit to frozen account",
"reason": "account not active"
},
{
"id": "TXN-007",
"type": "withdrawal",
"account_id": "ACC-999",
"amount": 50.0,
"description": "Unknown account",
"reason": "account not found"
},
{
"id": "TXN-008",
"type": "deposit",
"account_id": "ACC-001",
"amount": 0,
"description": "Zero-amount deposit",
"reason": "invalid amount"
}
]
}

View File

@ -0,0 +1,76 @@
{
"accepted": [
{
"id": "TXN-001",
"type": "deposit",
"account_id": "ACC-001",
"amount": 500.0,
"description": "Salary payment",
"status": "accepted"
},
{
"id": "TXN-002",
"type": "withdrawal",
"account_id": "ACC-001",
"amount": 200.0,
"description": "ATM withdrawal",
"status": "accepted"
},
{
"id": "TXN-006",
"type": "transfer",
"account_id": "ACC-001",
"to_account_id": "ACC-002",
"amount": 750.0,
"description": "Transfer to Bob",
"status": "accepted"
}
],
"rejected": [
{
"id": "TXN-003",
"type": "withdrawal",
"account_id": "ACC-002",
"amount": 1500.0,
"description": "Rent payment - exceeds balance",
"status": "rejected",
"reason": "insufficient funds"
},
{
"id": "TXN-004",
"type": "deposit",
"account_id": "ACC-002",
"amount": -100.0,
"description": "Invalid negative deposit",
"status": "rejected",
"reason": "amount must be positive"
},
{
"id": "TXN-005",
"type": "deposit",
"account_id": "ACC-003",
"amount": 1000.0,
"description": "Deposit to frozen account",
"status": "rejected",
"reason": "account is frozen"
},
{
"id": "TXN-007",
"type": "withdrawal",
"account_id": "ACC-999",
"amount": 50.0,
"description": "Unknown account",
"status": "rejected",
"reason": "account not found"
},
{
"id": "TXN-008",
"type": "deposit",
"account_id": "ACC-001",
"amount": 0,
"description": "Zero-amount deposit",
"status": "rejected",
"reason": "amount must be positive"
}
]
}

View File

@ -0,0 +1,61 @@
{
"transactions": [
{
"id": "TXN-001",
"type": "deposit",
"account_id": "ACC-001",
"amount": 500.00,
"description": "Salary payment"
},
{
"id": "TXN-002",
"type": "withdrawal",
"account_id": "ACC-001",
"amount": 200.00,
"description": "ATM withdrawal"
},
{
"id": "TXN-003",
"type": "withdrawal",
"account_id": "ACC-002",
"amount": 1500.00,
"description": "Rent payment - exceeds balance"
},
{
"id": "TXN-004",
"type": "deposit",
"account_id": "ACC-002",
"amount": -100.00,
"description": "Invalid negative deposit"
},
{
"id": "TXN-005",
"type": "deposit",
"account_id": "ACC-003",
"amount": 1000.00,
"description": "Deposit to frozen account"
},
{
"id": "TXN-006",
"type": "transfer",
"account_id": "ACC-001",
"to_account_id": "ACC-002",
"amount": 750.00,
"description": "Transfer to Bob"
},
{
"id": "TXN-007",
"type": "withdrawal",
"account_id": "ACC-999",
"amount": 50.00,
"description": "Unknown account"
},
{
"id": "TXN-008",
"type": "deposit",
"account_id": "ACC-001",
"amount": 0,
"description": "Zero-amount deposit"
}
]
}

View File

@ -0,0 +1,486 @@
"""
============================================================================
Example 0: Tokens, Embeddings, and Language Similarity An Introduction
============================================================================
AISE501 AI in Software Engineering I
Fachhochschule Graubünden
GOAL:
Before we look at CODE embeddings, we need to understand the
foundational concepts: tokenization and text embeddings. This script
walks through the full pipeline step by step, using German words
and phrases so you can build intuition in your native language.
The pipeline is: Text Tokens Token IDs Embedding Vectors
WHAT YOU WILL LEARN:
1. How text is split into TOKENS (sub-word units)
2. How tokens are mapped to integer IDs (the model's vocabulary)
3. How token IDs become dense EMBEDDING VECTORS (768 dimensions)
4. How cosine similarity measures meaning similar phrases are
close in vector space, different phrases are far apart
5. How to VISUALIZE the embedding space in 2D using PCA
LANGUAGE:
All examples use German words and phrases to make the concepts
tangible. The model (multilingual) handles German natively.
HARDWARE:
Works on CPU, CUDA (NVIDIA), and MPS (Apple Silicon Mac).
============================================================================
"""
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModel, BertTokenizer
import torch.nn.functional as F
from sklearn.decomposition import PCA
import matplotlib

# Select the non-interactive Agg backend BEFORE importing pyplot: the
# backend must be chosen before pyplot is first imported to reliably take
# effect. We only save PNGs, so no display is required.
matplotlib.use("Agg")
import matplotlib.pyplot as plt
# ── Device selection ──────────────────────────────────────────────────────
def get_device():
    """Return the best available torch device: CUDA, then MPS, then CPU."""
    for name, is_available in (
        ("cuda", torch.cuda.is_available),
        ("mps", torch.backends.mps.is_available),
    ):
        if is_available():
            return torch.device(name)
    return torch.device("cpu")
DEVICE = get_device()
print(f"Using device: {DEVICE}\n")

# ── Load a MULTILINGUAL EMBEDDING model ───────────────────────────────────
# We use paraphrase-multilingual-mpnet-base-v2: a sentence embedding model
# fine-tuned for semantic similarity across 50+ languages including German.
# It uses an XLM-RoBERTa backbone and produces 768-dimensional embeddings
# where cosine similarity directly reflects semantic similarity.
MODEL_NAME = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
print(f"Loading model: {MODEL_NAME} ...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
# Inference only: eval() disables dropout so embeddings are deterministic.
model.eval()
print("Model loaded.\n")

# ── Load a German-only tokenizer for comparison ──────────────────────────
# gbert-base uses WordPiece trained exclusively on German text (~31k vocab).
# We only load its tokenizer — no model weights needed.
GERMAN_TOKENIZER_NAME = "deepset/gbert-base"
print(f"Loading German tokenizer: {GERMAN_TOKENIZER_NAME} ...")
german_tokenizer = BertTokenizer.from_pretrained(GERMAN_TOKENIZER_NAME)
print("German tokenizer loaded.\n")
# ══════════════════════════════════════════════════════════════════════════
# PART 1: TOKENIZATION — How text becomes numbers
# ══════════════════════════════════════════════════════════════════════════
print("=" * 70)
print("PART 1: TOKENIZATION")
print("=" * 70)
# NOTE(review): some non-ASCII glyphs (arrows, bullets) appear to have been
# lost in this copy of the file, so a few explanatory strings below read
# slightly clipped — verify against the original lesson material.
print("""
Neural networks cannot read text they only understand numbers.
TOKENIZATION is the first step: splitting text into sub-word pieces
called TOKENS, then mapping each token to an integer ID.
We compare two tokenizers:
gbert (German-only, ~31k vocab) trained exclusively on German text
mpnet (multilingual, ~250k vocab) trained on 100+ languages
""")
# Sample vocabulary: German compound words plus one English word for contrast.
german_words = [
    "Fachhochschule",
    "Softwareentwicklung",
    "Künstliche Intelligenz",
    "Programmiersprache",
    "Datenbank",
    "Maschinelles Lernen",
    "Graubünden",
    "unhappiness",  # English comparison
]
# ── 1a: German-only tokenizer (gbert / WordPiece) ────────────────────────
print("─── 1a: German-Only Tokenizer (gbert, WordPiece, 31k vocab) ───\n")
print(f"{'Word/Phrase':<28s} {'#':>3s} {'Tokens'}")
print("-" * 90)
for word in german_words:
    # add_special_tokens=False: show only the word's own sub-word pieces.
    ids = german_tokenizer.encode(word, add_special_tokens=False)
    toks = german_tokenizer.convert_ids_to_tokens(ids)
    print(f"{word:<28s} {len(toks):3d} {' | '.join(toks)}")
# ── 1b: Multilingual tokenizer (mpnet / SentencePiece) ───────────────────
print(f"\n─── 1b: Multilingual Tokenizer (mpnet, SentencePiece, 250k vocab) ───\n")
print(f"{'Word/Phrase':<28s} {'#':>3s} {'Tokens'}")
print("-" * 90)
for word in german_words:
    ids = tokenizer.encode(word, add_special_tokens=False)
    toks = tokenizer.convert_ids_to_tokens(ids)
    print(f"{word:<28s} {len(toks):3d} {' | '.join(toks)}")
print("""
KEY OBSERVATIONS:
The GERMAN tokenizer keeps common words intact: "Fachhochschule" is
a SINGLE token, "Programmiersprache" splits at the natural compound
boundary "Programmier" + "sprache".
The MULTILINGUAL tokenizer fragments German more aggressively:
"Fachhochschule" 4 tokens ("Fach", "ho", "ch", "schule"), because
its 250k vocabulary is shared across 100+ languages German gets
a smaller budget per word.
Both tokenizers use STATISTICAL sub-word splitting (not morphological
analysis). The German tokenizer simply has more German-specific
entries because its entire vocabulary is dedicated to one language.
Trade-off: the multilingual tokenizer needs more tokens per German
word, but it enables CROSS-LINGUAL capabilities (comparing German
and English in the same embedding space see Part 3b).
The rest of this script uses the multilingual model for embeddings.
""")
# ══════════════════════════════════════════════════════════════════════════
# PART 2: FROM TOKENS TO EMBEDDING VECTORS
# ══════════════════════════════════════════════════════════════════════════
print("=" * 70)
print("PART 2: FROM TOKENS TO EMBEDDING VECTORS")
print("=" * 70)
print("""
Each token ID is looked up in an EMBEDDING TABLE a large matrix where
each row is a dense vector (768 dimensions in this model, up to 4096 in
large LLMs). The transformer then refines these vectors through 12 layers
of self-attention, producing contextual embeddings where each token's
vector depends on ALL surrounding tokens.
""")
example_sentence = "Der Student lernt Programmieren an der Fachhochschule"
inputs = tokenizer(example_sentence, return_tensors="pt").to(DEVICE)
token_ids = inputs["input_ids"].squeeze().tolist()
tokens = tokenizer.convert_ids_to_tokens(token_ids)
# no_grad: inference only — no gradient bookkeeping, saves memory and time.
with torch.no_grad():
    outputs = model(**inputs)
# outputs.last_hidden_state: shape [1, num_tokens, 768]
hidden_states = outputs.last_hidden_state.squeeze(0)
print(f'Sentence: "{example_sentence}"\n')
print(f"{'Pos':>4s} {'Token':<20s} {'ID':>7s} {'Vector (first 8 of 768 dims)...'}")
print("-" * 80)
for i, (tok, tid) in enumerate(zip(tokens, token_ids)):
    # Preview only the first 8 of 768 dimensions per token.
    vec = hidden_states[i].cpu().numpy()
    vec_preview = " ".join(f"{v:+.3f}" for v in vec[:8])
    print(f"{i:4d} {tok:<20s} {tid:7d} [{vec_preview} ...]")
print(f"""
KEY OBSERVATIONS:
Each token becomes a vector of {hidden_states.shape[1]} numbers.
These numbers are NOT random they encode the token's meaning
IN CONTEXT. The vector for "Fachhochschule" here is different from
the vector for "Fachhochschule" in a different sentence.
The full sentence has {len(tokens)} tokens, producing a matrix of
shape [{len(tokens)} × {hidden_states.shape[1]}].
To get a single vector for the whole sentence, we average all
token vectors (mean pooling).
""")
# ══════════════════════════════════════════════════════════════════════════
# PART 3: MEASURING SIMILARITY BETWEEN WORDS
# ══════════════════════════════════════════════════════════════════════════
print("=" * 70)
print("PART 3: WORD AND PHRASE SIMILARITY")
print("=" * 70)
print("""
If embeddings capture meaning, then SIMILAR words should have SIMILAR
vectors (high cosine similarity) and DIFFERENT words should have
DIFFERENT vectors (low cosine similarity). Let's test this with German.
""")
def embed_text(text: str) -> torch.Tensor:
    """Mean-pool the token embeddings of *text* into one L2-normalized vector."""
    encoded = tokenizer(
        text, return_tensors="pt", truncation=True, max_length=128, padding=True
    ).to(DEVICE)
    with torch.no_grad():
        token_states = model(**encoded).last_hidden_state
    # Mask out padding positions, then average the remaining token vectors.
    attn = encoded["attention_mask"].unsqueeze(-1)
    pooled = (token_states * attn).sum(dim=1) / attn.sum(dim=1)
    # Unit length, so a plain dot product equals cosine similarity.
    unit = F.normalize(pooled, p=2, dim=1)
    return unit.squeeze(0)
# ── 3a: Single word similarities ─────────────────────────────────────────
print("─── 3a: Single Word Similarities ───\n")
word_pairs = [
    # Semantically SIMILAR pairs (synonyms or near-synonyms)
    ("Auto", "Fahrzeug"),  # car / vehicle — near-synonyms
    ("Arzt", "Doktor"),  # physician / doctor — synonyms
    ("Programmierer", "Entwickler"),  # programmer / developer
    ("schnell", "rasch"),  # fast / swift — synonyms
    ("Haus", "Gebäude"),  # house / building — closely related
    # SAME CATEGORY but different concepts
    ("Hund", "Katze"),  # dog / cat — both pets, but different!
    ("Montag", "Freitag"),  # Monday / Friday — both weekdays
    # Semantically UNRELATED pairs
    ("Hund", "Mathematik"),  # dog vs math
    ("Auto", "Philosophie"),  # car vs philosophy
    ("schnell", "Datenbank"),  # fast vs database
]
print(f"{'Word A':<20s} {'Word B':<20s} {'Cosine Sim':>10s} {'Relationship'}")
print("-" * 75)
for w1, w2 in word_pairs:
    v1, v2 = embed_text(w1), embed_text(w2)
    # Both vectors are unit length, so the dot product IS the cosine.
    sim = torch.dot(v1.cpu(), v2.cpu()).item()
    if sim > 0.6:
        rel = "synonyms/close"
    elif sim > 0.3:
        rel = "related"
    else:
        rel = "unrelated"
    # NOTE(review): the bar-glyph literal below appears to be an empty string
    # in this copy (likely a lost block character such as "█") — confirm
    # against the original file.
    bar = "" * int(max(0, sim) * 30)
    print(f"{w1:<20s} {w2:<20s} {sim:10.3f} {bar} ({rel})")
print("""
KEY OBSERVATIONS:
Synonyms (Auto/Fahrzeug, Arzt/Doktor) have HIGHEST similarity.
Same-category but different concepts (Hund/Katze) have MODERATE
similarity they share context (both are pets) but a dog is NOT
a cat. The model captures this nuance!
Completely unrelated words (Hund/Mathematik) have LOW similarity.
Embedding similarity reflects MEANING OVERLAP, not just category.
""")
# ── 3b: Phrase/sentence similarities ─────────────────────────────────────
print("─── 3b: Phrase and Sentence Similarities ───\n")
phrases = {
    "ML_de": "Maschinelles Lernen ist ein Teilgebiet der Informatik",
    "ML_en": "Machine learning is a subfield of computer science",
    "DL_de": "Deep Learning verwendet neuronale Netze mit vielen Schichten",
    "Koch": "Der Koch bereitet das Abendessen in der Küche vor",
    "Wetter": "Morgen wird es regnen und kalt sein",
    "Prog": "Python ist eine beliebte Programmiersprache",
}
phrase_embeddings = {name: embed_text(text) for name, text in phrases.items()}
names = list(phrases.keys())
# Print a pairwise similarity matrix: header row first, then one row per phrase.
print(f"{'':>10s}", end="")
for n in names:
    print(f"{n:>10s}", end="")
print()
for n1 in names:
    print(f"{n1:>10s}", end="")
    for n2 in names:
        sim = torch.dot(phrase_embeddings[n1].cpu(),
                        phrase_embeddings[n2].cpu()).item()
        print(f"{sim:10.3f}", end="")
    print()
print("""
KEY OBSERVATIONS:
"Maschinelles Lernen..." (German) and "Machine learning..." (English)
should have HIGH similarity the model understands both languages
and maps equivalent meanings to nearby vectors.
ML and Deep Learning sentences should be moderately similar (related
topics in computer science).
The cooking sentence and weather sentence should be DISSIMILAR to
the tech sentences completely different topics.
This CROSS-LINGUAL capability is what makes multilingual embeddings
so powerful.
""")
# ══════════════════════════════════════════════════════════════════════════
# PART 4: VISUALIZING THE EMBEDDING SPACE
# ══════════════════════════════════════════════════════════════════════════
print("=" * 70)
print("PART 4: VISUALIZING THE EMBEDDING SPACE")
print("=" * 70)
print("""
768 dimensions are impossible to visualize. We use PCA to project the
vectors down to 2D while preserving as much structure as possible.
If the embeddings truly capture meaning, we should see CLUSTERS of
related words in the 2D plot.
""")
# Groups of German words organized by semantic category
word_groups = {
    "Tiere": ["Hund", "Katze", "Pferd", "Vogel", "Fisch", "Kuh"],
    "Technik": ["Computer", "Software", "Programmieren", "Datenbank",
                "Algorithmus", "Internet"],
    "Essen": ["Brot", "Käse", "Apfel", "Suppe", "Kuchen", "Wurst"],
    "Natur": ["Berg", "Fluss", "Wald", "See", "Wiese", "Schnee"],
    "Berufe": ["Arzt", "Lehrer", "Ingenieur", "Koch", "Pilot", "Anwalt"],
}
# Flatten the groups into three parallel lists for plotting.
all_words = []
all_categories = []
all_vectors = []
print("Computing embeddings for word groups...")
for category, words in word_groups.items():
    for word in words:
        vec = embed_text(word).cpu().numpy()
        all_words.append(word)
        all_categories.append(category)
        all_vectors.append(vec)
    print(f"  {category}: {', '.join(words)}")
X = np.stack(all_vectors)
print(f"\nEmbedding matrix: {X.shape[0]} words × {X.shape[1]} dimensions")
# ── PCA to 2D ────────────────────────────────────────────────────────────
pca = PCA(n_components=2)
X_2d = pca.fit_transform(X)
# ── Plot ──────────────────────────────────────────────────────────────────
category_names = list(word_groups.keys())
cmap = plt.cm.Set1
colors = {cat: cmap(i / len(category_names)) for i, cat in enumerate(category_names)}
fig, ax = plt.subplots(figsize=(12, 9))
for i, (word, cat) in enumerate(zip(all_words, all_categories)):
    x, y = X_2d[i]
    ax.scatter(x, y, c=[colors[cat]], s=120, edgecolors="black",
               linewidth=0.5, zorder=3)
    ax.annotate(word, (x, y), fontsize=9, ha="center", va="bottom",
                xytext=(0, 7), textcoords="offset points",
                fontweight="bold")
# Empty scatters create one legend entry per category without plotting data.
for cat in category_names:
    ax.scatter([], [], c=[colors[cat]], s=100, label=cat,
               edgecolors="black", linewidth=0.5)
ax.legend(loc="best", fontsize=11, title="Kategorie", title_fontsize=12,
          framealpha=0.9)
var = pca.explained_variance_ratio_
ax.set_title(
    "Deutsche Wörter im Embedding-Raum (768D → 2D via PCA)\n"
    f"PC1: {var[0]:.1%} Varianz, PC2: {var[1]:.1%} Varianz",
    fontsize=14, fontweight="bold"
)
ax.set_xlabel("Hauptkomponente 1 (PC1)", fontsize=12)
ax.set_ylabel("Hauptkomponente 2 (PC2)", fontsize=12)
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig("embedding_space_german.png", dpi=150)
print(f"\nSaved: embedding_space_german.png")
# ── Second plot: Phrases including cross-lingual ──────────────────────────
print("\nComputing phrase embeddings for visualization...")
# Phrase → category label; categories pair each topic in German and English.
viz_phrases = {
    # German CS phrases
    "Maschinelles Lernen": "Technik (DE)",
    "Neuronale Netze": "Technik (DE)",
    "Softwareentwicklung": "Technik (DE)",
    "Künstliche Intelligenz": "Technik (DE)",
    # English equivalents
    "Machine Learning": "Technik (EN)",
    "Neural Networks": "Technik (EN)",
    "Software Development": "Technik (EN)",
    "Artificial Intelligence": "Technik (EN)",
    # German everyday phrases
    "Guten Morgen": "Alltag (DE)",
    "Wie geht es Ihnen": "Alltag (DE)",
    "Das Wetter ist schön": "Alltag (DE)",
    "Ich gehe einkaufen": "Alltag (DE)",
    # English everyday phrases
    "Good morning": "Alltag (EN)",
    "How are you": "Alltag (EN)",
    "The weather is nice": "Alltag (EN)",
    "I am going shopping": "Alltag (EN)",
}
phrase_labels = list(viz_phrases.keys())
phrase_cats = list(viz_phrases.values())
phrase_vecs = np.stack([embed_text(p).cpu().numpy() for p in phrase_labels])
pca2 = PCA(n_components=2)
P_2d = pca2.fit_transform(phrase_vecs)
# Same hue per topic: darker shade = German, lighter shade = English.
cat_colors = {
    "Technik (DE)": "#1f77b4",
    "Technik (EN)": "#aec7e8",
    "Alltag (DE)": "#d62728",
    "Alltag (EN)": "#ff9896",
}
fig2, ax2 = plt.subplots(figsize=(12, 9))
for i, (label, cat) in enumerate(zip(phrase_labels, phrase_cats)):
    x, y = P_2d[i]
    marker = "o" if "(DE)" in cat else "s"  # circle=German, square=English
    ax2.scatter(x, y, c=cat_colors[cat], s=140, marker=marker,
                edgecolors="black", linewidth=0.5, zorder=3)
    ax2.annotate(label, (x, y), fontsize=8, ha="center", va="bottom",
                 xytext=(0, 8), textcoords="offset points")
# Empty scatters create one legend entry per category without plotting data.
for cat, color in cat_colors.items():
    marker = "o" if "(DE)" in cat else "s"
    ax2.scatter([], [], c=color, s=100, marker=marker, label=cat,
                edgecolors="black", linewidth=0.5)
ax2.legend(loc="best", fontsize=10, title="Kategorie & Sprache",
           title_fontsize=11, framealpha=0.9)
var2 = pca2.explained_variance_ratio_
ax2.set_title(
    "Cross-lingual Embeddings: Deutsche & Englische Phrasen\n"
    f"PC1: {var2[0]:.1%} Varianz, PC2: {var2[1]:.1%} Varianz",
    fontsize=14, fontweight="bold"
)
ax2.set_xlabel("Hauptkomponente 1 (PC1)", fontsize=12)
ax2.set_ylabel("Hauptkomponente 2 (PC2)", fontsize=12)
ax2.grid(True, alpha=0.3)
fig2.tight_layout()
fig2.savefig("embedding_space_crosslingual.png", dpi=150)
print(f"Saved: embedding_space_crosslingual.png")
# Final summary (f-string so the ruler lines can be computed inline).
print(f"""
{'=' * 70}
SUMMARY: THE FULL PIPELINE
{'=' * 70}
Text Tokens Token IDs Embeddings
"Fachhochschule" [Fach, ho, [28356, 497, [0.012, -0.34,
ch, schule] 206, 72460] 0.88, ...]
(768 dimensions)
1. TOKENIZATION splits text into statistical sub-word pieces.
Splits are based on frequency, not German morphology.
Each token maps to an integer ID from the vocabulary.
2. EMBEDDING VECTORS are 768-dimensional representations of meaning.
Computed by the transformer's 12 layers of self-attention.
Similar meanings nearby vectors (high cosine similarity).
Different meanings distant vectors (low cosine similarity).
3. COSINE SIMILARITY measures how "aligned" two vectors are.
1.0 = identical meaning, 0.0 = unrelated, -1.0 = opposite.
4. CROSS-LINGUAL EMBEDDINGS map equivalent phrases in different
languages to nearby vectors. "Maschinelles Lernen" "Machine
Learning" in embedding space.
5. The SAME PRINCIPLES apply to CODE EMBEDDINGS (next examples):
Code is tokenized into sub-word pieces
A transformer produces embedding vectors
Similar code has similar vectors
This enables semantic code search, clone detection, and RAG
Check the two PNG files for visual confirmation:
embedding_space_german.png German word clusters
embedding_space_crosslingual.png DE/EN phrase alignment
""")

View File

@ -0,0 +1,231 @@
"""
============================================================================
Example 1: Computing Code Embeddings and Measuring Similarity
============================================================================
AISE501 AI in Software Engineering I
Fachhochschule Graubünden
GOAL:
Load a pre-trained code embedding model, embed several code snippets,
and compute pairwise cosine similarities to see which snippets the
model considers semantically similar.
WHAT YOU WILL LEARN:
- How to load a code embedding model with PyTorch
- How code is tokenized and converted to vectors
- How cosine similarity reveals semantic relationships
- That similar functionality high similarity, different purpose low
HARDWARE:
Works on CPU, CUDA (NVIDIA), and MPS (Apple Silicon Mac).
============================================================================
"""
import torch
from transformers import AutoTokenizer, AutoModel
import torch.nn.functional as F
# ── Device selection ──────────────────────────────────────────────────────
# PyTorch supports three backends:
#   - "cuda" → NVIDIA GPUs (Linux/Windows)
#   - "mps"  → Apple Silicon GPUs (macOS M1/M2/M3/M4)
#   - "cpu"  → always available, slower
def get_device():
    """Return the best available torch device (CUDA > MPS > CPU)."""
    for backend, probe in (
        ("cuda", torch.cuda.is_available),
        ("mps", torch.backends.mps.is_available),
    ):
        if probe():
            return torch.device(backend)
    return torch.device("cpu")
DEVICE = get_device()
print(f"Using device: {DEVICE}\n")

# ── Load model and tokenizer ─────────────────────────────────────────────
# We use st-codesearch-distilroberta-base — a DistilRoBERTa model (82M params)
# specifically fine-tuned on 1.38M code-comment pairs from CodeSearchNet using
# contrastive learning. It produces 768-dim embeddings optimized for matching
# natural language descriptions to code, making it ideal for code search and
# similarity tasks.
MODEL_NAME = "flax-sentence-embeddings/st-codesearch-distilroberta-base"
print(f"Loading model: {MODEL_NAME} ...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
model.eval()  # disable dropout — we want deterministic embeddings
print("Model loaded.\n")
# ── Define code snippets to compare ──────────────────────────────────────
# We intentionally include:
# - Two sorting functions (similar purpose, different implementation)
# - A function that does something completely different (JSON parsing)
# - A sorting function in a different style (list comprehension)
# Each value is valid, runnable Python source; the embedding model sees the
# raw text exactly as written here, so the snippets keep real indentation.
snippets = {
    "bubble_sort": """
def bubble_sort(arr):
    n = len(arr)
    for i in range(n):
        for j in range(0, n - i - 1):
            if arr[j] > arr[j + 1]:
                arr[j], arr[j + 1] = arr[j + 1], arr[j]
    return arr
""",
    "quick_sort": """
def quick_sort(arr):
    if len(arr) <= 1:
        return arr
    pivot = arr[len(arr) // 2]
    left = [x for x in arr if x < pivot]
    middle = [x for x in arr if x == pivot]
    right = [x for x in arr if x > pivot]
    return quick_sort(left) + middle + quick_sort(right)
""",
    "sorted_builtin": """
def sort_list(data):
    return sorted(data)
""",
    "parse_json": """
import json
def parse_config(filepath):
    with open(filepath, 'r') as f:
        config = json.load(f)
    return config
""",
    "read_csv": """
import csv
def read_csv_file(filepath):
    rows = []
    with open(filepath, 'r') as f:
        reader = csv.reader(f)
        for row in reader:
            rows.append(row)
    return rows
""",
}
def embed_code(code_text: str) -> torch.Tensor:
    """Embed a code snippet as one 768-dim unit vector.

    The transformer emits one contextual vector per token; to compare whole
    snippets we (1) tokenize, (2) run a gradient-free forward pass,
    (3) average the token vectors while ignoring padding (masked mean
    pooling), and (4) L2-normalize so that cosine similarity between two
    snippets reduces to a plain dot product.

    Returns:
        torch.Tensor of shape [768] with unit L2 norm.
    """
    # Tokenize into sub-word IDs plus an attention mask (1 = real token,
    # 0 = padding). truncation/max_length=512 matches the model's training
    # context; longer inputs would be out-of-distribution.
    encoded = tokenizer(
        code_text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    ).to(DEVICE)

    # Inference only — no gradient tracking needed.
    with torch.no_grad():
        hidden = model(**encoded).last_hidden_state  # [1, seq_len, 768]

    # Masked mean pooling: zero out padding positions, then divide the sum
    # by the number of real tokens rather than the padded length, so padding
    # cannot dilute the average.
    mask = encoded["attention_mask"].unsqueeze(-1)  # [1, seq_len, 1]
    pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1)

    # Unit-length projection: with ‖a‖ = ‖b‖ = 1, cos(a, b) = a · b, which
    # is what the similarity code below (and vector databases) rely on.
    pooled = F.normalize(pooled, p=2, dim=1)
    return pooled.squeeze(0)  # drop batch dim → [768]
# ── Compute embeddings for all snippets ───────────────────────────────────
print("Computing embeddings...")
embeddings = {}  # label → 768-dim unit vector
for name, code in snippets.items():
    embeddings[name] = embed_code(code)
    # Token count is informational only: shows how much of the model's
    # 512-token budget each snippet consumes.
    num_tokens = len(tokenizer.encode(code))
    print(f" {name:20s}{num_tokens:3d} tokens → vector of dim {embeddings[name].shape[0]}")
print()
# ── Compute pairwise cosine similarities ──────────────────────────────────
# cosine_similarity = dot product of unit vectors (we already normalized above)
names = list(embeddings.keys())
print("Pairwise Cosine Similarities:")
print(f"{'':22s}", end="")
for n in names:
    print(f"{n:>16s}", end="")
print()
for i, n1 in enumerate(names):
    print(f"{n1:22s}", end="")
    for j, n2 in enumerate(names):
        # .cpu() so the dot product works no matter which device (CUDA/MPS)
        # the embeddings live on.
        sim = torch.dot(embeddings[n1].cpu(), embeddings[n2].cpu()).item()
        print(f"{sim:16.3f}", end="")
    print()
# ── Interpretation ────────────────────────────────────────────────────────
print("\n" + "=" * 70)
print("INTERPRETATION:")
print("=" * 70)
print("""
- bubble_sort, quick_sort, and sorted_builtin should have HIGH similarity
(all perform sorting, despite very different implementations).
- parse_json and read_csv should be similar to each other (both read files)
but DISSIMILAR to the sorting functions (different purpose).
- This demonstrates that code embeddings capture WHAT code does,
not just HOW it looks syntactically.
""")

View File

@ -0,0 +1,251 @@
"""
============================================================================
Example 2: Text-to-Code Semantic Search
============================================================================
AISE501 AI in Software Engineering I
Fachhochschule Graubünden
GOAL:
Build a mini code search engine: given a natural language query like
"sort a list", find the most relevant code snippet from a collection.
This is the core mechanism behind semantic code search in tools like
Cursor, GitHub Copilot, and code search engines.
WHAT YOU WILL LEARN:
- How the SAME embedding model maps both text and code into a shared
vector space this is what makes text-to-code search possible.
- How to build a simple search index and query it.
- Why embedding-based search beats keyword search for code.
HARDWARE:
Works on CPU, CUDA (NVIDIA), and MPS (Apple Silicon Mac).
============================================================================
"""
import torch
from transformers import AutoTokenizer, AutoModel
import torch.nn.functional as F
# ── Device selection ──────────────────────────────────────────────────────
def get_device():
    """Return the best available torch device (CUDA > Apple MPS > CPU)."""
    preference = (
        ("cuda", torch.cuda.is_available),
        ("mps", torch.backends.mps.is_available),
    )
    for name, is_available in preference:
        if is_available():
            return torch.device(name)
    return torch.device("cpu")
DEVICE = get_device()
print(f"Using device: {DEVICE}\n")
# ── Load model ────────────────────────────────────────────────────────────
# Same checkpoint as Example 1: DistilRoBERTa fine-tuned on CodeSearchNet
# code/description pairs, producing 768-dim embeddings for text AND code.
MODEL_NAME = "flax-sentence-embeddings/st-codesearch-distilroberta-base"
print(f"Loading model: {MODEL_NAME} ...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
model.eval()  # inference only: disable dropout for deterministic embeddings
print("Model loaded.\n")
# ── Code "database" ──────────────────────────────────────────────────────
# Imagine these are functions in a large codebase that we want to search.
code_database = [
{
"name": "binary_search",
"code": """
def binary_search(arr, target):
low, high = 0, len(arr) - 1
while low <= high:
mid = (low + high) // 2
if arr[mid] == target:
return mid
elif arr[mid] < target:
low = mid + 1
else:
high = mid - 1
return -1
"""
},
{
"name": "merge_sort",
"code": """
def merge_sort(arr):
if len(arr) <= 1:
return arr
mid = len(arr) // 2
left = merge_sort(arr[:mid])
right = merge_sort(arr[mid:])
return merge(left, right)
"""
},
{
"name": "read_json_file",
"code": """
import json
def read_json_file(path):
with open(path, 'r') as f:
return json.load(f)
"""
},
{
"name": "calculate_average",
"code": """
def calculate_average(numbers):
if not numbers:
return 0.0
return sum(numbers) / len(numbers)
"""
},
{
"name": "connect_database",
"code": """
import sqlite3
def connect_database(db_path):
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
return conn, cursor
"""
},
{
"name": "send_http_request",
"code": """
import requests
def send_http_request(url, method='GET', data=None):
if method == 'GET':
response = requests.get(url)
else:
response = requests.post(url, json=data)
return response.json()
"""
},
{
"name": "flatten_nested_list",
"code": """
def flatten(nested_list):
result = []
for item in nested_list:
if isinstance(item, list):
result.extend(flatten(item))
else:
result.append(item)
return result
"""
},
{
"name": "count_words",
"code": """
def count_words(text):
words = text.lower().split()
word_count = {}
for word in words:
word_count[word] = word_count.get(word, 0) + 1
return word_count
"""
},
{
"name": "validate_email",
"code": """
import re
def validate_email(email):
pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$'
return bool(re.match(pattern, email))
"""
},
{
"name": "fibonacci",
"code": """
def fibonacci(n):
if n <= 1:
return n
a, b = 0, 1
for _ in range(2, n + 1):
a, b = b, a + b
return b
"""
},
]
def embed_text(text: str) -> torch.Tensor:
    """Map text or code to a single unit-length embedding vector.

    Uses the shared tokenizer/model pair: tokenize (truncated to the
    model's 512-token context), gradient-free forward pass, masked mean
    pooling over real (non-padding) tokens, then L2 normalization so dot
    products equal cosine similarities.
    """
    batch = tokenizer(
        text, return_tensors="pt", truncation=True, max_length=512, padding=True
    ).to(DEVICE)
    with torch.no_grad():
        token_states = model(**batch).last_hidden_state
    # Average only over non-padding positions.
    keep = batch["attention_mask"].unsqueeze(-1)
    pooled = (token_states * keep).sum(dim=1) / keep.sum(dim=1)
    unit = F.normalize(pooled, p=2, dim=1)
    return unit.squeeze(0)
# ── Step 1: Index the code database ───────────────────────────────────────
# In a real system this would be stored in a vector database (ChromaDB,
# Pinecone, pgvector). Here we keep it simple with a list of tensors.
print("Indexing code database...")
code_vectors = []
for entry in code_database:
    vec = embed_text(entry["code"])
    code_vectors.append(vec)
    print(f" Indexed: {entry['name']}")
# Stack into a matrix: shape [num_snippets, embedding_dim]
code_matrix = torch.stack(code_vectors)
print(f"\nIndex built: {code_matrix.shape[0]} snippets, {code_matrix.shape[1]} dimensions\n")
# ── Step 2: Search with natural language queries ──────────────────────────
# One query per database entry, deliberately phrased without reusing the
# function identifiers, so any hits come from semantics rather than text
# overlap.
queries = [
    "sort a list of numbers",
    "find an element in a sorted array",
    "compute the mean of a list",
    "make an HTTP API call",
    "open and read a JSON file",
    "check if an email address is valid",
    "count word frequencies in a string",
    "generate fibonacci numbers",
    "connect to a SQL database",
    "flatten a nested list into a single list",
]
print("=" * 70)
print("SEMANTIC CODE SEARCH RESULTS")
print("=" * 70)
for query in queries:
    # Embed the natural language query with the SAME model
    query_vec = embed_text(query)
    # Compute cosine similarity against all code embeddings.
    # Because vectors are normalized, matrix-vector product = one cosine
    # similarity score per indexed snippet.
    similarities = torch.mv(code_matrix.cpu(), query_vec.cpu())
    # Rank results by similarity (highest first)
    ranked_indices = torch.argsort(similarities, descending=True)
    print(f'\nQuery: "{query}"')
    print(f" Rank Score Function")
    print(f" ---- ----- --------")
    for rank, idx in enumerate(ranked_indices[:3]):  # show top 3
        score = similarities[idx].item()
        name = code_database[idx]["name"]
        marker = " ← best match" if rank == 0 else ""
        print(f" {rank+1:4d} {score:.3f} {name}{marker}")
print("\n" + "=" * 70)
print("KEY OBSERVATIONS:")
print("=" * 70)
print("""
1. The model maps NATURAL LANGUAGE queries and CODE into the same vector
space. This is why "sort a list" finds merge_sort and "find an element
in a sorted array" finds binary_search — even though the queries
contain none of the function identifiers.
2. This is fundamentally different from grep/keyword search:
- grep "sort" would miss functions named "order" or "arrange"
- grep "find element" would miss "binary_search"
Embeddings understand MEANING, not just string matching.
3. This is exactly how Cursor, Copilot, and other AI coding tools
retrieve relevant code from your project to feed into the LLM.
""")

View File

@ -0,0 +1,199 @@
"""
============================================================================
Example 3: Cross-Language Code Similarity
============================================================================
AISE501 AI in Software Engineering I
Fachhochschule Graubünden
GOAL:
Demonstrate that code embeddings capture FUNCTIONALITY, not syntax.
The same algorithm written in Python, JavaScript, Java, and C++
should produce similar embedding vectors even though the surface
syntax is completely different.
WHAT YOU WILL LEARN:
- Code embedding models create a language-agnostic semantic space.
- Functionally equivalent code clusters together regardless of language.
- This enables cross-language code search (e.g., find the Java
equivalent of a Python function).
HARDWARE:
Works on CPU, CUDA (NVIDIA), and MPS (Apple Silicon Mac).
============================================================================
"""
import torch
from transformers import AutoTokenizer, AutoModel
import torch.nn.functional as F
# ── Device selection ──────────────────────────────────────────────────────
def get_device():
    """Pick the fastest available torch device: CUDA, then MPS, then CPU."""
    if torch.cuda.is_available():
        chosen = "cuda"
    elif torch.backends.mps.is_available():
        chosen = "mps"
    else:
        chosen = "cpu"
    return torch.device(chosen)
DEVICE = get_device()
print(f"Using device: {DEVICE}\n")
# ── Load model ────────────────────────────────────────────────────────────
MODEL_NAME = "flax-sentence-embeddings/st-codesearch-distilroberta-base"
print(f"Loading model: {MODEL_NAME} ...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
model.eval()
print("Model loaded.\n")
# ── Same algorithm in four languages ──────────────────────────────────────
# Task A: Factorial — a simple recursive/iterative computation
# Task B: Reverse a string
# If embeddings are truly semantic, Task A functions should cluster together
# and Task B functions should cluster together, regardless of language.
code_snippets = {
# ── Task A: Factorial ──
"factorial_python": """
def factorial(n):
result = 1
for i in range(2, n + 1):
result *= i
return result
""",
"factorial_javascript": """
function factorial(n) {
let result = 1;
for (let i = 2; i <= n; i++) {
result *= i;
}
return result;
}
""",
"factorial_java": """
public static int factorial(int n) {
int result = 1;
for (int i = 2; i <= n; i++) {
result *= i;
}
return result;
}
""",
"factorial_cpp": """
int factorial(int n) {
int result = 1;
for (int i = 2; i <= n; i++) {
result *= i;
}
return result;
}
""",
# ── Task B: Reverse a string ──
"reverse_python": """
def reverse_string(s):
return s[::-1]
""",
"reverse_javascript": """
function reverseString(s) {
return s.split('').reverse().join('');
}
""",
"reverse_java": """
public static String reverseString(String s) {
return new StringBuilder(s).reverse().toString();
}
""",
"reverse_cpp": """
std::string reverseString(std::string s) {
std::reverse(s.begin(), s.end());
return s;
}
""",
}
def embed_code(code: str) -> torch.Tensor:
    """Embed a code snippet as a single L2-normalized 768-dim vector.

    Tokenizes (truncated to the model's 512-token context), runs a
    gradient-free forward pass, mean-pools over non-padding tokens, and
    normalizes so dot products equal cosine similarities.
    """
    encoded = tokenizer(
        code, return_tensors="pt", truncation=True, max_length=512, padding=True
    ).to(DEVICE)
    with torch.no_grad():
        hidden = model(**encoded).last_hidden_state
    real_tokens = encoded["attention_mask"].unsqueeze(-1)
    summed = (hidden * real_tokens).sum(dim=1)
    averaged = summed / real_tokens.sum(dim=1)
    return F.normalize(averaged, p=2, dim=1).squeeze(0)
# ── Compute all embeddings ────────────────────────────────────────────────
print("Computing embeddings for all snippets...")
embeddings = {}  # snippet label → unit embedding vector
for name, code in code_snippets.items():
    embeddings[name] = embed_code(code)
print(f"Done. {len(embeddings)} embeddings computed.\n")
# ── Compute similarity matrix ─────────────────────────────────────────────
names = list(embeddings.keys())
n = len(names)
print("=" * 70)
print("CROSS-LANGUAGE SIMILARITY MATRIX")
print("=" * 70)
# Print header (abbreviated names for readability)
# NOTE: the comprehension variable `n` below is scoped to the comprehension
# (Python 3) and does NOT clobber the outer `n = len(names)` — though reusing
# the name is confusing and worth renaming eventually.
short_names = [n.replace("factorial_", "F:").replace("reverse_", "R:") for n in names]
print(f"\n{'':14s}", end="")
for sn in short_names:
    print(f"{sn:>10s}", end="")
print()
for i in range(n):
    print(f"{short_names[i]:14s}", end="")
    for j in range(n):
        # Unit vectors → dot product equals cosine similarity.
        sim = torch.dot(embeddings[names[i]].cpu(), embeddings[names[j]].cpu()).item()
        print(f"{sim:10.3f}", end="")
    print()
# ── Compute average within-task and across-task similarities ──────────────
factorial_names = [n for n in names if "factorial" in n]
reverse_names = [n for n in names if "reverse" in n]
within_factorial = []
within_reverse = []
across_tasks = []
# Iterate over the upper triangle only (i < j) so each unordered pair is
# counted exactly once and the diagonal (self-similarity 1.0) is excluded.
for i, n1 in enumerate(names):
    for j, n2 in enumerate(names):
        if i >= j:
            continue
        sim = torch.dot(embeddings[n1].cpu(), embeddings[n2].cpu()).item()
        if n1 in factorial_names and n2 in factorial_names:
            within_factorial.append(sim)
        elif n1 in reverse_names and n2 in reverse_names:
            within_reverse.append(sim)
        else:
            across_tasks.append(sim)
print("\n" + "=" * 70)
print("ANALYSIS")
print("=" * 70)
print(f"\nAvg similarity WITHIN factorial (across languages): "
      f"{sum(within_factorial)/len(within_factorial):.3f}")
print(f"Avg similarity WITHIN reverse (across languages): "
      f"{sum(within_reverse)/len(within_reverse):.3f}")
print(f"Avg similarity ACROSS tasks (factorial vs reverse): "
      f"{sum(across_tasks)/len(across_tasks):.3f}")
print("""
EXPECTED RESULT:
Within-task similarity should be MUCH HIGHER than across-task similarity.
This proves that the embedding model groups code by WHAT IT DOES,
not by WHAT LANGUAGE it is written in.
factorial_python factorial_java factorial_cpp factorial_javascript
reverse_python reverse_java reverse_cpp reverse_javascript
factorial_* reverse_*
This is what enables cross-language code search: you can find a Java
implementation by providing a Python query, or vice versa.
""")

View File

@ -0,0 +1,237 @@
"""
============================================================================
Example 4: Code Clone Detection
============================================================================
AISE501 AI in Software Engineering I
Fachhochschule Graubünden
GOAL:
Detect code clones (duplicate/similar code) in a collection of
functions using embeddings. We simulate a real-world scenario
where a codebase contains multiple near-duplicate implementations
that should be refactored into a single function.
WHAT YOU WILL LEARN:
- The four types of code clones (Type 14)
- How embeddings detect clones that text-based tools miss
- Ranking-based clone detection via cosine similarity
- Practical application: finding refactoring opportunities
CLONE TYPES:
Type 1: Exact copy (trivial grep can find these)
Type 2: Renamed variables (grep misses these)
Type 3: Modified structure (added/removed lines)
Type 4: Same functionality, completely different implementation
HARDWARE:
Works on CPU, CUDA (NVIDIA), and MPS (Apple Silicon Mac).
============================================================================
"""
import torch
from transformers import AutoTokenizer, AutoModel
import torch.nn.functional as F
from itertools import combinations
# ── Device selection ──────────────────────────────────────────────────────
def get_device():
    """Select the compute device, preferring CUDA, then Apple MPS, else CPU."""
    use_cuda = torch.cuda.is_available()
    use_mps = (not use_cuda) and torch.backends.mps.is_available()
    return torch.device("cuda" if use_cuda else "mps" if use_mps else "cpu")
DEVICE = get_device()
print(f"Using device: {DEVICE}\n")
# ── Load model ────────────────────────────────────────────────────────────
MODEL_NAME = "flax-sentence-embeddings/st-codesearch-distilroberta-base"
print(f"Loading model: {MODEL_NAME} ...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
model.eval()
print("Model loaded.\n")
# ── Simulated codebase ────────────────────────────────────────────────────
# These functions simulate what you'd find in a messy, real-world codebase
# where different developers wrote similar functionality independently.
#
# IMPORTANT: The clone groups share ZERO common words (besides Python
# keywords). This demonstrates that embeddings capture semantics, not
# surface-level text overlap. grep would never find these.
codebase = {
# ── Clone group 1: Computing the maximum of a list ──
# Three completely different implementations — no shared identifiers,
# no shared structure, but identical purpose.
"utils/find_max.py": """
def find_max(numbers):
result = numbers[0]
for candidate in numbers[1:]:
if candidate > result:
result = candidate
return result
""",
"legacy/find_max_old.py": """
def find_max(numbers):
result = numbers[0]
for candidate in numbers[1:]:
if candidate > result:
result = candidate
return result
""",
"analytics/top_scorer.py": """
import heapq
def fetch_top_element(collection):
return heapq.nlargest(1, collection)[0]
""",
"stats/dominant_value.py": """
def extract_peak(dataset):
dataset = sorted(dataset, reverse=True)
return dataset[0]
""",
# ── Clone group 2: String reversal ──
# Two implementations with zero lexical overlap — slicing vs index-based.
"text/flip_text.py": """
def flip_text(content):
return content[::-1]
""",
"helpers/mirror.py": """
def mirror_characters(phrase):
output = []
idx = len(phrase) - 1
while idx >= 0:
output.append(phrase[idx])
idx -= 1
return ''.join(output)
""",
# ── Not a clone: completely different functionality ──
# Each uses a different Python construct and domain to ensure
# they don't cluster with each other or with the clone groups.
"math/square_root.py": """
def square_root(x):
return x ** 0.5
""",
"calendar/leap_year.py": """
def is_leap_year(year):
return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
""",
"formatting/currency.py": """
def format_currency(amount, symbol="$"):
return f"{symbol}{amount:,.2f}"
""",
}
def embed_code(code: str) -> torch.Tensor:
    """Return a unit-length 768-dim embedding for one code snippet.

    Steps: tokenize (truncate to the 512-token training context), run the
    model without gradients, mean-pool over non-padding tokens, normalize.
    """
    tokens = tokenizer(
        code, return_tensors="pt", truncation=True, max_length=512, padding=True
    ).to(DEVICE)
    with torch.no_grad():
        per_token = model(**tokens).last_hidden_state
    # Masked mean pooling: ignore padding positions when averaging.
    weights = tokens["attention_mask"].unsqueeze(-1)
    snippet_vec = (per_token * weights).sum(dim=1) / weights.sum(dim=1)
    return F.normalize(snippet_vec, p=2, dim=1).squeeze(0)
# ── Embed all functions ───────────────────────────────────────────────────
print("Embedding all functions in the codebase...")
embeddings = {}  # file path → unit embedding vector
for path, code in codebase.items():
    embeddings[path] = embed_code(code)
    print(f" {path}")
print()
# ── Compute pairwise similarity matrix ────────────────────────────────────
paths = list(embeddings.keys())
n = len(paths)
def short_name(path):
    """Extract a readable label from a file path.

    Keeps only the final path component and strips a trailing ".py"
    extension. Uses str.removesuffix (Python 3.9+) rather than
    str.replace so that ".py" occurring in the MIDDLE of a file name is
    left intact — e.g. "pkg/my.py.bak" stays "my.py.bak" instead of
    collapsing to "my.bak".
    """
    return path.split("/")[-1].removesuffix(".py")
labels = [short_name(p) for p in paths]
# Dense pairwise similarity table keyed by (row, col) index. Vectors are
# unit-length, so a plain dot product equals cosine similarity.
sim_matrix = {}
for i in range(n):
    for j in range(n):
        sim = torch.dot(embeddings[paths[i]].cpu(), embeddings[paths[j]].cpu()).item()
        sim_matrix[(i, j)] = sim
# ── Print similarity matrix ───────────────────────────────────────────────
# Column width adapts to the longest label so the table stays aligned.
col_w = max(len(l) for l in labels) + 2
header_w = col_w
print("=" * 70)
print("SIMILARITY MATRIX")
print("=" * 70)
print(f"\n{'':>{header_w}}", end="")
for label in labels:
    print(f"{label:>{col_w}}", end="")
print()
for i in range(n):
    print(f"{labels[i]:>{header_w}}", end="")
    for j in range(n):
        print(f"{sim_matrix[(i, j)]:>{col_w}.3f}", end="")
    print()
# ── Most similar match per function ───────────────────────────────────────
print()
print(f"{'BEST MATCH':>{header_w}}", end="")
for i in range(n):
    # Linear scan for function i's highest-similarity partner, skipping the
    # diagonal (self-similarity is always 1.0 and never a clone candidate).
    best_j, best_sim = -1, -1.0
    for j in range(n):
        if i != j and sim_matrix[(i, j)] > best_sim:
            best_sim = sim_matrix[(i, j)]
            best_j = j
    print(f"{labels[best_j]:>{col_w}}", end="")
print()
print(f"{'(similarity)':>{header_w}}", end="")
for i in range(n):
    best_sim = max(sim_matrix[(i, j)] for j in range(n) if i != j)
    print(f"{best_sim:>{col_w}.3f}", end="")
print()
print(f"""
{'=' * 70}
INTERPRETATION:
{'=' * 70}
HOW TO READ THE TABLE:
Each cell shows the cosine similarity between two functions.
The BEST MATCH row shows which other function is most similar
to each column these are the clone candidates a developer
would investigate.
EXPECTED CLONE GROUPS:
1. find_max find_max_old (Type 1: exact copy)
Similarity 1.000
2. find_max / fetch_top_element / extract_peak (Type 4 clones)
Same purpose (find the largest value), completely different
code: for-loop vs heapq.nlargest() vs sorted(reverse=True)
Zero shared identifiers between implementations
3. flip_text mirror_characters (Type 4 clone)
Same purpose (reverse a string), completely different code:
slicing ([::-1]) vs while-loop with index
Zero shared identifiers
NON-CLONES:
square_root, is_leap_year, format_currency each use a different
domain and code structure. Their best matches should have low
similarity compared to the clone groups.
KEY INSIGHT:
The clone groups share NO common words (besides Python keywords
like def/return/if). grep or any text-matching tool would never
find these clones. Only semantic understanding which is what
embeddings provide can detect that these functions do the same
thing despite having completely different code.
""")

View File

@ -0,0 +1,216 @@
"""
============================================================================
Example 5: Visualizing Code Embeddings with PCA and t-SNE
============================================================================
AISE501 AI in Software Engineering I
Fachhochschule Graubünden
GOAL:
Reduce 768-dimensional code embeddings to 2D and plot them.
This makes the embedding space visible: you can SEE that similar
code clusters together and different code is far apart.
WHAT YOU WILL LEARN:
- How PCA projects high-dimensional vectors to 2D (linear reduction)
- How t-SNE creates a non-linear 2D map that preserves neighborhoods
- How to interpret embedding space visualizations
- That code functionality determines position, not syntax or language
OUTPUT:
Saves two PNG plots: code_embeddings_pca.png and code_embeddings_tsne.png
HARDWARE:
Works on CPU, CUDA (NVIDIA), and MPS (Apple Silicon Mac).
============================================================================
"""
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch.nn.functional as F
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import matplotlib
# Use a non-interactive backend so the script works in headless environments
matplotlib.use("Agg")
# ── Device selection ──────────────────────────────────────────────────────
def get_device():
    """Return the preferred torch device: CUDA if present, else MPS, else CPU."""
    for backend in ("cuda", "mps"):
        available = (
            torch.cuda.is_available()
            if backend == "cuda"
            else torch.backends.mps.is_available()
        )
        if available:
            return torch.device(backend)
    return torch.device("cpu")
DEVICE = get_device()
print(f"Using device: {DEVICE}\n")
# ── Load model ────────────────────────────────────────────────────────────
MODEL_NAME = "flax-sentence-embeddings/st-codesearch-distilroberta-base"
print(f"Loading model: {MODEL_NAME} ...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
model.eval()
print("Model loaded.\n")
# ── Code snippets organized by CATEGORY ───────────────────────────────────
# Each category represents a type of task. We expect snippets within the
# same category to cluster together in the embedding space.
categories = {
"Sorting": {
"bubble_sort_py": "def bubble_sort(arr):\n n = len(arr)\n for i in range(n):\n for j in range(n-i-1):\n if arr[j] > arr[j+1]:\n arr[j], arr[j+1] = arr[j+1], arr[j]\n return arr",
"quick_sort_py": "def quick_sort(a):\n if len(a) <= 1: return a\n p = a[0]\n return quick_sort([x for x in a[1:] if x < p]) + [p] + quick_sort([x for x in a[1:] if x >= p])",
"sort_js": "function sortArray(arr) { return arr.sort((a, b) => a - b); }",
"insertion_sort": "def insertion_sort(arr):\n for i in range(1, len(arr)):\n key = arr[i]\n j = i - 1\n while j >= 0 and arr[j] > key:\n arr[j+1] = arr[j]\n j -= 1\n arr[j+1] = key\n return arr",
},
"File I/O": {
"read_json": "import json\ndef read_json(path):\n with open(path) as f:\n return json.load(f)",
"write_file": "def write_file(path, content):\n with open(path, 'w') as f:\n f.write(content)",
"read_csv": "import csv\ndef read_csv(path):\n with open(path) as f:\n return list(csv.reader(f))",
"read_yaml": "import yaml\ndef read_yaml(path):\n with open(path) as f:\n return yaml.safe_load(f)",
},
"String ops": {
"reverse_str": "def reverse(s): return s[::-1]",
"capitalize": "def capitalize_words(s): return ' '.join(w.capitalize() for w in s.split())",
"count_chars": "def count_chars(s):\n return {c: s.count(c) for c in set(s)}",
"is_palindrome": "def is_palindrome(s): return s == s[::-1]",
},
"Math": {
"factorial": "def factorial(n):\n r = 1\n for i in range(2, n+1): r *= i\n return r",
"fibonacci": "def fib(n):\n a, b = 0, 1\n for _ in range(n): a, b = b, a+b\n return a",
"gcd": "def gcd(a, b):\n while b: a, b = b, a % b\n return a",
"is_prime": "def is_prime(n):\n if n < 2: return False\n for i in range(2, int(n**0.5)+1):\n if n % i == 0: return False\n return True",
},
"Networking": {
"http_get": "import requests\ndef http_get(url): return requests.get(url).json()",
"fetch_url": "import urllib.request\ndef fetch(url):\n with urllib.request.urlopen(url) as r:\n return r.read().decode()",
"post_data": "import requests\ndef post_json(url, data): return requests.post(url, json=data).status_code",
"download_file": "import urllib.request\ndef download(url, path): urllib.request.urlretrieve(url, path)",
},
}
def embed_code(code: str) -> np.ndarray:
    """Embed code and return the unit vector as a NumPy array.

    Same pipeline as the other examples (tokenize → no-grad forward pass →
    masked mean pooling → L2 normalization), but the result is converted to
    a CPU NumPy array because scikit-learn's PCA/TSNE consume ndarrays.

    Fix: the original annotated the return type as torch.Tensor, but the
    function actually returns a numpy.ndarray (note the trailing
    .cpu().numpy()); the annotation now matches the behavior.
    """
    inputs = tokenizer(
        code, return_tensors="pt", truncation=True, max_length=512, padding=True
    ).to(DEVICE)
    with torch.no_grad():
        outputs = model(**inputs)
    # Masked mean pooling: average only over non-padding token positions.
    mask = inputs["attention_mask"].unsqueeze(-1)
    embedding = (outputs.last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1)
    return F.normalize(embedding, p=2, dim=1).squeeze(0).cpu().numpy()
# ── Compute embeddings ────────────────────────────────────────────────────
print("Computing embeddings...")
all_embeddings = []   # parallel lists: vector, snippet label, category name
all_labels = []
all_categories = []
for category, snippets in categories.items():
    for label, code in snippets.items():
        vec = embed_code(code)
        all_embeddings.append(vec)
        all_labels.append(label)
        all_categories.append(category)
        print(f" [{category:12s}] {label}")
# Convert to numpy matrix: shape [num_snippets, 768]
X = np.stack(all_embeddings)
print(f"\nEmbedding matrix: {X.shape[0]} snippets × {X.shape[1]} dimensions\n")
# ── Color map for categories ──────────────────────────────────────────────
# One fixed color per category so both plots are directly comparable.
category_names = list(categories.keys())
colors = plt.cm.Set1(np.linspace(0, 1, len(category_names)))
color_map = {cat: colors[i] for i, cat in enumerate(category_names)}
point_colors = [color_map[cat] for cat in all_categories]
# ── Plot 1: PCA ──────────────────────────────────────────────────────────
# PCA finds the two directions of maximum variance in the 768-dim space
# and projects all points onto those two directions.
print("Computing PCA (2 components)...")
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
fig, ax = plt.subplots(figsize=(10, 8))
for i, (x, y) in enumerate(X_pca):
    ax.scatter(x, y, c=[point_colors[i]], s=100, edgecolors="black", linewidth=0.5, zorder=3)
    ax.annotate(all_labels[i], (x, y), fontsize=7, ha="center", va="bottom",
                xytext=(0, 6), textcoords="offset points")
# Legend: empty scatter calls register one legend entry per category.
for cat in category_names:
    ax.scatter([], [], c=[color_map[cat]], s=80, label=cat, edgecolors="black", linewidth=0.5)
ax.legend(loc="best", fontsize=9, title="Category", title_fontsize=10)
variance_explained = pca.explained_variance_ratio_
ax.set_title(f"Code Embeddings — PCA Projection\n"
             f"(PC1: {variance_explained[0]:.1%} variance, PC2: {variance_explained[1]:.1%} variance)",
             fontsize=13)
ax.set_xlabel("Principal Component 1", fontsize=11)
ax.set_ylabel("Principal Component 2", fontsize=11)
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig("code_embeddings_pca.png", dpi=150)
print(f" Saved: code_embeddings_pca.png")
print(f" Variance explained: PC1={variance_explained[0]:.1%}, PC2={variance_explained[1]:.1%}\n")
# ── Plot 2: t-SNE ────────────────────────────────────────────────────────
# t-SNE is a non-linear method that preserves LOCAL neighborhood structure.
# Points that are close in the original 768-dim space stay close in 2D.
# Perplexity controls the balance between local and global structure;
# it must be smaller than the sample count (here 5 categories × 4 = 20
# snippets), so perplexity=5 keeps neighborhoods meaningful at this scale.
print("Computing t-SNE (this may take a few seconds)...")
tsne = TSNE(n_components=2, perplexity=5, random_state=42, max_iter=1000)
X_tsne = tsne.fit_transform(X)
fig, ax = plt.subplots(figsize=(10, 8))
for i, (x, y) in enumerate(X_tsne):
    ax.scatter(x, y, c=[point_colors[i]], s=100, edgecolors="black", linewidth=0.5, zorder=3)
    ax.annotate(all_labels[i], (x, y), fontsize=7, ha="center", va="bottom",
                xytext=(0, 6), textcoords="offset points")
# Legend entries, one per category (same trick as the PCA plot).
for cat in category_names:
    ax.scatter([], [], c=[color_map[cat]], s=80, label=cat, edgecolors="black", linewidth=0.5)
ax.legend(loc="best", fontsize=9, title="Category", title_fontsize=10)
ax.set_title("Code Embeddings — t-SNE Projection\n"
             "(non-linear dimensionality reduction)", fontsize=13)
ax.set_xlabel("t-SNE Dimension 1", fontsize=11)
ax.set_ylabel("t-SNE Dimension 2", fontsize=11)
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig("code_embeddings_tsne.png", dpi=150)
print(f" Saved: code_embeddings_tsne.png\n")
print("=" * 70)
print("INTERPRETATION")
print("=" * 70)
print(f"""
Both plots project {X.shape[1]}-dimensional embedding vectors to 2D:
PCA (Principal Component Analysis):
- Linear projection onto the two axes of maximum variance.
- Preserves global structure: large distances are meaningful.
- Good for seeing overall separation between categories.
- The % variance tells you how much information is retained.
t-SNE (t-distributed Stochastic Neighbor Embedding):
- Non-linear: distorts distances but preserves neighborhoods.
- Points that are close in the original space stay close in 2D.
- Better at revealing tight clusters within categories.
- Distances BETWEEN clusters are not meaningful.
EXPECTED RESULT:
You should see 5 distinct clusters, one per category:
- Sorting functions (bubble, quick, insertion, JS sort) cluster together
- File I/O functions cluster together
- String operations cluster together
- Math functions cluster together
- Networking functions cluster together
This visually confirms that code embeddings organize code by
PURPOSE, not by surface syntax or programming language.
""")

View File

@ -0,0 +1,716 @@
"""
============================================================================
Example 6: PCA Denoising Can Fewer Dimensions Improve Similarity?
============================================================================
AISE501 AI in Software Engineering I
Fachhochschule Graubünden
HYPOTHESIS:
Embedding vectors live in a 768-dimensional space, but most of the
semantic signal may be concentrated in a small number of principal
components. The remaining dimensions could add "noise" that dilutes
cosine similarity. If true, projecting embeddings onto a small PCA
subspace should INCREASE similarity within semantic groups and
DECREASE similarity across groups making code search sharper.
WHAT YOU WILL LEARN:
- How PCA decomposes the embedding space into ranked components
- How to measure retrieval quality (intra- vs inter-group similarity)
- Whether dimensionality reduction helps or hurts in practice
- The concept of an "optimal" embedding dimension for a given task
OUTPUT:
Saves pca_denoising_analysis.png with three sub-plots.
HARDWARE:
Works on CPU, CUDA (NVIDIA), and MPS (Apple Silicon Mac).
============================================================================
"""
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch.nn.functional as F
from sklearn.decomposition import PCA
import matplotlib
# Fix: the backend must be selected BEFORE pyplot is imported; the original
# called matplotlib.use("Agg") after `import matplotlib.pyplot`, which is
# not guaranteed to take effect on all matplotlib versions.
matplotlib.use("Agg")
import matplotlib.pyplot as plt
# ── Device selection ──────────────────────────────────────────────────────
def get_device():
    """Return the preferred torch device: CUDA, then MPS, then CPU."""
    candidates = (
        ("cuda", torch.cuda.is_available),
        ("mps", torch.backends.mps.is_available),
    )
    for name, available in candidates:
        if available():
            return torch.device(name)
    return torch.device("cpu")
# Resolved once at import time; all tensors below are moved to this device.
DEVICE = get_device()
print(f"Using device: {DEVICE}\n")
# ── Load model ────────────────────────────────────────────────────────────
# DistilRoBERTa fine-tuned for code search; downloads on first run, then
# served from the local Hugging Face cache.
MODEL_NAME = "flax-sentence-embeddings/st-codesearch-distilroberta-base"
print(f"Loading model: {MODEL_NAME} ...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
model.eval()  # inference mode: disables dropout
print("Model loaded.\n")
# ── Code snippets organized into semantic GROUPS ──────────────────────────
# We need clear groups so we can measure intra-group vs inter-group similarity.
groups = {
"Sorting": {
"bubble_sort": """
def bubble_sort(arr):
n = len(arr)
for i in range(n):
for j in range(0, n - i - 1):
if arr[j] > arr[j + 1]:
arr[j], arr[j + 1] = arr[j + 1], arr[j]
return arr""",
"quick_sort": """
def quick_sort(arr):
if len(arr) <= 1:
return arr
pivot = arr[len(arr) // 2]
left = [x for x in arr if x < pivot]
middle = [x for x in arr if x == pivot]
right = [x for x in arr if x > pivot]
return quick_sort(left) + middle + quick_sort(right)""",
"merge_sort": """
def merge_sort(arr):
if len(arr) <= 1:
return arr
mid = len(arr) // 2
left = merge_sort(arr[:mid])
right = merge_sort(arr[mid:])
merged = []
i = j = 0
while i < len(left) and j < len(right):
if left[i] <= right[j]:
merged.append(left[i]); i += 1
else:
merged.append(right[j]); j += 1
return merged + left[i:] + right[j:]""",
"insertion_sort": """
def insertion_sort(arr):
for i in range(1, len(arr)):
key = arr[i]
j = i - 1
while j >= 0 and arr[j] > key:
arr[j + 1] = arr[j]
j -= 1
arr[j + 1] = key
return arr""",
"selection_sort": """
def selection_sort(arr):
for i in range(len(arr)):
min_idx = i
for j in range(i + 1, len(arr)):
if arr[j] < arr[min_idx]:
min_idx = j
arr[i], arr[min_idx] = arr[min_idx], arr[i]
return arr""",
"heap_sort": """
def heap_sort(arr):
import heapq
heapq.heapify(arr)
return [heapq.heappop(arr) for _ in range(len(arr))]""",
},
"File I/O": {
"read_json": """
import json
def read_json(path):
with open(path, 'r') as f:
return json.load(f)""",
"write_file": """
def write_file(path, content):
with open(path, 'w') as f:
f.write(content)""",
"read_csv": """
import csv
def read_csv(path):
with open(path, 'r') as f:
reader = csv.reader(f)
return list(reader)""",
"read_yaml": """
import yaml
def load_yaml(path):
with open(path, 'r') as f:
return yaml.safe_load(f)""",
"write_json": """
import json
def write_json(path, data):
with open(path, 'w') as f:
json.dump(data, f, indent=2)""",
"read_lines": """
def read_lines(path):
with open(path, 'r') as f:
return f.readlines()""",
},
"Math": {
"factorial": """
def factorial(n):
if n <= 1:
return 1
return n * factorial(n - 1)""",
"fibonacci": """
def fibonacci(n):
a, b = 0, 1
for _ in range(n):
a, b = b, a + b
return a""",
"gcd": """
def gcd(a, b):
while b:
a, b = b, a % b
return a""",
"is_prime": """
def is_prime(n):
if n < 2:
return False
for i in range(2, int(n**0.5) + 1):
if n % i == 0:
return False
return True""",
"power": """
def power(base, exp):
if exp == 0:
return 1
if exp % 2 == 0:
half = power(base, exp // 2)
return half * half
return base * power(base, exp - 1)""",
"sum_digits": """
def sum_digits(n):
total = 0
while n > 0:
total += n % 10
n //= 10
return total""",
},
"Networking": {
"http_get": """
import requests
def http_get(url):
response = requests.get(url)
return response.json()""",
"post_data": """
import requests
def post_data(url, payload):
response = requests.post(url, json=payload)
return response.status_code, response.json()""",
"fetch_url": """
import urllib.request
def fetch_url(url):
with urllib.request.urlopen(url) as resp:
return resp.read().decode('utf-8')""",
"download_file": """
import urllib.request
def download_file(url, dest):
urllib.request.urlretrieve(url, dest)
return dest""",
"http_put": """
import requests
def http_put(url, data):
response = requests.put(url, json=data)
return response.status_code""",
"http_delete": """
import requests
def http_delete(url):
response = requests.delete(url)
return response.status_code""",
},
"String ops": {
"reverse_str": """
def reverse_string(s):
return s[::-1]""",
"is_palindrome": """
def is_palindrome(s):
s = s.lower().replace(' ', '')
return s == s[::-1]""",
"count_vowels": """
def count_vowels(s):
return sum(1 for c in s.lower() if c in 'aeiou')""",
"capitalize_words": """
def capitalize_words(s):
return ' '.join(w.capitalize() for w in s.split())""",
"remove_duplicates": """
def remove_duplicate_chars(s):
seen = set()
result = []
for c in s:
if c not in seen:
seen.add(c)
result.append(c)
return ''.join(result)""",
"count_words": """
def count_words(text):
words = text.lower().split()
freq = {}
for w in words:
freq[w] = freq.get(w, 0) + 1
return freq""",
},
"Data structures": {
"stack_push_pop": """
class Stack:
def __init__(self):
self.items = []
def push(self, item):
self.items.append(item)
def pop(self):
return self.items.pop()""",
"queue_impl": """
from collections import deque
class Queue:
def __init__(self):
self.items = deque()
def enqueue(self, item):
self.items.append(item)
def dequeue(self):
return self.items.popleft()""",
"linked_list": """
class Node:
def __init__(self, val):
self.val = val
self.next = None
class LinkedList:
def __init__(self):
self.head = None
def append(self, val):
node = Node(val)
if not self.head:
self.head = node
return
curr = self.head
while curr.next:
curr = curr.next
curr.next = node""",
"binary_tree": """
class TreeNode:
def __init__(self, val):
self.val = val
self.left = None
self.right = None
def inorder(root):
if root:
yield from inorder(root.left)
yield root.val
yield from inorder(root.right)""",
"hash_map": """
class HashMap:
def __init__(self, size=256):
self.buckets = [[] for _ in range(size)]
def put(self, key, value):
idx = hash(key) % len(self.buckets)
for i, (k, v) in enumerate(self.buckets[idx]):
if k == key:
self.buckets[idx][i] = (key, value)
return
self.buckets[idx].append((key, value))""",
"priority_queue": """
import heapq
class PriorityQueue:
def __init__(self):
self.heap = []
def push(self, priority, item):
heapq.heappush(self.heap, (priority, item))
def pop(self):
return heapq.heappop(self.heap)[1]""",
},
}
def embed_code(code: str) -> torch.Tensor:
    """Embed one code string as an L2-normalized mean-pooled vector."""
    encoded = tokenizer(
        code, return_tensors="pt", truncation=True, max_length=512, padding=True
    ).to(DEVICE)
    with torch.no_grad():
        hidden = model(**encoded).last_hidden_state
    # Mean-pool over real tokens only: padding positions are masked out.
    weights = encoded["attention_mask"].unsqueeze(-1)
    pooled = (hidden * weights).sum(dim=1) / weights.sum(dim=1)
    return F.normalize(pooled, p=2, dim=1).squeeze(0)
# ── Step 1: Compute all embeddings ────────────────────────────────────────
print("Computing embeddings...")
all_names = []    # snippet identifiers, aligned with rows of X
all_labels = []   # category name per snippet (ground truth for metrics)
all_vectors = []  # one numpy vector per snippet
for group_name, snippets in groups.items():
    for snippet_name, code in snippets.items():
        vec = embed_code(code).cpu().numpy()
        all_names.append(snippet_name)
        all_labels.append(group_name)
        all_vectors.append(vec)
        print(f" [{group_name:12s}] {snippet_name}")
X = np.stack(all_vectors)  # shape: [N, 768]
N, D = X.shape
print(f"\nEmbedding matrix: {N} snippets × {D} dimensions\n")
# ── Step 2: Define similarity metrics ─────────────────────────────────────
def cosine_matrix(vectors):
    """Return the pairwise cosine-similarity matrix for the rows of `vectors`."""
    lengths = np.linalg.norm(vectors, axis=1, keepdims=True)
    # Guard against all-zero rows before dividing.
    unit = vectors / np.maximum(lengths, 1e-10)
    return unit @ unit.T
def compute_metrics(sim_matrix, labels):
    """
    Average similarity among same-category pairs (intra) and across
    different-category pairs (inter). The gap (intra - inter) measures
    how well the space discriminates between categories.
    """
    same, different = [], []
    count = len(labels)
    for a in range(count):
        for b in range(a + 1, count):
            bucket = same if labels[a] == labels[b] else different
            bucket.append(sim_matrix[a, b])
    intra_mean = np.mean(same)
    inter_mean = np.mean(different)
    return intra_mean, inter_mean, intra_mean - inter_mean
# ── Step 3: Sweep across PCA dimensions ──────────────────────────────────
# PCA can have at most min(N, D) components; cap accordingly.
max_components = min(N, D)
dims_to_test = sorted(set(
    k for k in [2, 3, 5, 8, 10, 15, 20, 30, 50, 75, 100, 150, 200,
                300, 400, 500, 600, D]
    if k <= max_components
))
# Always evaluate full dimensionality LAST as the baseline — but only once.
# (The original appended D unconditionally, which duplicates the baseline
# entry whenever D <= max_components, i.e. when N >= D.)
if not dims_to_test or dims_to_test[-1] != D:
    dims_to_test.append(D)
print("=" * 70)
print("PCA DENOISING EXPERIMENT")
print("=" * 70)
print(f"\n{'Components':>12s} {'Intra-Group':>12s} {'Inter-Group':>12s} "
      f"{'Gap':>8s} {'vs Full':>8s}")
print("-" * 62)
results = []
for k in dims_to_test:
    if k >= D:
        # Full dimensionality — no PCA, just use original vectors
        X_reduced = X.copy()
        actual_k = D
    else:
        pca = PCA(n_components=k, random_state=42)
        X_reduced = pca.fit_transform(X)
        actual_k = k
    sim = cosine_matrix(X_reduced)
    intra, inter, gap = compute_metrics(sim, all_labels)
    results.append((actual_k, intra, inter, gap))
# The last entry is guaranteed to be the full-dimensional baseline.
full_intra, full_inter, full_gap = results[-1][1], results[-1][2], results[-1][3]
for k, intra, inter, gap in results:
    delta = gap - full_gap
    delta_str = f"{delta:+.4f}" if k < D else " (base)"
    print(f"{k:>12d} {intra:>12.4f} {inter:>12.4f} {gap:>8.4f} {delta_str:>8s}")
# ── Step 4: Find the optimal dimensionality ──────────────────────────────
dims_arr = np.array([r[0] for r in results])
gaps_arr = np.array([r[3] for r in results])
best_idx = np.argmax(gaps_arr)
best_k, best_gap = int(dims_arr[best_idx]), gaps_arr[best_idx]
print(f"\n{'=' * 70}")
print(f"BEST DIMENSIONALITY: {best_k} components")
print(f" Gap (intra - inter): {best_gap:.4f} vs {full_gap:.4f} at full 768-d")
print(f" Improvement: {best_gap - full_gap:+.4f}")
print(f"{'=' * 70}")
# ── Step 5: Show detailed comparison at optimal k vs full ────────────────
print(f"\n── Detailed Similarity Matrix at k={best_k} vs k={D} ──\n")
# Re-fit PCA at the winning k (no-op if the full space already won).
if best_k < D:
    pca_best = PCA(n_components=best_k, random_state=42)
    X_best = pca_best.fit_transform(X)
else:
    X_best = X.copy()
sim_full = cosine_matrix(X)
sim_best = cosine_matrix(X_best)
# Show a selection of interesting pairs
print(f"{'Snippet A':>20s} {'Snippet B':>20s} {'Full 768d':>10s} "
      f"{'PCA {0}d'.format(best_k):>10s} {'Change':>8s}")
print("-" * 78)
interesting_pairs = [
    # Intra-group: should be high
    ("bubble_sort", "quick_sort"),
    ("bubble_sort", "merge_sort"),
    ("read_json", "read_csv"),
    ("http_get", "fetch_url"),
    ("factorial", "fibonacci"),
    ("reverse_str", "is_palindrome"),
    ("stack_push_pop", "queue_impl"),
    # Inter-group: should be low
    ("bubble_sort", "read_json"),
    ("factorial", "http_get"),
    ("reverse_str", "download_file"),
    ("is_prime", "write_file"),
    ("stack_push_pop", "count_vowels"),
]
for n1, n2 in interesting_pairs:
    i = all_names.index(n1)
    j = all_names.index(n2)
    s_full = sim_full[i, j]
    s_best = sim_best[i, j]
    same = all_labels[i] == all_labels[j]
    marker = "SAME" if same else "DIFF"
    change = s_best - s_full
    print(f"{n1:>20s} {n2:>20s} {s_full:>10.4f} {s_best:>10.4f} "
          f"{change:>+8.4f} [{marker}]")
# ── Step 6: Text-to-code search comparison ────────────────────────────────
# embed_code() is reused for the natural-language queries; the model was
# trained on code-comment pairs, so text and code share one vector space.
print(f"\n── Text-to-Code Search: Full 768d vs PCA {best_k}d ──\n")
search_queries = [
    ("sort a list of numbers", "Sorting"),
    ("read a JSON config file", "File I/O"),
    ("compute factorial recursively", "Math"),
    ("make an HTTP GET request", "Networking"),
    ("check if a number is prime", "Math"),
]
if best_k < D:
    pca_search = PCA(n_components=best_k, random_state=42)
    X_search = pca_search.fit_transform(X)
else:
    X_search = X.copy()
    pca_search = None  # sentinel: full space already optimal, no projection
for query, expected_group in search_queries:
    q_vec = embed_code(query).cpu().numpy().reshape(1, -1)
    # Full dimension search
    q_norm = q_vec / np.linalg.norm(q_vec)
    X_norm = X / np.linalg.norm(X, axis=1, keepdims=True)
    scores_full = (X_norm @ q_norm.T).flatten()
    # PCA-reduced search: the query must be projected with the SAME fit.
    if pca_search is not None:
        q_reduced = pca_search.transform(q_vec)
    else:
        q_reduced = q_vec.copy()
    q_r_norm = q_reduced / np.linalg.norm(q_reduced)
    X_s_norm = X_search / np.linalg.norm(X_search, axis=1, keepdims=True)
    scores_pca = (X_s_norm @ q_r_norm.T).flatten()
    top_full = np.argsort(-scores_full)[:3]
    top_pca = np.argsort(-scores_pca)[:3]
    print(f' Query: "{query}"')
    print(f' Full 768d: {all_names[top_full[0]]:>16s} ({scores_full[top_full[0]]:.3f})'
          f' {all_names[top_full[1]]:>16s} ({scores_full[top_full[1]]:.3f})'
          f' {all_names[top_full[2]]:>16s} ({scores_full[top_full[2]]:.3f})')
    print(f' PCA {best_k:>3d}d: {all_names[top_pca[0]]:>16s} ({scores_pca[top_pca[0]]:.3f})'
          f' {all_names[top_pca[1]]:>16s} ({scores_pca[top_pca[1]]:.3f})'
          f' {all_names[top_pca[2]]:>16s} ({scores_pca[top_pca[2]]:.3f})')
    full_correct = all_labels[top_full[0]] == expected_group
    pca_correct = all_labels[top_pca[0]] == expected_group
    print(f' Full correct: {full_correct} | PCA correct: {pca_correct}')
    print()
# ── Step 7: Visualization ─────────────────────────────────────────────────
# Six-panel figure for a comprehensive visual analysis.
group_colors = {
    "Sorting": "#1f77b4", "File I/O": "#ff7f0e", "Math": "#2ca02c",
    "Networking": "#d62728", "String ops": "#9467bd", "Data structures": "#8c564b",
}
label_colors = [group_colors[g] for g in all_labels]
# dict.fromkeys preserves first-seen order while deduplicating.
unique_groups = list(dict.fromkeys(all_labels))
fig = plt.figure(figsize=(20, 13))
fig.suptitle("PCA Denoising Analysis — Can Fewer Dimensions Improve Code Similarity?",
             fontsize=15, fontweight="bold", y=0.98)
# ── Row 1 ──
# Plot 1: Intra/inter similarity vs number of PCA components
ax1 = fig.add_subplot(2, 3, 1)
dims_plot = [r[0] for r in results]
intra_plot = [r[1] for r in results]
inter_plot = [r[2] for r in results]
ax1.fill_between(dims_plot, inter_plot, intra_plot, alpha=0.15, color="tab:green")
ax1.plot(dims_plot, intra_plot, "o-", color="tab:blue", linewidth=2,
         label="Intra-group (same category)", markersize=6)
ax1.plot(dims_plot, inter_plot, "s-", color="tab:red", linewidth=2,
         label="Inter-group (different category)", markersize=6)
ax1.axvline(x=best_k, color="green", linestyle="--", alpha=0.7,
            label=f"Best gap at k={best_k}")
ax1.set_xlabel("Number of PCA Components", fontsize=10)
ax1.set_ylabel("Average Cosine Similarity", fontsize=10)
ax1.set_title("(a) Intra- vs Inter-Group Similarity", fontsize=11, fontweight="bold")
ax1.legend(fontsize=7, loc="center right")
ax1.set_xscale("log")
ax1.grid(True, alpha=0.3)
# Plot 2: Gap (discriminability) vs number of PCA components
ax2 = fig.add_subplot(2, 3, 2)
gaps_plot = [r[3] for r in results]
ax2.plot(dims_plot, gaps_plot, "D-", color="tab:green", linewidth=2, markersize=7)
ax2.axvline(x=best_k, color="green", linestyle="--", alpha=0.7,
            label=f"Best k={best_k} (gap={best_gap:.3f})")
ax2.axhline(y=full_gap, color="gray", linestyle=":", alpha=0.7,
            label=f"Full 768d (gap={full_gap:.3f})")
# Shade only the region where reduced dimensionality beats the baseline.
ax2.fill_between(dims_plot, full_gap, gaps_plot, alpha=0.12, color="tab:green",
                 where=[g > full_gap for g in gaps_plot])
ax2.set_xlabel("Number of PCA Components", fontsize=10)
ax2.set_ylabel("Gap (Intra Inter)", fontsize=10)
ax2.set_title("(b) Discriminability vs Dimensionality", fontsize=11, fontweight="bold")
ax2.legend(fontsize=8)
ax2.set_xscale("log")
ax2.grid(True, alpha=0.3)
# Plot 3: Cumulative variance explained
pca_full = PCA(n_components=min(N, D), random_state=42)
pca_full.fit(X)
cumvar = np.cumsum(pca_full.explained_variance_ratio_) * 100
ax3 = fig.add_subplot(2, 3, 3)
ax3.plot(range(1, len(cumvar) + 1), cumvar, "-", color="tab:purple", linewidth=2)
ax3.axvline(x=best_k, color="green", linestyle="--", alpha=0.7,
            label=f"Best k={best_k}")
# Annotate how many components reach the 90/95/99% variance thresholds.
for threshold in [90, 95, 99]:
    k_thresh = np.searchsorted(cumvar, threshold) + 1
    if k_thresh <= len(cumvar):
        ax3.axhline(y=threshold, color="gray", linestyle=":", alpha=0.4)
        ax3.annotate(f"{threshold}% → k={k_thresh}", xy=(k_thresh, threshold),
                     fontsize=8, color="gray", ha="left",
                     xytext=(k_thresh + 1, threshold - 2))
ax3.set_xlabel("Number of PCA Components", fontsize=10)
ax3.set_ylabel("Cumulative Variance Explained (%)", fontsize=10)
ax3.set_title("(c) Variance Concentration", fontsize=11, fontweight="bold")
ax3.legend(fontsize=8)
ax3.set_xscale("log")
ax3.grid(True, alpha=0.3)
# ── Row 2 ──
# Plot 4 & 5: Side-by-side heatmaps (full vs PCA-denoised)
# Sort indices by group for a block-diagonal structure
sorted_idx = sorted(range(N), key=lambda i: all_labels[i])
sorted_names = [all_names[i] for i in sorted_idx]
sorted_labels = [all_labels[i] for i in sorted_idx]
sim_full_sorted = sim_full[np.ix_(sorted_idx, sorted_idx)]
sim_best_sorted = sim_best[np.ix_(sorted_idx, sorted_idx)]
for panel_idx, (mat, title_str) in enumerate([
    (sim_full_sorted, f"(d) Similarity Heatmap — Full 768d"),
    (sim_best_sorted, f"(e) Similarity Heatmap — PCA {best_k}d (Denoised)"),
]):
    ax = fig.add_subplot(2, 3, 4 + panel_idx)
    im = ax.imshow(mat, cmap="RdBu_r", vmin=-1, vmax=1, aspect="auto")
    ax.set_xticks(range(N))
    ax.set_yticks(range(N))
    ax.set_xticklabels(sorted_names, rotation=90, fontsize=5)
    ax.set_yticklabels(sorted_names, fontsize=5)
    # Draw group boundary lines
    prev_label = sorted_labels[0]
    for i, lab in enumerate(sorted_labels):
        if lab != prev_label:
            ax.axhline(y=i - 0.5, color="black", linewidth=1)
            ax.axvline(x=i - 0.5, color="black", linewidth=1)
            prev_label = lab
    ax.set_title(title_str, fontsize=11, fontweight="bold")
    plt.colorbar(im, ax=ax, shrink=0.8, label="Cosine Similarity")
# Plot 6: Bar chart comparing specific pairs at full vs PCA
ax6 = fig.add_subplot(2, 3, 6)
pair_labels = []
full_scores = []
pca_scores = []
pair_colors = []  # green = same group (expect high sim), red = different group
for n1, n2 in interesting_pairs:
    i = all_names.index(n1)
    j = all_names.index(n2)
    pair_labels.append(f"{n1}\nvs {n2}")
    full_scores.append(sim_full[i, j])
    pca_scores.append(sim_best[i, j])
    pair_colors.append("#2ca02c" if all_labels[i] == all_labels[j] else "#d62728")
y_pos = np.arange(len(pair_labels))
bar_h = 0.35
bars_full = ax6.barh(y_pos + bar_h / 2, full_scores, bar_h, label="Full 768d",
                     color="tab:blue", alpha=0.7)
bars_pca = ax6.barh(y_pos - bar_h / 2, pca_scores, bar_h, label=f"PCA {best_k}d",
                    color="tab:orange", alpha=0.7)
ax6.set_yticks(y_pos)
ax6.set_yticklabels(pair_labels, fontsize=6)
# Color each pair label by same/different group. Fix: the original tried to
# mark pairs with ax6.annotate("") — annotating an EMPTY string draws nothing
# at all, so the color coding silently never appeared.
for tick_label, col in zip(ax6.get_yticklabels(), pair_colors):
    tick_label.set_color(col)
ax6.set_xlabel("Cosine Similarity", fontsize=10)
ax6.set_title("(f) Pair Comparison: Full vs PCA Denoised", fontsize=11, fontweight="bold")
ax6.axvline(x=0, color="black", linewidth=0.5)
ax6.set_xlim(-1.1, 1.1)
ax6.grid(True, axis="x", alpha=0.3)
ax6.invert_yaxis()
# Single combined legend (the original called ax6.legend twice; the first
# call was immediately replaced by this one): both bar series plus colored
# markers explaining the label colors.
from matplotlib.lines import Line2D
dot_legend = [Line2D([0], [0], marker="o", color="w", markerfacecolor="#2ca02c",
                     markersize=8, label="Same group"),
              Line2D([0], [0], marker="o", color="w", markerfacecolor="#d62728",
                     markersize=8, label="Different group")]
ax6.legend(handles=[bars_full, bars_pca] + dot_legend, fontsize=7, loc="lower right")
# Reserve the top strip for the suptitle, then write the figure to disk.
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.savefig("pca_denoising_analysis.png", dpi=150, bbox_inches="tight")
# Fix: this was an f-string with no placeholders (needless f prefix).
print("\nSaved: pca_denoising_analysis.png")
# ── Summary ───────────────────────────────────────────────────────────────
print(f"""
{'=' * 70}
CONCLUSIONS
{'=' * 70}
1. VARIANCE CONCENTRATION:
The first few PCA components capture a disproportionate amount of
variance. This means the embedding space has low effective
dimensionality most of the 768 dimensions are semi-redundant.
2. DENOISING EFFECT:
At k={best_k}, the gap between intra-group and inter-group similarity
is {best_gap:.4f} (vs {full_gap:.4f} at full 768d).
{'PCA denoising IMPROVED discriminability by removing noisy dimensions.' if best_gap > full_gap else 'Full dimensionality was already optimal for this dataset.'}
3. PRACTICAL IMPLICATIONS:
- For retrieval (code search), moderate PCA reduction can sharpen
results while also reducing storage and computation.
- Too few dimensions (k=2,3) lose important signal.
- Too many dimensions may retain noise that dilutes similarity.
- The "sweet spot" depends on the dataset and task.
4. TRADE-OFF:
PCA denoising is a post-hoc technique. Newer embedding models are
trained with Matryoshka Representation Learning (MRL) that makes
the FIRST k dimensions maximally informative by design.
""")

93
Code embeddings/README.md Normal file
View File

@ -0,0 +1,93 @@
# Code Embeddings — Hands-On Examples
**AISE501 AI in Software Engineering I**
Fachhochschule Graubünden — Spring Semester 2026
## Overview
Seven self-contained Python programs that demonstrate how embedding
models work. Each script loads a pre-trained model, embeds text or code
snippets, and explores a different capability of embeddings.
| # | Script | What it demonstrates |
|---|--------|---------------------|
| 0 | `00_tokens_and_embeddings_intro.py` | Tokenization basics and general text embeddings (German) |
| 1 | `01_basic_embeddings.py` | Compute code embeddings and pairwise cosine similarity |
| 2 | `02_text_to_code_search.py` | Semantic search: find code from natural language queries |
| 3 | `03_cross_language.py` | Same algorithm in 4 languages → similar embeddings |
| 4 | `04_clone_detection.py` | Detect duplicate/similar code in a simulated codebase |
| 5 | `05_visualize_embeddings.py` | PCA and t-SNE plots of the embedding space |
| 6 | `06_pca_denoising.py` | PCA denoising: fewer dimensions can improve similarity |
## Setup
### 1. Create a virtual environment (recommended)
```bash
python -m venv venv
# macOS / Linux
source venv/bin/activate
# Windows
venv\Scripts\activate
```
### 2. Install dependencies
```bash
pip install -r requirements.txt
```
**PyTorch GPU support:**
- **Apple Silicon Mac (M1/M2/M3/M4):** MPS acceleration works
out of the box with the standard PyTorch install. No extra steps needed.
- **NVIDIA GPU (Windows/Linux):** Install the CUDA version of PyTorch.
See https://pytorch.org/get-started/locally/ for the correct command
for your CUDA version.
- **CPU only:** Everything works on CPU too, just a bit slower.
### 3. Run any example
```bash
python 00_tokens_and_embeddings_intro.py
python 01_basic_embeddings.py
python 02_text_to_code_search.py
python 03_cross_language.py
python 04_clone_detection.py
python 05_visualize_embeddings.py
python 06_pca_denoising.py
```
The first run will download the model (~300 MB). Subsequent runs
use the cached model.
## Model
All code embedding examples (01–06) use **st-codesearch-distilroberta-base**
(82M parameters), a DistilRoBERTa model fine-tuned on 1.38 million
code-comment pairs from CodeSearchNet using contrastive learning
(MultipleNegativesRankingLoss). It produces 768-dimensional embedding
vectors optimized for matching natural language descriptions to code,
making it ideal for semantic code search and similarity tasks.
The introductory example (00) uses **paraphrase-multilingual-mpnet-base-v2**
for demonstrating general language embeddings with German text.
## Hardware Requirements
- **RAM:** 1 GB free (for the model)
- **Disk:** ~500 MB (for the downloaded model, cached in `~/.cache/huggingface/`)
- **GPU:** Optional — all scripts auto-detect and use:
- CUDA (NVIDIA GPUs)
- MPS (Apple Silicon)
- CPU (fallback)
## Expected Output
Each script prints structured output with explanations. Example 5
saves two PNG images (`code_embeddings_pca.png` and
`code_embeddings_tsne.png`) showing the embedding space. Example 6
saves `pca_denoising_analysis.png` with six sub-plots analyzing
optimal embedding dimensions.

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 132 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 140 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 398 KiB

View File

@ -0,0 +1,6 @@
torch
transformers
sentence-transformers
scikit-learn
matplotlib
numpy

BIN
Prompting Exercise/.DS_Store vendored Normal file

Binary file not shown.

View File

@ -0,0 +1,67 @@
"""
analyze_me.py A data-processing script used in Exercise 2
==============================================================
This file contains several realistic bugs and style issues.
Do NOT fix them manually in Exercise 2 the LLM will help you find them!
Can you spot the issues yourself before asking the LLM?
"""
def calculate_statistics(numbers):
    """
    Return count, sum, average, min, max and variance of *numbers*.

    NOTE(review): this function is intentionally buggy — it is the exercise
    material. The inline "Bug N" comments mark the planted defects; they
    must NOT be fixed here.
    """
    total = 0
    for n in numbers:
        total = total + n
    average = total / len(numbers)  # Bug 1: ZeroDivisionError when list is empty
    min_val = numbers[0]  # Bug 2: IndexError when list is empty
    max_val = numbers[0]
    for n in numbers:
        if n < min_val:
            min_val = n
        if n > max_val:
            max_val = n
    variance = 0
    for n in numbers:
        variance = variance + (n - average) ** 2
    variance = variance / len(numbers)  # Bug 3: population variance (÷N), not sample variance (÷N-1)
    return {
        "count": len(numbers),
        "sum": total,
        "average": average,
        "min": min_val,
        "max": max_val,
        "variance": variance,
    }
def process_data(filename):
    """
    Read one integer per line from *filename*, print and return its stats.

    NOTE(review): intentionally buggy exercise material — see the inline
    "Bug N" comments; do not fix here.
    """
    numbers = []
    f = open(filename)  # Bug 4: no context manager (file may not be closed on error)
    for line in f:
        numbers.append(int(line.strip()))  # Bug 5: int() crashes on floats and blank lines
    f.close()
    result = calculate_statistics(numbers)
    print("Statistics:", result)
    return result
def normalize(numbers, method="minmax"):
    """
    Normalize *numbers* ("minmax" or "zscore").

    NOTE(review): intentionally buggy exercise material — see the inline
    "Bug N" comments; do not fix here.
    """
    if method == "minmax":
        mn = min(numbers)
        mx = max(numbers)
        return [(x - mn) / mx - mn for x in numbers]  # Bug 6: operator-precedence error
    elif method == "zscore":
        stats = calculate_statistics(numbers)
        std = stats["variance"] ** 0.5
        return [(x - stats["average"]) / std for x in numbers]
    else:
        print("Unknown normalisation method")  # Bug 7: should raise ValueError, not just print
if __name__ == "__main__":
sample = [4, 8, 15, 16, 23, 42]
print(calculate_statistics(sample))

View File

@ -0,0 +1,67 @@
"""
analyze_me.py A data-processing script used in Exercise 2
==============================================================
This file contains several realistic bugs and style issues.
Do NOT fix them manually in Exercise 2 the LLM will help you find them!
Can you spot the issues yourself before asking the LLM?
"""
def calculate_statistics(numbers):
    """
    Return a dict with count, sum, average, min, max and variance of *numbers*.

    NOTE(review): this is the un-annotated student handout of the exercise
    file — left exactly as distributed (the planted issues are the point
    of the exercise).
    """
    total = 0
    for n in numbers:
        total = total + n
    average = total / len(numbers)
    min_val = numbers[0]
    max_val = numbers[0]
    for n in numbers:
        if n < min_val:
            min_val = n
        if n > max_val:
            max_val = n
    variance = 0
    for n in numbers:
        variance = variance + (n - average) ** 2
    variance = variance / len(numbers)
    return {
        "count": len(numbers),
        "sum": total,
        "average": average,
        "min": min_val,
        "max": max_val,
        "variance": variance,
    }
def process_data(filename):
    """
    Read one integer per line from *filename*, print and return its stats.

    NOTE(review): un-annotated student handout — left as distributed.
    """
    numbers = []
    f = open(filename)
    for line in f:
        numbers.append(int(line.strip()))
    f.close()
    result = calculate_statistics(numbers)
    print("Statistics:", result)
    return result
def normalize(numbers, method="minmax"):
    """
    Normalize *numbers* ("minmax" or "zscore").

    NOTE(review): un-annotated student handout — left as distributed.
    """
    if method == "minmax":
        mn = min(numbers)
        mx = max(numbers)
        return [(x - mn) / mx - mn for x in numbers]
    elif method == "zscore":
        stats = calculate_statistics(numbers)
        std = stats["variance"] ** 0.5
        return [(x - stats["average"]) / std for x in numbers]
    else:
        print("Unknown normalisation method")
if __name__ == "__main__":
sample = [4, 8, 15, 16, 23, 42]
print(calculate_statistics(sample))

View File

@ -0,0 +1,89 @@
"""
analyze_me.py — A data-processing script used in Exercise 2
==============================================================
This file contains several realistic bugs and style issues.
Do NOT fix them manually in Exercise 2 — the LLM will help you find them!
Can you spot the issues yourself before asking the LLM?
"""
# Fix: `import sys` originally appeared BEFORE the module docstring, which
# demotes the string to a no-op expression (it is no longer __doc__).
import sys  # NOTE(review): unused in the visible code — kept, may be used further down
def calculate_statistics(numbers):
    """
    Return count, sum, average, min, max and variance of *numbers*.

    Raises
    ------
    ValueError
        If *numbers* is empty.

    Notes
    -----
    Variance is the SAMPLE variance (divides by N-1), which is what the
    exercise's bug list (Bug 3) identifies as the intended behavior; the
    previous version still divided by N. A single-element list yields 0.0.
    """
    if not numbers:
        raise ValueError("Cannot calculate statistics for an empty list.")
    total = 0
    for n in numbers:
        total = total + n
    count = len(numbers)
    average = total / count
    min_val = numbers[0]
    max_val = numbers[0]
    for n in numbers:
        if n < min_val:
            min_val = n
        if n > max_val:
            max_val = n
    variance = 0
    for n in numbers:
        variance = variance + (n - average) ** 2
    # Sample variance; guard N == 1 so we never divide by zero.
    variance = variance / (count - 1) if count > 1 else 0.0
    return {
        "count": count,
        "sum": total,
        "average": average,
        "min": min_val,
        "max": max_val,
        "variance": variance,
    }
def process_data(filename):
    """
    Read one integer per non-blank line from *filename*, print and return
    the statistics dictionary.

    Raises
    ------
    FileNotFoundError
        If *filename* does not exist (re-raised after logging).
    ValueError
        If a line cannot be parsed as an integer (re-raised after logging).
    """
    numbers = []
    try:
        with open(filename, 'r') as file_handle:
            for line in file_handle:
                stripped_line = line.strip()
                if stripped_line:
                    numbers.append(int(stripped_line))
    except FileNotFoundError:
        # Fix: the message was an f-string WITHOUT a placeholder, so the
        # offending path never appeared in the output.
        print(f"Error: File '{filename}' not found.")
        raise
    except ValueError as e:
        print(f"Error: Invalid integer in file: {e}")
        raise
    result = calculate_statistics(numbers)
    print("Statistics:", result)
    return result
def normalize(numbers, method="minmax"):
    """
    Normalize *numbers* with the given method.

    Parameters
    ----------
    numbers : list of int/float
    method : str
        "minmax" maps values into [0, 1]; "zscore" standardizes them.

    Returns
    -------
    list of float
        Constant input maps to all zeros under both methods.

    Raises
    ------
    ValueError
        If *numbers* is empty or *method* is unknown. (Fix for the
        exercise's Bug 7: the previous version only printed a warning and
        fell through, implicitly returning [] for unknown methods.)
    """
    if not numbers:
        raise ValueError("Cannot normalize an empty list.")
    if method == "minmax":
        mn = min(numbers)
        mx = max(numbers)
        if mx == mn:
            return [0.0 for _ in numbers]
        return [(x - mn) / (mx - mn) for x in numbers]
    elif method == "zscore":
        stats = calculate_statistics(numbers)
        std = stats["variance"] ** 0.5
        if std == 0:
            return [0.0 for _ in numbers]
        return [(x - stats["average"]) / std for x in numbers]
    else:
        raise ValueError(f"Unknown normalization method: {method!r}")
if __name__ == "__main__":
sample = [4, 8, 15, 16, 23, 42]
print(calculate_statistics(sample))

View File

@ -0,0 +1,192 @@
"""
analyze_me.py A data-processing script used in Exercise 2
==============================================================
This module provides robust functions for calculating statistics,
processing data files, and normalizing numeric lists.
All functions include PEP-484 type hints and NumPy-style docstrings.
"""
from typing import List, Dict, Union, Any
def calculate_statistics(numbers: List[Union[int, float]]) -> Dict[str, Any]:
    """
    Calculate basic statistics for a list of numbers.

    Parameters
    ----------
    numbers : List[Union[int, float]]
        The list of numeric values to analyze.

    Returns
    -------
    Dict[str, Any]
        A dictionary containing count, sum, average, min, max, and variance.
        If the input list is empty, returns a dictionary with zero values
        for all fields except count (which is 0).

    Notes
    -----
    - Variance is calculated using the sample variance formula (dividing
      by N-1). A single-element list has variance 0.0 — the previous
      version divided by (count - 1) == 0 and raised ZeroDivisionError.
    - If the list is empty, the function returns early to avoid division
      by zero or index errors.
    """
    count = len(numbers)
    if count == 0:
        return {
            "count": 0,
            "sum": 0.0,
            "average": 0.0,
            "min": 0.0,
            "max": 0.0,
            "variance": 0.0,
        }
    total = sum(numbers)
    average = total / count
    min_val = min(numbers)
    max_val = max(numbers)
    # Calculate sample variance (divide by N-1); guard count == 1.
    if count > 1:
        variance_sum = sum((n - average) ** 2 for n in numbers)
        variance = variance_sum / (count - 1)
    else:
        variance = 0.0
    return {
        "count": count,
        "sum": total,
        "average": average,
        "min": min_val,
        "max": max_val,
        "variance": variance,
    }
def process_data(filename: str) -> Dict[str, Any]:
    """
    Read numeric data from a file and calculate statistics.

    Parameters
    ----------
    filename : str
        Path to the input file containing one number per line.
        Blank lines and non-numeric lines are skipped.

    Returns
    -------
    Dict[str, Any]
        The statistics dictionary returned by calculate_statistics().

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.
    IOError
        If the file exists but cannot be read.
    ValueError
        If the file contains no valid numbers.
    """
    numbers: List[Union[int, float]] = []
    try:
        with open(filename, 'r') as f:
            for line in f:
                stripped = line.strip()
                if not stripped:
                    continue
                try:
                    # Attempt to parse as float to handle both int and float
                    numbers.append(float(stripped))
                except ValueError:
                    # Skip non-numeric lines
                    continue
    except FileNotFoundError:
        # Bug fix: the original f-strings contained no placeholder, so the
        # offending filename never appeared in any of these error messages.
        raise FileNotFoundError(f"File not found: {filename}")
    except IOError as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise IOError(f"Error reading file {filename}: {e}") from e
    if not numbers:
        raise ValueError(f"No valid numeric data found in {filename}")
    result = calculate_statistics(numbers)
    print("Statistics:", result)
    return result
def normalize(numbers: List[Union[int, float]], method: str = "minmax") -> List[float]:
    """
    Rescale *numbers* with the chosen normalization scheme.

    Parameters
    ----------
    numbers : List[Union[int, float]]
        Values to rescale.
    method : str, optional
        "minmax" maps the values into [0, 1]; "zscore" standardizes them
        to mean 0 and standard deviation 1. Defaults to "minmax".

    Returns
    -------
    List[float]
        The rescaled values. Degenerate inputs (zero range or zero
        standard deviation) yield a list of zeros.

    Raises
    ------
    ValueError
        If *numbers* is empty or *method* is not a supported option.
    """
    if not numbers:
        raise ValueError("Cannot normalize an empty list.")
    if method == "minmax":
        low, high = min(numbers), max(numbers)
        span = high - low
        if span == 0:
            # Constant input: every value maps to zero.
            return [0.0] * len(numbers)
        return [(value - low) / span for value in numbers]
    if method == "zscore":
        summary = calculate_statistics(numbers)
        spread = summary["variance"] ** 0.5
        if spread == 0:
            # Zero standard deviation: every value maps to zero.
            return [0.0] * len(numbers)
        mean = summary["average"]
        return [(value - mean) / spread for value in numbers]
    raise ValueError(f"Unknown normalization method: '{method}'. "
                     f"Supported methods: 'minmax', 'zscore'.")
if __name__ == "__main__":
    # Basic sanity checks
    demo_data = [4, 8, 15, 16, 23, 42]
    print("Testing calculate_statistics:")
    print(calculate_statistics(demo_data))
    print("\nTesting normalize (minmax):")
    print(normalize(demo_data, "minmax"))
    print("\nTesting normalize (zscore):")
    print(normalize(demo_data, "zscore"))
    print("\nTesting empty list handling:")
    print(calculate_statistics([]))
    print("\nTesting unknown method error:")
    try:
        normalize(demo_data, "unknown")
    except ValueError as err:
        print(f"Caught expected error: {err}")
    print("\nAll sanity checks passed!")

View File

@ -0,0 +1,89 @@
import sys
"""
analyze_me.py A data-processing script used in Exercise 2
==============================================================
This file contains several realistic bugs and style issues.
Do NOT fix them manually in Exercise 2 the LLM will help you find them!
Can you spot the issues yourself before asking the LLM?
"""
def calculate_statistics(numbers):
    """Return count, sum, average, min, max, and variance of *numbers*.

    For an empty input a zeroed dictionary is returned, with ``min`` and
    ``max`` reported as ``None``.

    NOTE(review): the module docstring states this file intentionally
    contains bugs and style issues for Exercise 2, so nothing is fixed here.
    """
    if not numbers:
        return {
            "count": 0,
            "sum": 0,
            "average": 0.0,
            "min": None,
            "max": None,
            "variance": 0.0,
        }
    # Manual summation; sum(numbers) would be the idiomatic equivalent.
    total = 0
    for n in numbers:
        total = total + n
    average = total / len(numbers)
    # Manual min/max scan over the list.
    min_val = numbers[0]
    max_val = numbers[0]
    for n in numbers:
        if n < min_val:
            min_val = n
        if n > max_val:
            max_val = n
    # Sum of squared deviations, then sample variance (divide by N-1);
    # guarded so a single-element list yields 0.0 instead of dividing by zero.
    variance = 0
    for n in numbers:
        variance = variance + (n - average) ** 2
    variance = variance / (len(numbers) - 1) if len(numbers) > 1 else 0.0
    return {
        "count": len(numbers),
        "sum": total,
        "average": average,
        "min": min_val,
        "max": max_val,
        "variance": variance,
    }
def process_data(filename):
    """Read one number per line from *filename*, print and return its stats.

    Blank lines and lines that do not parse as a float are skipped.

    NOTE(review): errors from ``open()`` (e.g. FileNotFoundError) propagate
    to the caller — left as-is, since the module docstring says this file's
    issues are intentional for the exercise.
    """
    numbers = []
    with open(filename) as file_handle:
        for line in file_handle:
            stripped = line.strip()
            if not stripped:
                continue
            try:
                numbers.append(float(stripped))
            except ValueError:
                # Non-numeric line: skip it silently.
                continue
    result = calculate_statistics(numbers)
    print("Statistics:", result)
    return result
def normalize(numbers, method="minmax"):
    """Normalize *numbers* via "minmax" (scale to [0, 1]) or "zscore".

    Degenerate inputs (zero range / zero standard deviation) return a list
    of zeros.

    NOTE(review): an empty list is not guarded — on the minmax path
    ``min([])`` would raise ValueError. Left as-is; the module docstring
    says this file's issues are intentional for the exercise.
    """
    if method == "minmax":
        mn = min(numbers)
        mx = max(numbers)
        if mx == mn:
            # All values identical: avoid dividing by a zero range.
            return [0.0 for _ in numbers]
        return [(x - mn) / (mx - mn) for x in numbers]
    elif method == "zscore":
        stats = calculate_statistics(numbers)
        # Standard deviation derived from the sample variance.
        std = stats["variance"] ** 0.5
        if std == 0:
            return [0.0 for _ in numbers]
        return [(x - stats["average"]) / std for x in numbers]
    else:
        raise ValueError(f"Unknown normalization method: {method}")
if __name__ == "__main__":
    # Quick demo: print the statistics for a small fixed sample.
    demo_values = [4, 8, 15, 16, 23, 42]
    print(calculate_statistics(demo_values))

View File

@ -0,0 +1,216 @@
"""
analyze_me.py A data-processing script used in Exercise 2
==============================================================
This file contains several realistic bugs and style issues.
Do NOT fix them manually in Exercise 2 the LLM will help you find them!
Can you spot the issues yourself before asking the LLM?
"""
def calculate_statistics(numbers: list[float]) -> dict[str, float]:
    """
    Calculate basic statistical measures for a list of numbers.

    This function computes the count, sum, average, minimum, maximum, and
    variance (sample variance, dividing by N-1 when N > 1) of the provided
    list of numbers.

    Parameters
    ----------
    numbers : list[float]
        A list of numeric values to analyze.

    Returns
    -------
    dict[str, float]
        A dictionary containing the following keys:
        - 'count': The number of elements in the list.
        - 'sum': The sum of all elements.
        - 'average': The arithmetic mean of the elements.
        - 'min': The minimum value in the list.
        - 'max': The maximum value in the list.
        - 'variance': The sample variance of the elements (0.0 when N <= 1).

    Notes
    -----
    An empty input list is handled by the early return below (Step 2), so
    no ZeroDivisionError or IndexError can occur for empty input.
    """
    # Step 2 Implement empty list handling in calculate_statistics
    if not numbers:
        return {
            "count": 0,
            "sum": 0.0,
            "average": 0.0,
            "min": 0.0,
            "max": 0.0,
            "variance": 0.0,
        }
    total = 0
    for n in numbers:
        total = total + n
    average = total / len(numbers)  # Safe: empty input returned early above
    min_val = numbers[0]  # Safe: list is known to be non-empty here
    max_val = numbers[0]
    for n in numbers:
        if n < min_val:
            min_val = n
        if n > max_val:
            max_val = n
    variance = 0
    for n in numbers:
        variance = variance + (n - average) ** 2
    # Step 3 Correct variance calculation to use sample variance
    count = len(numbers)
    if count > 1:
        variance = variance / (count - 1)
    else:
        variance = 0.0
    return {
        "count": len(numbers),
        "sum": total,
        "average": average,
        "min": min_val,
        "max": max_val,
        "variance": variance,
    }
# Step 4 Define type hints and docstrings for process_data
def process_data(filename: str) -> dict[str, float]:
    """
    Read numeric data from a file and compute statistics.

    This function opens a text file, reads each line, converts it to a
    float (so both integer and decimal literals are accepted), and collects
    the values into a list. It then passes this list to
    calculate_statistics to compute and return the statistical summary.

    Parameters
    ----------
    filename : str
        The path to the text file containing one number per line.

    Returns
    -------
    dict[str, float]
        A dictionary containing the statistical measures computed from the file data.

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist (propagated from open()).

    Notes
    -----
    Lines that cannot be parsed as a number are skipped rather than raising
    ValueError; blank lines are skipped as well.
    """
    numbers = []
    # Step 5 Implement context manager and robust line parsing in process_data
    with open(filename) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                continue
            try:
                # Attempt to convert to float first to handle both ints and floats
                value = float(stripped)
                numbers.append(value)
            except ValueError:
                # Skip lines that cannot be converted to a number
                continue
    result = calculate_statistics(numbers)
    print("Statistics:", result)
    return result
# Step 6 Define type hints and docstrings for normalize
def normalize(numbers: list[float], method: str = "minmax") -> list[float]:
    """
    Normalize *numbers* with min-max scaling or z-score standardisation.

    Parameters
    ----------
    numbers : list[float]
        Values to normalize.
    method : str, optional
        Either "minmax" (scale into [0, 1]) or "zscore" (mean 0,
        standard deviation 1). Defaults to "minmax".

    Returns
    -------
    list[float]
        The normalized values.

    Raises
    ------
    ValueError
        If *method* is not one of the supported options.
    ZeroDivisionError
        If all values are identical ("minmax") or the standard deviation
        is zero ("zscore").

    Examples
    --------
    >>> normalize([1, 2, 3, 4, 5])
    [0.0, 0.25, 0.5, 0.75, 1.0]
    """
    if method == "minmax":
        low = min(numbers)
        spread = max(numbers) - low
        # A zero range deliberately raises ZeroDivisionError (documented above).
        return [(value - low) / spread for value in numbers]
    if method == "zscore":
        summary = calculate_statistics(numbers)
        mean = summary["average"]
        std_dev = summary["variance"] ** 0.5
        return [(value - mean) / std_dev for value in numbers]
    raise ValueError(f"Unknown normalisation method: {method}")
if __name__ == "__main__":
    # Step 9 Implement and verify main block sanity checks
    # NOTE(review): ``assert`` is stripped under ``python -O``; acceptable
    # for a demo script, but not a substitute for real tests.
    sample = [4, 8, 15, 16, 23, 42]
    stats = calculate_statistics(sample)
    # Verify expected values for sample data
    expected_sum = 4 + 8 + 15 + 16 + 23 + 42
    expected_count = 6
    expected_avg = expected_sum / expected_count
    assert stats["count"] == expected_count, f"Count mismatch: {stats['count']} != {expected_count}"
    assert stats["sum"] == expected_sum, f"Sum mismatch: {stats['sum']} != {expected_sum}"
    # Float comparisons use a tolerance rather than exact equality.
    assert abs(stats["average"] - expected_avg) < 1e-9, f"Average mismatch: {stats['average']} != {expected_avg}"
    assert stats["min"] == 4, f"Min mismatch: {stats['min']} != 4"
    assert stats["max"] == 42, f"Max mismatch: {stats['max']} != 42"
    # Test empty list handling
    empty_stats = calculate_statistics([])
    assert empty_stats["count"] == 0, "Empty list count should be 0"
    assert empty_stats["sum"] == 0.0, "Empty list sum should be 0.0"
    assert empty_stats["average"] == 0.0, "Empty list average should be 0.0"
    assert empty_stats["min"] == 0.0, "Empty list min should be 0.0"
    assert empty_stats["max"] == 0.0, "Empty list max should be 0.0"
    assert empty_stats["variance"] == 0.0, "Empty list variance should be 0.0"
    # Test normalization
    normalized = normalize([1, 2, 3, 4, 5])
    expected_normalized = [0.0, 0.25, 0.5, 0.75, 1.0]
    assert len(normalized) == 5, "Normalized list length mismatch"
    for i, val in enumerate(normalized):
        assert abs(val - expected_normalized[i]) < 1e-9, f"Normalized value mismatch at index {i}"
    print("All sanity checks passed!")

View File

@ -0,0 +1,142 @@
"""
Exercise 1 Basic XML Structured Prompting
============================================
AISE501 · Prompting in Coding · Spring Semester 2026
Learning goals
--------------
* Connect to the local LLM server and send your first prompt.
* Understand the difference between unstructured and XML-structured prompts.
* See how structure helps the model parse and prioritise different parts
of your request.
Tasks
-----
Part A Run the unstructured prompt (already done for you). Read the response.
Part B Complete the XML-structured version of the same request (TODOs 1-3).
Part C Add a system prompt to set the response style (TODOs 4-5).
"""
from server_utils import chat, get_client, print_messages, print_separator
# Flat exercise script: Part A is pre-built, Parts B/C are the student's work.
client = get_client()

# ── Part A: Unstructured (Zero-Shot) Prompt ───────────────────────────────────
# This section is complete. Run it, read the response, then move on.
print_separator("Part A Unstructured Prompt")
unstructured_messages = [
    {
        "role": "user",
        "content": (
            "Explain what a Python list comprehension is, "
            "give an example that filters even numbers from a list, "
            "and list two common mistakes beginners make."
        ),
    }
]
# NOTE(review): Part A and Part B calls are commented out, so only Part C
# actually contacts the server when this script runs.
# print_messages(unstructured_messages)  # ← always inspect what you send!
# response_a = chat(client, unstructured_messages)
# print(response_a)

# ── Part B: Structured Prompt with XML Tags ───────────────────────────────────
# Use XML tags to structure the same request more precisely.
# Named sections help the model parse and prioritise your intent.
print_separator("Part B Structured Prompt with XML Tags")
# TODO 1: Fill in the three XML sections below.
# Use the same topic as Part A but make each section specific.
#
# <topic> the Python concept to explain
# <example> what the code example should demonstrate
# <focus> two or three specific points you want covered in the answer
#
# Tip: XML tag names are arbitrary — choose names that make sense to a
# human reader and the model will understand them too.
structured_content = """\
<request>
<topic>
Python list comprehensions
</topic>
<example>
Filter even numbers from a list
</example>
<focus>
Syntax overview and two common beginner mistakes
</focus>
</request>"""
# TODO 2: Build the messages list.
# Use structured_content as the content of a "user" message.
#
# Reminder: messages is a list of dicts with keys "role" and "content".
# "role" is one of "system", "user", or "assistant".
structured_messages = [
    # TODO: add the user message dict here
    {
        "role": "user",
        "content": structured_content,
    }
]
# TODO 3: Call chat() with structured_messages, store the result, print it.
# Compare the output with response_a above.
# Always call print_messages() before chat() to see the full prompt.
# print_messages(structured_messages)
# response_b = chat(client, structured_messages)
# print(response_b)

# ── Part C: Adding a System Prompt ────────────────────────────────────────────
# A system prompt lets you define a persona and global rules for every
# response in the conversation without repeating yourself each time.
print_separator("Part C Adding a System Prompt")
# TODO 4: Write an XML-structured system prompt that defines:
# <persona> who the LLM should be
# <style> tone and formatting rules
# <constraints> length or content limits
#
# Example persona: "experienced Python tutor who always shows code first"
# NOTE(review): the <constraints> tag asks for JSON output while <style>
# describes PEP 8 code — confirm which response format is actually wanted.
system_content = """\
<request>
<persona>You are a master python developer and teacher</persona>
<style>You follow the PEP 8 style guide</style>
<constraints>Format your response in json</constraints>
</request>
"""
# TODO 5: Build a messages list that puts the system prompt FIRST (role="system"),
# followed by the structured user message from Part B.
# Call chat() and print the result.
#
# Reflection: How did the system prompt change the answer compared to Part B?
messages_c = [
    {"role": "system", "content": system_content},
    {"role": "user", "content": structured_content}
]
print_messages(messages_c)
response_c = chat(client, messages_c)
print(response_c)

# ── Reflection Questions ──────────────────────────────────────────────────────
print_separator("Reflection Questions")
print(
    "1. How did XML structure change the format and depth of the response?\n"
    "2. What happens if you use inconsistent or missing closing tags?\n"
    "3. When would you NOT bother with XML structure?\n"
    "4. How does the system prompt interact with the user message?\n"
)

View File

@ -0,0 +1,91 @@
"""
Exercise 1 SOLUTION Basic XML Structured Prompting
=======================================================
AISE501 · Prompting in Coding · Spring Semester 2026
"""
from server_utils import chat, get_client, print_messages, print_separator
# Reference solution for Exercise 1: runs all three parts against the server.
client = get_client()
# Shared sampling temperature for Parts B and C (Part A uses the default).
# NOTE(review): presumably kept low for more reproducible comparisons — confirm.
temperature_value = 0.3  # PEP 8 fix: spaces around the assignment operator

# ── Part A: Unstructured (Zero-Shot) Prompt ───────────────────────────────────
print_separator("Part A Unstructured Prompt")
unstructured_messages = [
    {
        "role": "user",
        "content": (
            "Explain what a Python list comprehension is, "
            "give an example that filters even numbers from a list, "
            "and list two common mistakes beginners make."
        ),
    }
]
print_messages(unstructured_messages)
response_a = chat(client, unstructured_messages)
print(response_a)

# ── Part B: Structured Prompt with XML Tags ───────────────────────────────────
print_separator("Part B Structured Prompt with XML Tags")
structured_content = """\
<request>
<topic>
Python list comprehensions
</topic>
<example>
A list comprehension that takes a list of integers and returns only
the even numbers, using a conditional filter expression.
</example>
<focus>
1. The general syntax: [expression for item in iterable if condition]
2. Two common beginner mistakes when writing list comprehensions
</focus>
</request>"""
structured_messages = [
    {"role": "user", "content": structured_content}
]
print_messages(structured_messages)
response_b = chat(client, structured_messages, temperature=temperature_value)
print(response_b)

# ── Part C: Adding a System Prompt ────────────────────────────────────────────
print_separator("Part C Adding a System Prompt")
system_content = """\
<persona>
You are an experienced Python tutor. You teach Python to university students
who have basic programming knowledge but are new to idiomatic Python.
</persona>
<style>
Always show a working code snippet first, then explain it step by step.
Use plain language. Avoid jargon without defining it. Write python in PEP8 style
</style>
<constraints>
Keep each answer under 200 words. Use at most one code block per response.
</constraints>"""
messages_c = [
    {"role": "system", "content": system_content},
    {"role": "user", "content": structured_content},
]
print_messages(messages_c)
# PEP 8 fix: space after the comma in the argument list.
response_c = chat(client, messages_c, temperature=temperature_value)
print(response_c)

# ── Reflection Questions ──────────────────────────────────────────────────────
print_separator("Reflection Questions")
print(
    "1. How did XML structure change the format and depth of the response?\n"
    "2. What happens if you use inconsistent or missing closing tags?\n"
    "3. When would you NOT bother with XML structure?\n"
    "4. How does the system prompt interact with the user message?\n"
)

View File

@ -0,0 +1,151 @@
"""
Exercise 2 Persona, Task, and Data in a Structured Prompt
============================================================
AISE501 · Prompting in Coding · Spring Semester 2026
Learning goals
--------------
* Use XML tags to separate three prompt concerns: WHO the LLM is,
WHAT it should do, and the DATA it should work with.
* Pass a real Python file as context (RAG-style) inside a <code> tag.
* Iterate on the prompt to extract more specific information.
The file analyze_me.py contains several bugs and style issues.
You will ask the LLM to find and explain them.
Tasks
-----
Part A Build a structured prompt with <persona>, <task>, and <code> tags
and ask the LLM to review analyze_me.py (TODOs 1-4).
Part B Refine the prompt to request a prioritised bug list (TODOs 5-6).
Part C Ask for a corrected version of one specific function (TODO 7).
"""
from pathlib import Path
from server_utils import chat, get_client, print_messages, print_separator
client = get_client()
# Read the file we want the LLM to analyse
code_to_review = Path("analyze_me.py").read_text()

# ── Part A: Persona + Task + Code ─────────────────────────────────────────────
print_separator("Part A Structured Prompt: Persona / Task / Code")
# TODO 1: Fill in the <persona> tag.
# Define a senior Python engineer who is rigorous about correctness
# and follows PEP-8 and best practices.
# TODO 2: Fill in the <task> tag.
# Ask the LLM to review the Python code and identify ALL bugs,
# listing each one with a short explanation of why it is a bug.
# TODO 3: The <code> tag already contains the file — do not change it.
# TODO 4: Build the messages list using only a user message (no system prompt yet).
# Call chat() and print the result.
prompt_a = f"""\
<persona>
You are a Python engineer who is rigorous about correctness and follows PEP-8 and best practices.
</persona>
<task>
Review the Python code and identify ALL bugs, listing each one with a short explanation of why it is a bug.
</task>
<code language="python" filename="analyze_me.py">
{code_to_review}
</code>"""
messages_a = [
    {"role": "user", "content": prompt_a}
]
# print_messages(messages_a)
# response_a = chat(client, messages_a)
# print(response_a)

# ── Part B: Refine Ask for a Prioritised Bug List ───────────────────────────
print_separator("Part B Refined Prompt: Prioritised Bug List")
# TODO 5: Extend the <task> from Part A to ask the LLM to:
# - Separate bugs by severity: Critical / Medium / Style
# - For each bug: state the line number, the problem, and a one-line fix hint
#
# Tip: add a <output_format> tag that describes exactly how you want the answer
# structured (plain text for now — we tackle real machine output in Ex 3).
# TODO 6: Build messages_b with a system prompt that reinforces the persona
# and a user message with the refined prompt.
# Call chat() and print the result.
# NOTE(review): this system prompt demands JSON output, while the TODO above
# describes plain text — confirm which format Part B should request.
system_b = """\
<request>
<persona>You are a master python developer and teacher</persona>
<style>You follow the PEP 8 style guide</style>
<constraints>Format your response in json</constraints>
</request>
"""
# NOTE(review): the <output_format> section below still contains the "..."
# placeholder and is sent to the model verbatim — fill it in (TODO 5).
prompt_b = f"""\
<persona>
You are a Python engineer who is rigorous about correctness and follows PEP-8 and best practices.
</persona>
<task>
Review the Python code and identify ALL bugs, listing each one with a short explanation of why it is a bug.
Separate bugs by severity: Critical / Medium / Style
For each bug: state the line number, the problem, and a one-line fix hint
</task>
<output_format>
...
</output_format>
<code language="python" filename="analyze_me.py">
{code_to_review}
</code>"""
messages_b = [
    {"role": "system", "content": system_b},
    {"role": "user", "content": prompt_b},
]
print_messages(messages_b)
response_b = chat(client, messages_b)
print(response_b)

# ── Part C: Request a Corrected Function ──────────────────────────────────────
print_separator("Part C Ask for a Corrected Function")
# TODO 7: Pick one buggy function from analyze_me.py (e.g. calculate_statistics).
# Write a new user message — continuing the SAME conversation as Part B —
# that asks the LLM to rewrite that function with all bugs fixed,
# including proper type hints and a docstring.
#
# Key insight: you can reuse the model's previous response by appending it to
# the messages list as an "assistant" message, then adding a new "user" message.
# This is how multi-turn conversations work with the API.
messages_c = messages_b + [
    {"role": "assistant", "content": response_b},  # LLM's previous answer
    {"role": "user", "content": "Fix all bugs, keep the rest as it is"},
]
print_messages(messages_c)
response_c = chat(client, messages_c)
print(response_c)

# ── Reflection Questions ──────────────────────────────────────────────────────
print_separator("Reflection Questions")
print(
    "1. Did the LLM find all 7 bugs? Which did it miss?\n"
    "2. How did the <output_format> tag change the structure of the answer?\n"
    "3. What is the advantage of continuing a conversation vs. starting fresh?\n"
    "4. How would you scale this pattern to a large codebase (many files)?\n"
)

View File

@ -0,0 +1,122 @@
"""
Exercise 2 SOLUTION Persona, Task, and Data in a Structured Prompt
=======================================================================
AISE501 · Prompting in Coding · Spring Semester 2026
"""
from pathlib import Path
from server_utils import chat, get_client, print_messages, print_separator
# Reference solution for Exercise 2: review, prioritise, then fix one function.
client = get_client()
code_to_review = Path("analyze_me.py").read_text()
# PEP 8 fix: spaces around the assignment operator.
# NOTE(review): temperature 1 is fairly high for a deterministic code-review
# task — confirm this is intentional (the Exercise 1 solution uses 0.3).
temperature_value = 1

# ── Part A: Persona + Task + Code ─────────────────────────────────────────────
print_separator("Part A Structured Prompt: Persona / Task / Code")
prompt_a = f"""\
<persona>
You are a senior Python engineer with 10+ years of experience.
You are rigorous about correctness, follow PEP-8 strictly, and care
deeply about defensive programming and readable code.
</persona>
<task>
Review the Python code provided below.
Identify every bug and code-quality issue you can find.
For each issue, state what is wrong and why it is a problem.
</task>
<code language="python" filename="analyze_me.py">
{code_to_review}
</code>"""
messages_a = [
    {"role": "user", "content": prompt_a}
]
print_messages(messages_a)
response_a = chat(client, messages_a, temperature=temperature_value)
print(response_a)

# ── Part B: Refine Ask for a Prioritised Bug List ───────────────────────────
print_separator("Part B Refined Prompt: Prioritised Bug List")
system_b = """\
You are a senior Python engineer performing a thorough code review.
Be concise, precise, and always refer to line numbers when available.
"""
prompt_b = f"""\
<persona>
You are a senior Python engineer with 10+ years of experience.
You are rigorous about correctness, follow PEP-8, and care about
defensive programming and readable code.
</persona>
<task>
Review the Python code below.
Identify every bug and code-quality issue.
Classify each finding by severity:
- Critical : causes a crash or wrong result under normal use
- Medium : bad practice that will cause problems in production
- Style : violates PEP-8 or reduces readability
</task>
<output_format>
For each finding produce exactly this structure (plain text):
[SEVERITY] Line <N>: <one-sentence problem description>
Fix hint: <one-sentence suggestion>
Group findings under headings: ## Critical, ## Medium, ## Style
</output_format>
<code language="python" filename="analyze_me.py">
{code_to_review}
</code>"""
messages_b = [
    {"role": "system", "content": system_b},
    {"role": "user", "content": prompt_b},
]
print_messages(messages_b)
response_b = chat(client, messages_b, temperature=temperature_value)
print(response_b)

# ── Part C: Request a Corrected Function ──────────────────────────────────────
print_separator("Part C Ask for a Corrected Function")
followup = """\
<task>
Rewrite only the `calculate_statistics` function with all bugs fixed.
Requirements:
- Handle an empty list gracefully (return None or raise ValueError with a clear message)
- Use sample variance (divide by N-1)
- Add full PEP-8 type hints
- Add a NumPy-style docstring
Return only the function code, no surrounding explanation.
</task>"""
messages_c = messages_b + [
    {"role": "assistant", "content": response_b},
    {"role": "user", "content": followup},
]
print_messages(messages_c)
response_c = chat(client, messages_c, temperature=temperature_value)
print(response_c)

# ── Reflection Questions ──────────────────────────────────────────────────────
print_separator("Reflection Questions")
print(
    "1. Did the LLM find all 7 bugs? Which did it miss?\n"
    "2. How did the <output_format> tag change the structure of the answer?\n"
    "3. What is the advantage of continuing a conversation vs. starting fresh?\n"
    "4. How would you scale this pattern to a large codebase (many files)?\n"
)

View File

@ -0,0 +1,231 @@
"""
Exercise 3 Structured Input and Structured Output
====================================================
AISE501 · Prompting in Coding · Spring Semester 2026
Learning goals
--------------
* Request machine-parseable output (JSON and YAML) from the LLM.
* Parse the JSON response in Python and use it programmatically.
* Build a second prompt dynamically from the parsed data.
* Understand why structured output is essential for LLM pipelines.
Tasks
-----
Part A Ask the LLM to review analyze_me.py and return a JSON report (TODOs 1-4).
Part B Parse the JSON response and print a summary table (TODOs 5-6).
Part C Use the parsed data to build a follow-up prompt automatically (TODOs 7-8).
Part D Repeat Part A but request YAML instead of JSON (TODO 9).
Estimated time: 40-50 minutes
"""
import json
from pathlib import Path
from server_utils import chat, chat_json, get_client, print_messages, print_separator
client = get_client()
code_to_review = Path("analyze_me.py").read_text()

# ── Part A: Structured Input → JSON Output ────────────────────────────────────
print_separator("Part A Request JSON Output")
# TODO 1: Write a system prompt that instructs the model to ALWAYS respond
# with valid JSON and nothing else (no markdown fences, no explanation).
system_a = """\
<request>
<persona>You are a master python tutor</persona>
<style>You follow the PEP 8 style guide</style>
<constraints>Only respond in a json format following the user provided schema</constraints>
</request>
"""
# TODO 2: Write the user prompt.
# Use XML tags for <persona>, <task>, and <code>.
#
# In <task>, specify the exact JSON schema you expect:
#
schema = """{
"summary": "<one sentence overview>",
"bugs": [
{
"id": 1,
"severity": "Critical|Medium|Style",
"line": <int or null>,
"function": "<function name>",
"description": "<what is wrong>",
"fix": "<one-sentence fix hint>"
},
...
],
"overall_quality": "Poor|Fair|Good|Excellent"
}"""
#
# Tip: paste the schema directly inside a <schema> tag in your prompt.
# Fix: removed the leftover "TODO: Write your structured prompt here" lines —
# they were part of the f-string and were sent to the model verbatim.
prompt_a = f"""\
<persona>
You are a Python engineer who is rigorous about correctness and follows PEP-8 and best practices.
</persona>
<task>
Review the Python code and identify ALL bugs.
Explain all the bugs you found using the schema provided.
</task>
<schema>
{schema}
</schema>
<code language="python" filename="analyze_me.py">
{code_to_review}
</code>"""
messages_a = [
    # TODO 3: build the messages list (system + user)
    {"role": "system", "content": system_a},
    {"role": "user", "content": prompt_a},
]
# TODO 4: call chat_json() and store the raw response string in raw_json_a.
# chat_json() adds response_format={"type": "json_object"} so the
# server guarantees the output is parseable by json.loads().
print_messages(messages_a)
raw_json_a = chat_json(client, messages_a)
print("Raw response:")
print(raw_json_a)

# ── Part B: Parse the JSON and Display a Summary ──────────────────────────────
print_separator("Part B Parse JSON and Print Summary")
# TODO 5 (done): parse defensively — fail fast with a readable message
# instead of an unhandled traceback when the model returns malformed JSON.
try:
    report = json.loads(raw_json_a)
except json.JSONDecodeError as exc:
    raise SystemExit(f"Model did not return valid JSON: {exc}")
# TODO 6: Print a formatted summary table like this:
#
# Overall quality : Fair
# Summary : ...
#
# ID | Severity | Line | Function | Description
# ---+----------+------+-----------------------+---------------------------
# 1 | Critical | 12 | calculate_statistics | ZeroDivisionError on ...
# 2 | ...
#
# Hint: use f-strings and ljust() / rjust() for alignment.
print(f"Overall quality : {report['overall_quality']}")
print(f"Summary : {report['summary']}\n")
bugs = report.get("bugs", [])
if bugs:
    headers = {
        "id": "ID",
        "severity": "Severity",
        "line": "Line",
        "function": "Function",
        "description": "Description",
    }
    # Compute column widths
    # NOTE(review): assumes every bug dict contains all schema keys —
    # a missing key would raise KeyError here; verify against the schema.
    widths = {
        key: max(len(headers[key]), *(len(str(b[key])) for b in bugs))
        for key in headers
    }
    # Header row
    print(
        f"{headers['id'].ljust(widths['id'])} | "
        f"{headers['severity'].ljust(widths['severity'])} | "
        f"{headers['line'].ljust(widths['line'])} | "
        f"{headers['function'].ljust(widths['function'])} | "
        f"{headers['description']}"
    )
    # Separator row
    print(
        f"{'-' * widths['id']}-+-"
        f"{'-' * widths['severity']}-+-"
        f"{'-' * widths['line']}-+-"
        f"{'-' * widths['function']}-+-"
        f"{'-' * widths['description']}"
    )
    # Data rows
    for bug in bugs:
        print(
            f"{str(bug['id']).ljust(widths['id'])} | "
            f"{bug['severity'].ljust(widths['severity'])} | "
            f"{str(bug['line']).ljust(widths['line'])} | "
            f"{bug['function'].ljust(widths['function'])} | "
            f"{bug['description']}"
        )

# ── Part C: Use the Parsed Data to Build a Follow-Up Prompt ──────────────────
print_separator("Part C Dynamic Follow-Up Prompt from Parsed Data")
# TODO 7: Select all bugs with severity "Critical" from the parsed report.
# Build a new user prompt that:
# - Lists each critical bug by ID and description
# - Asks the LLM to provide the corrected code for each one
# - Requests the output as a JSON OBJECT (not a bare array, because
# response_format=json_object requires an object at the top level):
# {"fixes": [{"bug_id": 1, "fixed_code": "..."}, ...]}
#
# Tip: wrap the schema in a {"fixes": [...]} object so chat_json() works.
critical_bugs = [b for b in report["bugs"] if b["severity"] == "Critical"]
followup_prompt = """\
TODO: Build the follow-up prompt dynamically using the critical_bugs list.
Loop over critical_bugs to embed each bug's description in the prompt.
"""
# TODO 8: Continue the conversation (multi-turn) by appending the previous
# response and the new prompt, then call chat_json() and parse the result.
# Because the schema is {"fixes": [...]}, extract the list with ["fixes"].
# messages_c = messages_a + [
# {"role": "assistant", "content": raw_json_a},
# {"role": "user", "content": followup_prompt},
# ]
# print_messages(messages_c)
# raw_json_c = chat_json(client, messages_c)
# fixes = json.loads(raw_json_c)["fixes"]
# for fix in fixes:
# print(f"\n--- Fix for bug {fix['bug_id']} ---")
# print(fix["fixed_code"])

# ── Part D: Request YAML Instead of JSON ─────────────────────────────────────
print_separator("Part D YAML Output")
# TODO 9: Repeat Part A but ask for YAML output instead of JSON.
# Install PyYAML if needed: pip install pyyaml
# Parse the response with yaml.safe_load() and print the result.
#
# Question: Which format do you prefer for human-readable reports? For
# machine-to-machine pipelines?
# import yaml
# ...

# ── Reflection Questions ──────────────────────────────────────────────────────
print_separator("Reflection Questions")
print(
    "1. What can go wrong when asking an LLM to return JSON?\n"
    "2. How did the <schema> tag influence the output structure?\n"
    "3. Why is structured output important for building LLM pipelines?\n"
    "4. When would you use JSON vs. YAML vs. plain text?\n"
)

View File

@ -0,0 +1,188 @@
"""
Exercise 3 SOLUTION Structured Input and Structured Output
==============================================================
AISE501 · Prompting in Coding · Spring Semester 2026
"""
import json
from pathlib import Path
import yaml # pip install pyyaml
from server_utils import chat, chat_json, get_client, print_messages, print_separator
client = get_client()
code_to_review = Path("analyze_me.py").read_text()
# ── Part A: Structured Input → JSON Output ────────────────────────────────────
print_separator("Part A Request JSON Output")
system_a = """\
You are a code-review assistant. You ALWAYS respond with valid JSON and
nothing else no markdown code fences, no introductory text, no trailing
commentary. Your entire response must be parseable by json.loads().
"""
prompt_a = f"""\
<persona>
You are a senior Python engineer performing a thorough, structured code review.
</persona>
<task>
Review the Python code below and return your findings as JSON.
Follow the schema defined in <schema> exactly.
</task>
<schema>
{{
"summary": "<one-sentence overview of the code quality>",
"bugs": [
{{
"id": 1,
"severity": "Critical|Medium|Style",
"line": <integer line number or null if not applicable>,
"function": "<name of the affected function>",
"description": "<what is wrong and why it matters>",
"fix": "<one-sentence fix hint>"
}}
],
"overall_quality": "Poor|Fair|Good|Excellent"
}}
</schema>
<code language="python" filename="analyze_me.py">
{code_to_review}
</code>"""
messages_a = [
{"role": "system", "content": system_a},
{"role": "user", "content": prompt_a},
]
print_messages(messages_a)
raw_json_a = chat_json(client, messages_a) # response_format=json_object → always valid JSON
print("Raw response:")
print(raw_json_a)
# ── Part B: Parse the JSON and Display a Summary ──────────────────────────────
print_separator("Part B Parse JSON and Print Summary")
report = json.loads(raw_json_a)
print(f"Overall quality : {report['overall_quality']}")
print(f"Summary : {report['summary']}\n")
col_w = [4, 10, 6, 24, 45]
header = (
f"{'ID':<{col_w[0]}} | {'Severity':<{col_w[1]}} | {'Line':<{col_w[2]}} | "
f"{'Function':<{col_w[3]}} | {'Description':<{col_w[4]}}"
)
print(header)
print("-" * len(header))
for bug in report["bugs"]:
line_str = str(bug["line"]) if bug["line"] is not None else ""
print(
f"{bug['id']:<{col_w[0]}} | "
f"{bug['severity']:<{col_w[1]}} | "
f"{line_str:<{col_w[2]}} | "
f"{bug['function']:<{col_w[3]}} | "
f"{bug['description'][:col_w[4]]}"
)
# ── Part C: Use the Parsed Data to Build a Follow-Up Prompt ──────────────────
print_separator("Part C Dynamic Follow-Up Prompt from Parsed Data")
# Filter on the severity values defined by the Part A schema.
critical_bugs = [b for b in report["bugs"] if b["severity"] == "Critical"]
if not critical_bugs:
    print("No critical bugs found — nothing to fix.")
else:
    # Embed each parsed bug back into a new prompt: structured output from
    # turn 1 becomes structured input for turn 2.
    lines = []
    for b in critical_bugs:
        lines.append(f' - Bug {b["id"]} (line {b["line"]}): {b["description"]}')
    bug_list_text = "\n".join(lines)
    followup_prompt = f"""\
<task>
The following critical bugs were found in analyze_me.py:
{bug_list_text}
For each bug, provide the corrected Python code snippet (the full function
is fine). Return your answer as a JSON object with this schema:
{{
"fixes": [
{{"bug_id": <int>, "fixed_code": "<corrected Python code as a string>"}}
]
}}
No markdown, no explanation only the JSON object.
</task>"""
    # Multi-turn: replay the Part A exchange, then add the follow-up request.
    messages_c = messages_a + [
        {"role": "assistant", "content": raw_json_a},
        {"role": "user", "content": followup_prompt},
    ]
    print_messages(messages_c)
    raw_json_c = chat_json(client, messages_c)
    fixes = json.loads(raw_json_c)["fixes"]
    for fix in fixes:
        print(f"\n--- Fix for bug {fix['bug_id']} ---")
        print(fix["fixed_code"])
# ── Part D: Request YAML Instead of JSON ─────────────────────────────────────
print_separator("Part D YAML Output")
system_d = """\
You are a code-review assistant. You ALWAYS respond with valid YAML and
nothing else no markdown fences, no introductory text.
"""
prompt_d = f"""\
<persona>
You are a senior Python engineer performing a structured code review.
</persona>
<task>
Review the code below and return your findings as YAML.
Use the same fields as before: summary, bugs (with id/severity/line/
function/description/fix), and overall_quality.
</task>
<code language="python" filename="analyze_me.py">
{code_to_review}
</code>"""
messages_d = [
    {"role": "system", "content": system_d},
    {"role": "user", "content": prompt_d},
]
print_messages(messages_d)
# Plain chat(): there is no server-side response_format for YAML.
raw_yaml = chat(client, messages_d, temperature=0.2)
try:
    yaml_report = yaml.safe_load(raw_yaml)
except yaml.YAMLError as e:
    print(f"ERROR: malformed YAML: {e}")
    print(raw_yaml)
else:
    # safe_load() can legally return a scalar, list, or None; calling .get()
    # on those raises AttributeError, which the YAMLError handler above would
    # not catch. Guard before treating the result as a mapping.
    if isinstance(yaml_report, dict):
        print(f"Parsed YAML overall quality: {yaml_report.get('overall_quality')}")
        print(f"Number of bugs found: {len(yaml_report.get('bugs', []))}")
    else:
        print("ERROR: YAML response is not a mapping:")
        print(raw_yaml)
# ── Reflection Questions ──────────────────────────────────────────────────────
print_separator("Reflection Questions")
print(
    "1. What can go wrong when asking an LLM to return JSON?\n"
    "2. How did the <schema> tag influence the output structure?\n"
    "3. Why is structured output important for building LLM pipelines?\n"
    "4. When would you use JSON vs. YAML vs. plain text?\n"
)

View File

@ -0,0 +1,300 @@
"""
Exercise 4 Build Your Own Chain-of-Thought Pipeline
======================================================
AISE501 · Prompting in Coding · Spring Semester 2026
Learning goals
--------------
* Understand that reasoning models (o1, DeepSeek-R1, Qwen3 think mode)
generate a hidden "plan" before giving the final answer.
* Replicate this behaviour manually using multiple LLM calls:
Call 1 (Planning) structured input structured JSON plan
Calls 2N (Execution) iterate step-by-step, validating each step
* See why explicit reasoning steps improve answer quality for complex tasks.
Background
----------
When you disable Qwen3's built-in thinking mode (as we do in server_utils),
you get fast, direct answers but no explicit reasoning.
In this exercise you rebuild that reasoning step yourself, step by step,
so you can inspect and control the thinking process.
The problem
-----------
Given the buggy analyze_me.py from earlier exercises, design and implement
a corrected, production-ready version of the full module.
Tasks
-----
Part A Planning phase: structured input JSON reasoning plan (TODOs 1-5).
Part B Iterative execution: apply each plan step one at a time,
validating syntax after each step (TODOs 6-10).
Part C Reflection compare with and without CoT (TODO 11).
Estimated time: 50-60 minutes
"""
import json
import subprocess
import sys
from pathlib import Path
from server_utils import (
    chat, chat_json, get_client, print_messages, print_separator,
    strip_code_fences,
)

client = get_client()
# The buggy module from earlier exercises; must exist in the working directory.
code_to_fix = Path("analyze_me.py").read_text()
# ── The Problem Statement ─────────────────────────────────────────────────────
# We will use this description in both phases so we define it once.
PROBLEM = """\
Rewrite the Python module analyze_me.py so that it is correct,
robust, and production-ready.
Requirements:
1. calculate_statistics() must handle empty lists without crashing.
2. Use sample variance (divide by N-1).
3. process_data() must use a context manager and handle non-numeric lines.
4. normalize() must fix the operator-precedence bug and raise ValueError
for unknown methods.
5. All functions must have PEP-484 type hints and NumPy-style docstrings.
6. The module must pass basic sanity checks when run as __main__.
"""
# ── Part A: Planning Phase ────────────────────────────────────────────────────
print_separator("Part A Planning Phase (CoT Step 1)")
# The goal of this phase is NOT to write the code — it is to produce a
# structured plan: what steps are needed and in what order?
# TODO 1: Write a system prompt that instructs the model to act as a
# "software architect" whose job is ONLY to produce a plan,
# never to write the final code.
# IMPORTANT: explicitly forbid code snippets in all fields —
# use plain English only. This prevents unescaped quotes from
# breaking the JSON output.
# Enforce JSON-only output.
system_plan = """\
TODO: Write a system prompt for the planning phase.
The model should only reason and plan, not write code.
Enforce JSON-only output.
"""
# TODO 2: Write the planning user prompt using XML tags:
# <problem> embed the PROBLEM string
# <code> embed the buggy code_to_fix
# <task> ask for a step-by-step plan
# <schema> specify the exact JSON schema for the plan:
#
# {
# "goal": "<one sentence goal>",
# "steps": [
# {
# "step_id": 1,
# "title": "<short title>",
# "reasoning": "<why this step is needed>",
# "action": "<what to do in this step — plain English, no code>",
# "depends_on": [] // list of step_ids this step depends on
# },
# ...
# ]
# }
# Placeholder prompt: the <problem> and <code> sections below are already
# wired up; add the <task> and <schema> sections per TODO 2.
prompt_plan = f"""\
TODO: Write the planning prompt here.
Use <problem>, <code>, <task>, and <schema> tags.
<problem>
{PROBLEM}
</problem>
<code language="python" filename="analyze_me.py">
{code_to_fix}
</code>"""
# TODO 3: Build messages_plan (system + user) and call chat_json().
# Use chat_json() (not chat()) so the server enforces valid JSON via
# response_format={"type": "json_object"}.
# Use max_tokens=4096 — the plan can be long and would get cut off
# with the default 2048, producing truncated (unparseable) JSON.
messages_plan = [
    # TODO: add system and user messages
]
# print_messages(messages_plan)
# raw_plan = chat_json(client, messages_plan, max_tokens=4096)
# print("Raw plan JSON:")
# print(raw_plan)
# TODO 4: Parse raw_plan with json.loads().
# Print each step in a readable format:
# Step 1 <title>
# Reasoning : <reasoning>
# Action : <action>
# plan = json.loads(raw_plan)
# print(f"\nGoal: {plan['goal']}\n")
# for step in plan["steps"]:
# print(f"Step {step['step_id']} {step['title']}")
# print(f" Reasoning : {step['reasoning']}")
# print(f" Action : {step['action']}\n")
# TODO 5: (Optional) Inspect the plan critically.
# Does the order of steps make sense?
# Are any steps missing?
# You can edit the plan dict before passing it to the execution phase.
# ── Part B: Iterative Execution Phase ────────────────────────────────────────
print_separator("Part B Iterative Execution Phase (CoT Step 2)")
# KEY INSIGHT: Instead of dumping the entire plan into one big prompt
# (which would just be another one-shot), we iterate through each step
# individually. After every step we:
#   1. Feed the model only the CURRENT step + the accumulated code so far
#   2. Validate the output (syntax check via py_compile)
#   3. Use the validated output as input for the next step
#
# This mirrors how a real developer works: implement one change, verify it
# compiles, then move on. The model always works with CONCRETE code from
# the previous step rather than an abstract plan of what it intends to write.
# TODO 6: Write a system prompt for the execution phase.
# The model should act as a developer who receives the current
# state of a module plus a single step to implement.
# It should apply ONLY that step and return the full updated module.
system_exec = """\
TODO: Write a system prompt for the step-by-step execution phase.
The model should apply ONE step at a time.
"""
# TODO 7: Complete the validate_syntax() function below.
# It should write code to a temp file and run py_compile on it.
# Return (True, "") if syntax is valid, (False, error_message) otherwise.
def validate_syntax(code: str) -> tuple[bool, str]:
    """Write code to a temp file and run py_compile to check syntax."""
    tmp = Path("_tmp_validate.py")
    # TODO: write code to tmp, run py_compile, clean up, return result
    # Hint: py_compile.compile(str(tmp), doraise=True) raises
    # py_compile.PyCompileError when the code has a syntax error.
    tmp.unlink(missing_ok=True)
    return True, ""  # placeholder
# TODO 8: Implement the step-by-step execution loop.
# Start with current_code = code_to_fix (the original buggy code).
# For each step in plan["steps"]:
# a) Build a prompt with <current_code>, <step>, and <task> tags
# b) Call chat() with the prompt
# c) Strip code fences from the response
# d) Validate syntax using validate_syntax()
# e) If valid: update current_code
# f) If invalid: retry ONCE with error feedback
# g) Print the code after each step
# current_code = code_to_fix
#
# for step in plan["steps"]:
# step_id = step["step_id"]
# print_separator(f"Executing Step {step_id} {step['title']}")
#
# prompt_step = f"""\
# TODO: Build the per-step prompt here.
# Include <current_code>, <step>, and <task> tags.
# Tell the model to apply ONLY this step."""
#
# messages_step = [
# {"role": "system", "content": system_exec},
# {"role": "user", "content": prompt_step},
# ]
#
# print_messages(messages_step)
# raw_response = chat(client, messages_step, temperature=0.2, max_tokens=4096)
# step_code = strip_code_fences(raw_response)
#
# # Validate syntax
# ok, error_msg = validate_syntax(step_code)
# if ok:
# print(f" [PASS] Step {step_id} syntax OK")
# current_code = step_code
# else:
# print(f" [FAIL] Step {step_id} syntax error: {error_msg}")
# # TODO: retry with error feedback (see TODO 9)
#
# print(f"\n--- Code after Step {step_id} ---")
# print(current_code)
# TODO 9: Implement the retry logic for syntax errors.
# When a step produces invalid syntax:
# a) Build a retry prompt with the <error> and the broken <code>
# b) Ask the model to fix the syntax error
# c) Validate again
# d) If still broken, keep the last valid code and continue
# TODO 10: Save the final result and run it as a validation.
# - Save current_code to "analyze_me_fixed.py"
# - Run it with subprocess and print the output
# Path("analyze_me_fixed.py").write_text(current_code)
# print("\nSaved iterative CoT result to analyze_me_fixed.py")
#
# result = subprocess.run(
# [sys.executable, "analyze_me_fixed.py"],
# capture_output=True, text=True,
# )
# print("STDOUT:", result.stdout)
# if result.stderr:
# print("STDERR:", result.stderr)
# print(f"Exit code: {result.returncode}")
# ── Part C: Compare With and Without CoT ─────────────────────────────────────
print_separator("Part C Baseline: Direct Prompt Without CoT")
# TODO 11: Send the same problem to the model in a SINGLE prompt with NO plan.
# Compare this response with the iterative CoT version.
direct_prompt = f"""\
TODO: Write a direct, single-shot prompt asking the model to rewrite
analyze_me.py according to the PROBLEM requirements.
No plan, no iteration just ask directly.
<problem>
{PROBLEM}
</problem>
<code language="python" filename="analyze_me.py">
{code_to_fix}
</code>"""
# messages_direct = [{"role": "user", "content": direct_prompt}]
# print_messages(messages_direct)
# direct_response = chat(client, messages_direct, temperature=0.3, max_tokens=4096)
# print(direct_response)
# ── Reflection Questions ──────────────────────────────────────────────────────
print_separator("Reflection Questions")
print(
    "1. How did the iterative CoT output differ from the direct single-shot?\n"
    "2. Did the validation step catch any syntax errors? How were they fixed?\n"
    "3. What would happen if you gave the model a deliberately wrong plan?\n"
    "4. How does this manual CoT pipeline relate to built-in thinking modes\n"
    " in models like o1, DeepSeek-R1, and Qwen3 with think mode enabled?\n"
    "5. What are the trade-offs of step-by-step iteration vs. one-shot?\n"
    " (Think: latency, cost, error isolation, debuggability)\n"
    "6. How could you extend the validation step beyond syntax checking?\n"
    " (Hint: unit tests, type checking, linting)\n"
)

View File

@ -0,0 +1,279 @@
"""
Exercise 4 SOLUTION Build Your Own Chain-of-Thought Pipeline
================================================================
AISE501 · Prompting in Coding · Spring Semester 2026
"""
import ast
import json
import subprocess
import sys
from pathlib import Path
from server_utils import (
chat, chat_json, get_client, print_messages, print_separator,
strip_code_fences,
)
client = get_client()
# The buggy module from earlier exercises; must exist in the working directory.
code_to_fix = Path("analyze_me.py").read_text()

# Problem statement shared by the planning phase, the execution phase, and
# the no-CoT baseline, so it is defined once.
PROBLEM = """\
Rewrite the Python module analyze_me.py so that it is correct,
robust, and production-ready.
Requirements:
1. calculate_statistics() must handle empty lists without crashing.
2. Use sample variance (divide by N-1).
3. process_data() must use a context manager and handle non-numeric lines.
4. normalize() must fix the operator-precedence bug and raise ValueError
for unknown methods.
5. All functions must have PEP-484 type hints and NumPy-style docstrings.
6. The module must pass basic sanity checks when run as __main__.
"""
# ── Part A: Planning Phase ────────────────────────────────────────────────────
print_separator("Part A Planning Phase (CoT Step 1)")
# Planning system prompt: the model may only plan, never emit code — code
# snippets inside JSON string values tend to contain unescaped quotes that
# break json.loads().
system_plan = """\
You are a software architect. Your ONLY job right now is to produce a
structured reasoning plan. You must NOT write any Python code or code
snippets anywhere in your response not in action fields, not in
reasoning fields, nowhere. Use plain English descriptions only.
Respond with valid JSON only (no markdown fences, no extra text).
"""
# Doubled braces ({{ }}) keep the literal schema braces out of f-string parsing.
prompt_plan = f"""\
<problem>
{PROBLEM}
</problem>
<code language="python" filename="analyze_me.py">
{code_to_fix}
</code>
<task>
Analyse the problem and the buggy code above.
Produce a step-by-step plan that a developer can follow to implement
the corrected module. Each step must be atomic and self-contained.
</task>
<schema>
{{
"goal": "<one-sentence goal>",
"steps": [
{{
"step_id": 1,
"title": "<short title>",
"reasoning": "<why this step is necessary>",
"action": "<concrete action to take — plain English only, no code>",
"depends_on": []
}}
]
}}
</schema>"""
messages_plan = [
    {"role": "system", "content": system_plan},
    {"role": "user", "content": prompt_plan},
]
print_messages(messages_plan)
# max_tokens=4096: a long plan would otherwise be truncated into invalid JSON.
raw_plan = chat_json(client, messages_plan, max_tokens=4096)
print("Raw plan JSON:")
print(raw_plan)

plan = json.loads(raw_plan)
print(f"\nGoal: {plan['goal']}\n")
for step in plan["steps"]:
    print(f"Step {step['step_id']} {step['title']}")
    print(f" Reasoning : {step['reasoning']}")
    print(f" Action : {step['action']}")
    # "depends_on" may be absent or empty; only shown when populated.
    deps = step.get("depends_on", [])
    if deps:
        print(f" Depends on: steps {deps}")
    print()
# ── Part B: Iterative Execution Phase ────────────────────────────────────────
print_separator("Part B Iterative Execution Phase (CoT Step 2)")
# Instead of dumping the entire plan into a single prompt, we iterate through
# each step individually. After every step we:
#   1. Feed the model only the CURRENT step + the accumulated code so far
#   2. Validate the output (syntax check via py_compile)
#   3. Use the validated output as input for the next step
#
# This mirrors how a real developer works: implement one change, verify it
# compiles, then move on. It also means the model always works with CONCRETE
# code from the previous step rather than an abstract plan of what it intends
# to write.
system_exec = """\
You are a senior Python developer. You receive the current state of a
Python module together with a single step to implement. Apply ONLY the
requested change. Return the complete updated module no explanations
outside the code block.
"""
def validate_syntax_ast(code: str) -> tuple[bool, str]:
    """Use ast.parse to check whether code is syntactically valid Python.

    Returns (True, "") on success and (False, error_message) when the
    source does not parse.
    """
    try:
        ast.parse(code)
    except SyntaxError as exc:
        return False, str(exc)
    return True, ""
def validate_syntax(code: str) -> tuple[bool, str]:
    """Write code to a temp file and run py_compile to check syntax.

    Returns (True, "") when the code compiles, (False, error_message)
    otherwise. The previous placeholder always returned (True, ""), which
    silently disabled every per-step validation in the execution loop
    below (invalid step output was accepted and carried forward).
    """
    import py_compile

    tmp = Path("_tmp_validate.py")
    cache = Path("_tmp_validate.pyc")
    try:
        tmp.write_text(code, encoding="utf-8")
        # doraise=True turns compilation problems into PyCompileError
        # instead of a message on stderr; cfile keeps the bytecode out
        # of __pycache__ so cleanup is deterministic.
        py_compile.compile(str(tmp), cfile=str(cache), doraise=True)
        return True, ""
    except py_compile.PyCompileError as exc:
        return False, str(exc)
    finally:
        tmp.unlink(missing_ok=True)
        cache.unlink(missing_ok=True)
current_code = code_to_fix  # start with the original buggy code
for step in plan["steps"]:
    step_id = step["step_id"]
    print_separator(f"Executing Step {step_id} {step['title']}")
    # One step per call: the model sees only the accumulated code plus the
    # single step to apply, never the whole plan.
    prompt_step = f"""\
<current_code>
{current_code}
</current_code>
<step>
Step {step_id}: {step['title']}
Action: {step['action']}
Reasoning: {step['reasoning']}
</step>
<task>
Apply ONLY this single step to the current code above.
Do not skip ahead to other steps.
Mark your change with a comment: # Step {step_id} {step['title']}
Return the complete updated Python module.
Do not include any explanation outside the code.
</task>"""
    messages_step = [
        {"role": "system", "content": system_exec},
        {"role": "user", "content": prompt_step},
    ]
    print_messages(messages_step)
    raw_response = chat(client, messages_step, temperature=0.2, max_tokens=4096)
    step_code = strip_code_fences(raw_response)
    # ── Validate: syntax check before moving on ──
    ok, error_msg = validate_syntax(step_code)
    if ok:
        print(f" [PASS] Step {step_id} syntax OK")
        current_code = step_code
    else:
        print(f" [FAIL] Step {step_id} syntax error:\n{error_msg}")
        print(" Retrying with error feedback...")
        # Give the model one chance to fix its own syntax error
        retry_prompt = f"""\
The code you returned has a syntax error:
<error>
{error_msg}
</error>
<code>
{step_code}
</code>
<task>
Fix the syntax error and return the complete corrected module.
Do not include any explanation outside the code.
</task>"""
        messages_retry = [
            {"role": "system", "content": system_exec},
            {"role": "user", "content": retry_prompt},
        ]
        print_messages(messages_retry)
        # Lower temperature for the repair attempt: deterministic fix wanted.
        retry_response = chat(client, messages_retry, temperature=0.1, max_tokens=4096)
        retry_code = strip_code_fences(retry_response)
        ok2, error_msg2 = validate_syntax(retry_code)
        if ok2:
            print(f" [PASS] Step {step_id} retry syntax OK")
            current_code = retry_code
        else:
            # Still broken after one retry: keep the last valid code so the
            # remaining steps operate on a compilable module.
            print(f" [FAIL] Step {step_id} retry still has errors: {error_msg2}")
            print(" Continuing with last valid code.")
    print(f"\n--- Code after Step {step_id} ---")
    print(current_code)
    print()
# Save final result
Path("analyze_me_fixed.py").write_text(current_code)
print("\nSaved iterative CoT result to analyze_me_fixed.py")

# Final validation: run the module
print_separator("Final Validation Running analyze_me_fixed.py")
# Execute with the same interpreter that runs this script.
result = subprocess.run(
    [sys.executable, "analyze_me_fixed.py"],
    capture_output=True, text=True,
)
print("STDOUT:", result.stdout)
if result.stderr:
    print("STDERR:", result.stderr)
print(f"Exit code: {result.returncode}")

# ── Part C: Baseline Direct Prompt Without CoT ─────────────────────────────
print_separator("Part C Baseline: Direct Prompt Without CoT")
# Same problem, single shot, no plan — for comparison with the CoT result.
direct_prompt = f"""\
<problem>
{PROBLEM}
</problem>
<code language="python" filename="analyze_me.py">
{code_to_fix}
</code>
<task>
Rewrite the module so that it satisfies all requirements in <problem>.
Return only the corrected Python code.
</task>"""
messages_direct = [{"role": "user", "content": direct_prompt}]
print_messages(messages_direct)
direct_response = chat(client, messages_direct, temperature=0.3, max_tokens=4096)
print(direct_response)
Path("analyze_me_direct.py").write_text(strip_code_fences(direct_response))
print("\nSaved direct-prompt result to analyze_me_direct.py")
print(
    "\nCompare analyze_me_fixed.py (CoT) with analyze_me_direct.py (direct).\n"
    "Which is more complete? Which follows the requirements more closely?"
)

# ── Reflection Questions ──────────────────────────────────────────────────────
print_separator("Reflection Questions")
print(
    "1. How did the iterative CoT output differ from the direct single-shot?\n"
    "2. Did the validation step catch any syntax errors? How were they fixed?\n"
    "3. What would happen if you gave the model a deliberately wrong plan?\n"
    "4. How does this manual CoT pipeline relate to built-in thinking modes\n"
    " in models like o1, DeepSeek-R1, and Qwen3 with think mode enabled?\n"
    "5. What are the trade-offs of step-by-step iteration vs. one-shot?\n"
    " (Think: latency, cost, error isolation, debuggability)\n"
    "6. How could you extend the validation step beyond syntax checking?\n"
    " (Hint: unit tests, type checking, linting)\n"
)

Binary file not shown.

View File

@ -0,0 +1,215 @@
"""
server_utils.py Shared utilities for AISE501 Prompting Exercises
======================================================================
Connects to the vLLM inference server at silicon.fhgr.ch via the
OpenAI-compatible API.
This file is complete no TODOs here.
"""
from openai import OpenAI
# ── Server configuration ──────────────────────────────────────────────────────
HOST = "silicon.fhgr.ch"  # vLLM inference server hostname
PORT = 7080  # port of the OpenAI-compatible API endpoint
API_KEY = "EMPTY"  # placeholder; presumably the server ignores the key — confirm
MODEL = "qwen3.5-35b-a3b"  # model ID served on silicon.fhgr.ch
def get_client() -> OpenAI:
    """Return an OpenAI-compatible client pointing at the vLLM server."""
    return OpenAI(base_url=f"http://{HOST}:{PORT}/v1", api_key=API_KEY)
def list_models(client: OpenAI) -> list[str]:
    """Return all model IDs available on the server."""
    available = client.models.list().data
    return [model.id for model in available]
def chat(
    client: OpenAI,
    messages: list[dict],
    model: str = MODEL,
    temperature: float = 0.2,
    max_tokens: int = 2048,
) -> str:
    """
    Send a list of chat messages to the LLM and return the response text.

    Qwen3's built-in chain-of-thought "think" mode is disabled via
    ``extra_body`` so that replies are direct and not wrapped in
    <think></think> blocks.

    Parameters
    ----------
    client : OpenAI client returned by get_client()
    messages : List of {"role": ..., "content": ...} dicts
    model : Model ID (default: module-level MODEL constant)
    temperature : Sampling temperature (0 = deterministic, 1 = creative)
    max_tokens : Maximum number of tokens in the response
    """
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        extra_body={"chat_template_kwargs": {"enable_thinking": False}},
    )
    return completion.choices[0].message.content
def chat_json(
    client: OpenAI,
    messages: list[dict],
    model: str = MODEL,
    temperature: float = 0.2,
    max_tokens: int = 2048,
) -> str:
    """
    Variant of chat() that forces syntactically valid JSON output.

    Passing response_format={"type": "json_object"} makes the server
    constrain token sampling so the output is always parseable by
    json.loads() — no post-processing needed. Use this whenever you
    need structured JSON output (Exercises 3 and 4).

    Parameters are the same as chat(); temperature defaults to 0.2 because
    deterministic output is usually preferable for structured data.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        response_format={"type": "json_object"},
        extra_body={"chat_template_kwargs": {"enable_thinking": False}},
    )
    return completion.choices[0].message.content
def _repair_json_strings(text: str) -> str:
    """
    Replace unescaped control characters (newline, tab, carriage return)
    inside JSON string values with their proper escape sequences.

    LLMs frequently emit literal newlines inside long string values, which
    is invalid JSON. This function fixes that without touching structural
    whitespace outside strings.
    """
    result: list[str] = []
    in_string = False
    escape = False
    _escapes = {'\n': '\\n', '\r': '\\r', '\t': '\\t'}
    for ch in text:
        if escape:
            result.append(ch)
            escape = False
            continue
        if ch == '\\' and in_string:
            result.append(ch)
            escape = True
            continue
        if ch == '"':
            in_string = not in_string
            result.append(ch)
            continue
        if in_string and ch in _escapes:
            result.append(_escapes[ch])
            continue
        result.append(ch)
    return ''.join(result)


def extract_json(text: str) -> str:
    """
    Extract and repair a JSON object or array from an LLM response that may
    contain extra prose, markdown code fences, or unescaped control characters.

    Strategy:
    1. Strip markdown ```json ... ``` or ``` ... ``` fences.
    2. Find the first '{' or '[' — whichever occurs earliest — and extract
       to the matching closing bracket.
    3. Repair unescaped newlines/tabs inside string values.

    Returns the cleaned JSON string, or the original text as a fallback
    (so json.loads can raise a meaningful error with context).
    """
    import re

    # 1. Strip markdown fences
    fenced = re.sub(r"```(?:json)?\s*([\s\S]*?)\s*```", r"\1", text.strip())
    if fenced != text.strip():
        return _repair_json_strings(fenced.strip())

    # 2. Find the FIRST container of either kind. (The previous version
    #    always tried '{' before '[', so a top-level array that contained
    #    objects was truncated to its first inner object.)
    openers = [(text.find(c), c) for c in "{["]
    openers = [(i, c) for i, c in openers if i != -1]
    extracted = text
    if openers:
        idx, start_char = min(openers)
        end_char = "}" if start_char == "{" else "]"
        depth = 0
        in_string = False
        escape = False
        for i, ch in enumerate(text[idx:], start=idx):
            if escape:
                escape = False
                continue
            if ch == '\\' and in_string:
                escape = True
                continue
            if ch == '"':
                in_string = not in_string
                continue
            if in_string:
                continue
            if ch == start_char:
                depth += 1
            elif ch == end_char:
                depth -= 1
                if depth == 0:
                    extracted = text[idx: i + 1]
                    break
    # 3. Repair unescaped control characters inside string values
    return _repair_json_strings(extracted)
def strip_code_fences(text: str) -> str:
    """Remove markdown code fences (```python ... ```) from LLM output.

    LLMs often wrap code in fences even when told not to. Call this before
    writing LLM-generated code to a .py file so it is directly executable.
    """
    import re

    body = text.strip()
    body = re.sub(r"^```\w*\n?", "", body)
    body = re.sub(r"\n?```\s*$", "", body)
    return body.strip()
def print_messages(messages: list[dict]) -> None:
    """Print the full messages list before sending it to the LLM.

    Call this before chat() or chat_json() to inspect the exact prompt
    hierarchy (system + user + assistant turns) that the model receives.
    This is the primary debugging and learning tool for prompt engineering.
    """
    width = 64
    # The repeated separator character had been lost in a bad copy
    # ("" * width printed nothing); restore the box-drawing rule that the
    # surviving "── [" literal below indicates.
    print("\n" + "─" * width)
    print(" PROMPT SENT TO LLM")
    print("─" * width)
    for msg in messages:
        role = msg["role"].upper()
        print(f"\n── [{role}] " + "─" * max(0, width - len(role) - 6))
        print(msg["content"])
    print("\n" + "─" * width)
def print_separator(title: str = "") -> None:
    """Print a visual separator with an optional title."""
    width = 64
    # Restore the separator character that had been lost in a bad copy
    # ("" * width printed an empty line instead of a rule).
    print("\n" + "─" * width)
    if title:
        print(f" {title}")
    print("─" * width)

View File

@ -0,0 +1,23 @@
"""
test_connection.py Verify the vLLM server connection
=========================================================
Run this script from the prompting_exercises/ directory before starting
the exercises:
python test_connection.py
Expected output:
Models available: ['qwen3.5-35b-a3b']
Connection OK.
"""
from server_utils import get_client, list_models
client = get_client()
models = list_models(client)
print(f"Models available: {models}")
if models:
print("Connection OK.")
else:
print("WARNING: no models returned check server address and port.")

BIN
code_embeddings_pca.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

BIN
code_embeddings_tsne.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

BIN
pca_denoising_analysis.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 398 KiB

View File

@ -0,0 +1,72 @@
"""
Bad example
"""
def calc (l) :
    # NOTE: intentionally poor style — exercise material for the review
    # checklist below (cryptic names, no docstring, inconsistent spacing,
    # ZeroDivisionError on an empty list because of len(l)).
    t =0
    for i in l:
        t = t + i
    a = t / len (l)
    if a >=90:
        g = "A"
    elif a >=80:
        g = "B"
    elif a >=70:
        g = "C"
    elif a >=60:
        g = "D"
    else :
        g = "F"
    return g, a
def doeverything (n, s1, s2, s3, s4, s5) :
    # NOTE: intentionally poor style — one function that prints, aggregates
    # and decides pass/fail; magic number 60; exactly five scores hard-wired.
    print ("Processing student :"+ n)
    l = [s1, s2, s3, s4, s5]
    r = calc (l)
    print ("Average :"+ str (r [1]))
    print ("Grade :"+ r [0])
    if r[1] >= 60:
        print ("Status : PASSED")
    else:
        print ("Status : FAILED")
    return r
# main program
# Intentionally repetitive demo runs for three students (DRY violation is
# part of the exercise).
x = "John"
doeverything (x,85,90,78,92,88)
print ("---")
y = "Jane"
doeverything (y,55,60,45,50,58)
print ("---")
z = "Bob"
doeverything (z,70,75,80,72,78)
"""
[x] Naming conventions (variables, functions, classes)
[x] Code structure and indentation
[x] Magic numbers and constants
[x] Function length and single responsibility
[ ] DRY principle (Don't Repeat Yourself)
[x] Comments and documentation
[x] Error handling
[x] Whitespace and formatting
[ ] Mutable default arguments
"""
"""
good example
"""
def calculate_avg(points: list[int]) -> float:
    """Return the arithmetic mean of *points* (raises on an empty list)."""
    total = sum(points)
    return total / len(points)
def calculate_grade(point_avg: float) -> str:
    """Map a point average to a letter grade (A >= 90, B >= 80, C >= 70,
    D >= 60, otherwise F).

    The previous version keyed a dict by lambda objects and looked up the
    float average directly, so grade_dict.get(point_avg) never matched a
    key and the function always returned None.
    """
    thresholds = [(90, "A"), (80, "B"), (70, "C"), (60, "D")]
    for cutoff, grade in thresholds:
        if point_avg >= cutoff:
            return grade
    return "F"