527 lines
19 KiB
TeX
527 lines
19 KiB
TeX
\documentclass[12pt,a4paper]{article}
|
|
\usepackage[utf8]{inputenc}
|
|
\usepackage[T1]{fontenc}
|
|
\usepackage[english]{babel}
|
|
\usepackage{geometry}
|
|
\geometry{margin=2.5cm}
|
|
\usepackage{xcolor}
|
|
\usepackage{tcolorbox}
|
|
\usepackage{booktabs}
|
|
\usepackage{hyperref}
|
|
\usepackage{listings}
|
|
\usepackage{enumitem}
|
|
|
|
\definecolor{seblue}{rgb}{0.0,0.28,0.67}
|
|
\definecolor{segreen}{rgb}{0.13,0.55,0.13}
|
|
\definecolor{sered}{rgb}{0.7,0.13,0.13}
|
|
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
|
|
\definecolor{codegreen}{rgb}{0,0.6,0}
|
|
\definecolor{codepurple}{rgb}{0.58,0,0.82}
|
|
|
|
\lstdefinestyle{pystyle}{
|
|
backgroundcolor=\color{backcolour},
|
|
commentstyle=\color{codegreen},
|
|
keywordstyle=\color{blue},
|
|
stringstyle=\color{codepurple},
|
|
basicstyle=\ttfamily\footnotesize,
|
|
breaklines=true,
|
|
keepspaces=true,
|
|
showstringspaces=false,
|
|
tabsize=4,
|
|
language=Python
|
|
}
|
|
\lstset{style=pystyle}
|
|
|
|
\newtcolorbox{badbox}{
|
|
colback=red!5!white,
|
|
colframe=sered,
|
|
title=Bad Code,
|
|
fonttitle=\bfseries\small,
|
|
boxrule=0.8pt, arc=2pt,
|
|
top=2pt, bottom=2pt, left=4pt, right=4pt
|
|
}
|
|
|
|
\newtcolorbox{goodbox}{
|
|
colback=green!5!white,
|
|
colframe=segreen,
|
|
title=Clean Code,
|
|
fonttitle=\bfseries\small,
|
|
boxrule=0.8pt, arc=2pt,
|
|
top=2pt, bottom=2pt, left=4pt, right=4pt
|
|
}
|
|
|
|
\newtcolorbox{principlebox}[1][]{
|
|
colback=blue!5!white,
|
|
colframe=seblue,
|
|
title=#1,
|
|
fonttitle=\bfseries\small,
|
|
boxrule=0.8pt, arc=2pt,
|
|
top=2pt, bottom=2pt, left=4pt, right=4pt
|
|
}
|
|
|
|
\title{\textcolor{seblue}{Code Analysis: Bank Account Transaction Processor}\\[0.3em]
|
|
\large What Makes Code Bad and How to Fix It\\[0.3em]
|
|
\normalsize AISE501 -- AI in Software Engineering I}
|
|
\author{Dr.\ Florian Herzog}
|
|
\date{Spring Semester 2026}
|
|
|
|
\begin{document}
|
|
\maketitle
|
|
\tableofcontents
|
|
\newpage
|
|
|
|
% ============================================
|
|
\section{Overview}
|
|
% ============================================
|
|
|
|
This document analyses two implementations of a bank account transaction processor.
|
|
Both read account state and transactions from JSON files, validate each transaction, apply valid ones, reject invalid ones, and write results.
|
|
Both produce identical output, but \texttt{bank\_bad.py} violates many PEP\,8 and clean code principles, while \texttt{bank\_good.py} follows them consistently.
|
|
|
|
% ============================================
|
|
\section{Violation 1: Unused Imports and Import Formatting}
|
|
% ============================================
|
|
|
|
\begin{badbox}
|
|
\begin{lstlisting}
|
|
import json,sys,os,copy;from datetime import datetime
|
|
\end{lstlisting}
|
|
\end{badbox}
|
|
|
|
\begin{goodbox}
|
|
\begin{lstlisting}
|
|
import json
|
|
from typing import TypedDict, Optional
|
|
\end{lstlisting}
|
|
\end{goodbox}
|
|
|
|
\textbf{What is wrong:}
|
|
\begin{itemize}
|
|
\item \texttt{sys}, \texttt{os}, \texttt{copy}, and \texttt{datetime} are imported but \textbf{never used}.
|
|
\item All imports are \textbf{on a single line} separated by commas, with a semicolon joining two import statements.
|
|
\item PEP\,8 requires each import on its own line and groups separated by blank lines (standard library, third-party, local).
|
|
\end{itemize}
|
|
|
|
\begin{principlebox}[Principles Violated]
|
|
\begin{itemize}[nosep]
|
|
\item \textbf{PEP\,8 -- Imports}: Imports should be on separate lines. Remove unused imports.
|
|
\item \textbf{KISS}: Unused imports add noise and suggest false dependencies.
|
|
\end{itemize}
|
|
\end{principlebox}
|
|
|
|
% ============================================
|
|
\section{Violation 2: No Documentation or Docstrings}
|
|
% ============================================
|
|
|
|
\begin{badbox}
|
|
The file has \textbf{no module docstring} and \textbf{no function docstrings}. The only comment in the entire file is:
|
|
\begin{lstlisting}
|
|
# find account
|
|
...
|
|
# print results
|
|
\end{lstlisting}
|
|
These comments describe \textit{what} the next line does (which is already obvious from the code), not \textit{why}.
|
|
\end{badbox}
|
|
|
|
\begin{goodbox}
|
|
\begin{lstlisting}
|
|
"""Bank account transaction processor.
|
|
|
|
Reads account state and a list of transactions from JSON files,
|
|
validates and applies each transaction, then writes updated account
|
|
state and a transaction log (accepted / rejected) to output files.
|
|
"""
|
|
\end{lstlisting}
|
|
Every function has a docstring:
|
|
\begin{lstlisting}
|
|
def validate_common(
|
|
account: Optional[Account],
|
|
amount: float,
|
|
) -> Optional[str]:
|
|
"""Run validations shared by all transaction types.
|
|
|
|
Returns an error message string, or None if valid.
|
|
"""
|
|
\end{lstlisting}
|
|
\end{goodbox}
|
|
|
|
\begin{principlebox}[Principles Violated]
|
|
\begin{itemize}[nosep]
|
|
\item \textbf{PEP\,257}: All public modules and functions should have docstrings.
|
|
\item \textbf{Clean Code -- Comments}: Don't add noise comments that just restate the code. Comments should explain \textit{why}, not \textit{what}.
|
|
\end{itemize}
|
|
\end{principlebox}
|
|
|
|
% ============================================
|
|
\section{Violation 3: Implicit Data Model}
|
|
% ============================================
|
|
|
|
\begin{badbox}
|
|
The bad version operates on raw dictionaries with no type declarations.
|
|
A reader must trace through the JSON file and every dictionary access to understand the data shape:
|
|
\begin{lstlisting}
|
|
def proc(accs,txns):
|
|
for t in txns:
|
|
tp=t['type'];aid=t['account_id'];amt=t['amount'];tid=t['id']
|
|
a=None
|
|
for x in accs:
|
|
if x['account_id']==aid:a=x
|
|
\end{lstlisting}
|
|
What fields does \texttt{t} have? What fields does \texttt{a} have? There is no way to know without reading the JSON file.
|
|
\end{badbox}
|
|
|
|
\begin{goodbox}
|
|
The good version defines explicit data types:
|
|
\begin{lstlisting}
|
|
class Account(TypedDict):
|
|
"""A bank account with its current state."""
|
|
account_id: str
|
|
holder: str
|
|
balance: float
|
|
currency: str
|
|
status: str # "active" or "frozen"
|
|
|
|
class Transaction(TypedDict, total=False):
|
|
"""A financial transaction to be processed."""
|
|
id: str
|
|
type: str # "deposit", "withdrawal", or "transfer"
|
|
account_id: str
|
|
amount: float
|
|
description: str
|
|
to_account_id: str # only for transfers
|
|
status: str # added after processing
|
|
reason: str # added on rejection
|
|
\end{lstlisting}
|
|
All function signatures carry type annotations:
|
|
\begin{lstlisting}
|
|
def find_account(accounts: list[Account], account_id: str) -> Optional[Account]:
|
|
\end{lstlisting}
|
|
\end{goodbox}
|
|
|
|
\begin{principlebox}[Principles Violated]
|
|
\begin{itemize}[nosep]
|
|
\item \textbf{Zen of Python}: ``Explicit is better than implicit.''
|
|
\item \textbf{Clean Code -- Readability}: A reader should understand the data contract without tracing through runtime data.
|
|
\item \textbf{PEP\,484 / PEP\,589}: Use type hints and \texttt{TypedDict} to document the structure of dictionary-based data.
|
|
\end{itemize}
|
|
\end{principlebox}
|
|
|
|
% ============================================
|
|
\section{Violation 4: Poor Naming}
|
|
% ============================================
|
|
|
|
\begin{badbox}
|
|
\begin{lstlisting}
|
|
def loadJ(p): # "J" for JSON? "p" for path?
|
|
def saveJ(p,d): # "d" for data?
|
|
def proc(accs,txns): # "proc" does what exactly?
|
|
ok=[];bad=[] # acceptable vs. rejected
|
|
tp=t['type'] # "tp" is unpronounceable
|
|
aid=t['account_id'] # "aid" looks like "aid" (help)
|
|
amt=t['amount'] # "amt" -- abbreviation
|
|
tid=t['id'] # "tid" -- never used again!
|
|
a=None # "a" for account
|
|
ta=None # "ta" for target account
|
|
for x in accs: # "x" for what?
|
|
D=loadJ(...) # capital "D" for a local variable
|
|
T=loadJ(...) # capital "T" for a local variable
|
|
\end{lstlisting}
|
|
\end{badbox}
|
|
|
|
\begin{goodbox}
|
|
\begin{lstlisting}
|
|
def load_json(file_path):
|
|
def save_json(file_path, data):
|
|
def find_account(accounts, account_id):
|
|
def validate_common(account, amount):
|
|
def process_deposit(accounts, transaction):
|
|
def process_withdrawal(accounts, transaction):
|
|
def process_transfer(accounts, transaction):
|
|
def process_all_transactions(accounts, transactions):
|
|
def print_results(accounts, accepted, rejected):
|
|
\end{lstlisting}
|
|
\end{goodbox}
|
|
|
|
\textbf{What is wrong:}
|
|
\begin{itemize}
|
|
\item Function names use \textbf{abbreviations} (\texttt{loadJ}, \texttt{saveJ}, \texttt{proc}) instead of descriptive snake\_case names.
|
|
\item Variable names are \textbf{single letters or short abbreviations} (\texttt{a}, \texttt{t}, \texttt{x}, \texttt{tp}, \texttt{aid}, \texttt{amt}, \texttt{ta}).
|
|
\item \texttt{tid} is assigned but \textbf{never used} --- dead code.
|
|
\item \texttt{D} and \texttt{T} use \textbf{uppercase}, suggesting constants, but they are local variables.
|
|
\item The name \texttt{ok} for accepted transactions and \texttt{bad} for rejected ones is \textbf{imprecise}.
|
|
\end{itemize}
|
|
|
|
\begin{principlebox}[Principles Violated]
|
|
\begin{itemize}[nosep]
|
|
\item \textbf{PEP\,8 -- Naming}: Functions and variables use \texttt{lower\_case\_with\_underscores}. Constants use \texttt{UPPER\_CASE}.
|
|
\item \textbf{Clean Code -- Descriptive Names}: ``Other developers should figure out what a variable stores just by reading its name.''
|
|
\item \textbf{Clean Code -- Consistent Vocabulary}: Don't mix \texttt{ok}/\texttt{bad} with \texttt{accepted}/\texttt{rejected}.
|
|
\item \textbf{Clean Code -- No Abbreviations}: \texttt{amt}, \texttt{tp}, \texttt{tid} are not words.
|
|
\end{itemize}
|
|
\end{principlebox}
|
|
|
|
% ============================================
|
|
\section{Violation 5: Formatting -- Semicolons and Dense Lines}
|
|
% ============================================
|
|
|
|
\begin{badbox}
|
|
\begin{lstlisting}
|
|
f=open(p,'r');d=json.load(f);f.close();return d
|
|
\end{lstlisting}
|
|
\begin{lstlisting}
|
|
tp=t['type'];aid=t['account_id'];amt=t['amount'];tid=t['id']
|
|
\end{lstlisting}
|
|
\begin{lstlisting}
|
|
a['balance']=a['balance']+amt;t['status']='accepted';ok.append(t)
|
|
\end{lstlisting}
|
|
\begin{lstlisting}
|
|
if a==None:
|
|
t['reason']='account not found';bad.append(t);continue
|
|
\end{lstlisting}
|
|
\end{badbox}
|
|
|
|
\begin{goodbox}
|
|
Every statement is on its own line with proper whitespace:
|
|
\begin{lstlisting}
|
|
account = find_account(accounts, transaction["account_id"])
|
|
error = validate_common(account, transaction["amount"])
|
|
if error:
|
|
return False, error
|
|
|
|
account["balance"] += transaction["amount"]
|
|
return True, "accepted"
|
|
\end{lstlisting}
|
|
\end{goodbox}
|
|
|
|
\textbf{What is wrong:}
|
|
\begin{itemize}
|
|
\item \textbf{Semicolons} pack 3--4 statements onto one line, making it nearly impossible to follow the logic.
|
|
\item \textbf{No whitespace} around \texttt{=} and after commas.
|
|
\item Control flow (\texttt{continue}) is \textbf{hidden at the end of a dense line}.
|
|
\item PEP\,8 explicitly states: ``Compound statements (multiple statements on the same line) are generally discouraged.''
|
|
\end{itemize}
|
|
|
|
\begin{principlebox}[Principles Violated]
|
|
\begin{itemize}[nosep]
|
|
\item \textbf{PEP\,8 -- Compound Statements}: Generally discouraged. Each statement on its own line.
|
|
\item \textbf{PEP\,8 -- Whitespace}: Surround operators with spaces. Space after commas.
|
|
\item \textbf{Zen of Python}: ``Readability counts.'' ``Sparse is better than dense.''
|
|
\end{itemize}
|
|
\end{principlebox}
|
|
|
|
% ============================================
|
|
\section{Violation 6: No Context Managers for File I/O}
|
|
% ============================================
|
|
|
|
\begin{badbox}
|
|
\begin{lstlisting}
|
|
def loadJ(p):
|
|
f=open(p,'r');d=json.load(f);f.close();return d
|
|
|
|
def saveJ(p,d):
|
|
f=open(p,'w');json.dump(d,f,indent=2);f.close()
|
|
\end{lstlisting}
|
|
If \texttt{json.load(f)} raises an exception, the file is \textbf{never closed} because \texttt{f.close()} is skipped. This is a resource leak.
|
|
\end{badbox}
|
|
|
|
\begin{goodbox}
|
|
\begin{lstlisting}
|
|
def load_json(file_path: str) -> dict:
|
|
"""Read and parse a JSON file, returning the parsed data."""
|
|
with open(file_path, "r", encoding="utf-8") as file_handle:
|
|
return json.load(file_handle)
|
|
\end{lstlisting}
|
|
The \texttt{with} statement guarantees the file is closed even if an exception occurs.
|
|
\end{goodbox}
|
|
|
|
\begin{principlebox}[Principles Violated]
|
|
\begin{itemize}[nosep]
|
|
\item \textbf{Pythonic Code}: Always use context managers (\texttt{with}) for resource management.
|
|
\item \textbf{Clean Code -- Error Handling}: Code should be robust against exceptions. Manual \texttt{open}/\texttt{close} is error-prone.
|
|
\end{itemize}
|
|
\end{principlebox}
|
|
|
|
% ============================================
|
|
\section{Violation 7: God Function -- Single Responsibility Violation}
|
|
% ============================================
|
|
|
|
\begin{badbox}
|
|
The function \texttt{proc()} is 38 lines long and handles \textbf{all of the following} in a single function:
|
|
\begin{itemize}[nosep]
|
|
\item Finding accounts by ID
|
|
\item Validating account status
|
|
\item Validating amounts
|
|
\item Processing deposits
|
|
\item Processing withdrawals
|
|
\item Processing transfers (including finding the target account)
|
|
\item Handling unknown transaction types
|
|
\item Building accepted and rejected lists
|
|
\end{itemize}
|
|
\begin{lstlisting}
|
|
def proc(accs,txns):
|
|
ok=[];bad=[]
|
|
for t in txns:
|
|
... # 35 lines of nested if/elif/else with continue
|
|
return accs,ok,bad
|
|
\end{lstlisting}
|
|
\end{badbox}
|
|
|
|
\begin{goodbox}
|
|
The good version splits this into \textbf{seven focused functions}:
|
|
\begin{lstlisting}
|
|
def find_account(accounts, account_id): # lookup
|
|
def validate_common(account, amount): # shared validation
|
|
def process_deposit(accounts, transaction): # deposit logic
|
|
def process_withdrawal(accounts, transaction):# withdrawal logic
|
|
def process_transfer(accounts, transaction): # transfer logic
|
|
def process_all_transactions(accounts, transactions): # orchestration
|
|
def print_results(accounts, accepted, rejected): # output
|
|
\end{lstlisting}
|
|
A dispatch dictionary replaces the \texttt{if/elif} chain:
|
|
\begin{lstlisting}
|
|
TRANSACTION_HANDLERS = {
|
|
"deposit": process_deposit,
|
|
"withdrawal": process_withdrawal,
|
|
"transfer": process_transfer,
|
|
}
|
|
\end{lstlisting}
|
|
\end{goodbox}
|
|
|
|
\begin{principlebox}[Principles Violated]
|
|
\begin{itemize}[nosep]
|
|
\item \textbf{SRP (Single Responsibility Principle)}: Each function should have one reason to change.
|
|
\item \textbf{DRY (Don't Repeat Yourself)}: The amount validation (\texttt{amt<=0}) is duplicated for deposits and transfers in the bad version; \texttt{validate\_common()} eliminates this.
|
|
\item \textbf{Clean Code -- Short Functions}: Functions should be comprehensible in a few minutes.
|
|
\item \textbf{Open-Closed Principle}: Adding a new transaction type in the bad version requires modifying the \texttt{proc()} function. In the good version, you add a new handler function and register it in the dictionary.
|
|
\end{itemize}
|
|
\end{principlebox}
|
|
|
|
% ============================================
|
|
\section{Violation 8: Magic Strings Instead of Constants}
|
|
% ============================================
|
|
|
|
\begin{badbox}
|
|
\begin{lstlisting}
|
|
if a['status']!='active': # magic string
|
|
...
|
|
if tp=='deposit': # magic string
|
|
...
|
|
\end{lstlisting}
|
|
The strings \texttt{'active'}, \texttt{'deposit'}, \texttt{'withdrawal'}, and \texttt{'transfer'} appear throughout the code as \textbf{literals}. If the status name ever changed, every occurrence would need to be found and updated.
|
|
\end{badbox}
|
|
|
|
\begin{goodbox}
|
|
\begin{lstlisting}
|
|
ACTIVE_STATUS = "active"
|
|
...
|
|
if account["status"] != ACTIVE_STATUS:
|
|
\end{lstlisting}
|
|
Transaction types are handled via the \texttt{TRANSACTION\_HANDLERS} dictionary, so the string literals appear only \textbf{once} in the handler registration.
|
|
\end{goodbox}
|
|
|
|
\begin{principlebox}[Principles Violated]
|
|
\begin{itemize}[nosep]
|
|
\item \textbf{Clean Code -- No Magic Numbers/Strings}: Use named constants for values that carry domain meaning.
|
|
\item \textbf{DRY}: The same literal repeated in multiple places is a maintenance risk.
|
|
\end{itemize}
|
|
\end{principlebox}
|
|
|
|
% ============================================
|
|
\section{Violation 9: Comparison with \texttt{None}}
|
|
% ============================================
|
|
|
|
\begin{badbox}
|
|
\begin{lstlisting}
|
|
if a==None:
|
|
...
|
|
if ta==None:
|
|
...
|
|
\end{lstlisting}
|
|
\end{badbox}
|
|
|
|
\begin{goodbox}
|
|
\begin{lstlisting}
|
|
if account is None:
|
|
...
|
|
if target is None:
|
|
...
|
|
\end{lstlisting}
|
|
\end{goodbox}
|
|
|
|
PEP\,8 explicitly states: ``Comparisons to singletons like \texttt{None} should always be done with \texttt{is} or \texttt{is not}, never the equality operators.''
|
|
The \texttt{is} operator checks \textbf{identity} (the correct test for \texttt{None}), while \texttt{==} checks \textbf{equality} and can be overridden by custom \texttt{\_\_eq\_\_} methods.
|
|
|
|
\begin{principlebox}[Principles Violated]
|
|
\begin{itemize}[nosep]
|
|
\item \textbf{PEP\,8 -- Programming Recommendations}: Use \texttt{is None}, not \texttt{== None}.
|
|
\end{itemize}
|
|
\end{principlebox}
|
|
|
|
% ============================================
|
|
\section{Violation 10: Missing \texttt{\_\_main\_\_} Guard and String Formatting}
|
|
% ============================================
|
|
|
|
\begin{badbox}
|
|
\begin{lstlisting}
|
|
main()
|
|
\end{lstlisting}
|
|
\begin{lstlisting}
|
|
print(" "+a['account_id']+" "+a['holder']+": "+str(a['balance'])
|
|
+" "+a['currency']+" ("+a['status']+")")
|
|
\end{lstlisting}
|
|
\end{badbox}
|
|
|
|
\begin{goodbox}
|
|
\begin{lstlisting}
|
|
if __name__ == "__main__":
|
|
main()
|
|
\end{lstlisting}
|
|
\begin{lstlisting}
|
|
print(
|
|
f" {account['account_id']} {account['holder']}: "
|
|
f"{account['balance']:.2f} {account['currency']} "
|
|
f"({account['status']})"
|
|
)
|
|
\end{lstlisting}
|
|
\end{goodbox}
|
|
|
|
\textbf{What is wrong:}
|
|
\begin{itemize}
|
|
\item No \texttt{\_\_main\_\_} guard means importing the module triggers execution.
|
|
\item String concatenation with \texttt{+} and \texttt{str()} is harder to read than f-strings.
|
|
\item The bad version does not format numbers (\texttt{str(5000.0)} vs.\ \texttt{5000.00}).
|
|
\end{itemize}
|
|
|
|
\begin{principlebox}[Principles Violated]
|
|
\begin{itemize}[nosep]
|
|
\item \textbf{Clean Code -- Avoid Side Effects}: Importing should not trigger execution.
|
|
\item \textbf{Pythonic Code}: Use f-strings for string formatting.
|
|
\end{itemize}
|
|
\end{principlebox}
|
|
|
|
% ============================================
|
|
\section{Summary of Violations}
|
|
% ============================================
|
|
|
|
\begin{center}
|
|
\small
|
|
\begin{tabular}{@{}rp{4.5cm}p{5.5cm}@{}}
|
|
\toprule
|
|
\textbf{\#} & \textbf{Violation} & \textbf{Principle / PEP\,8 Rule} \\
|
|
\midrule
|
|
1 & Unused imports, one-line format & PEP\,8 Imports, KISS \\
|
|
2 & No docstrings, noise comments & PEP\,257, Clean Code Documentation \\
|
|
3 & Implicit data model (raw dicts) & Explicit $>$ Implicit, PEP\,484/589 \\
|
|
4 & Abbreviations, single-letter names & PEP\,8 Naming, Descriptive Names \\
|
|
5 & Semicolons, dense lines, no whitespace & PEP\,8 Whitespace, Zen of Python \\
|
|
6 & Manual file open/close & Pythonic Code, Context Managers \\
|
|
7 & God function (38-line \texttt{proc}) & SRP, DRY, Open-Closed Principle \\
|
|
8 & Magic strings & No Magic Numbers, DRY \\
|
|
9 & \texttt{== None} instead of \texttt{is None} & PEP\,8 Programming Recommendations \\
|
|
10 & No \texttt{\_\_main\_\_} guard, string concat & Side Effects, Pythonic Code \\
|
|
\bottomrule
|
|
\end{tabular}
|
|
\end{center}
|
|
|
|
\end{document}
|