416 lines
16 KiB
TeX

\documentclass[12pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}
\usepackage{geometry}
\geometry{margin=2.5cm}
\usepackage{xcolor}
\usepackage{tcolorbox}
\usepackage{booktabs}
\usepackage{hyperref}
\usepackage{listings}
\usepackage{enumitem}
\definecolor{seblue}{rgb}{0.0,0.28,0.67}
\definecolor{segreen}{rgb}{0.13,0.55,0.13}
\definecolor{sered}{rgb}{0.7,0.13,0.13}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\lstdefinestyle{pystyle}{
backgroundcolor=\color{backcolour},
commentstyle=\color{codegreen},
keywordstyle=\color{blue},
stringstyle=\color{codepurple},
basicstyle=\ttfamily\footnotesize,
breaklines=true,
keepspaces=true,
showstringspaces=false,
tabsize=4,
language=Python
}
\lstset{style=pystyle}
\newtcolorbox{badbox}{
colback=red!5!white,
colframe=sered,
title=Bad Code,
fonttitle=\bfseries\small,
boxrule=0.8pt, arc=2pt,
top=2pt, bottom=2pt, left=4pt, right=4pt
}
\newtcolorbox{goodbox}{
colback=green!5!white,
colframe=segreen,
title=Clean Code,
fonttitle=\bfseries\small,
boxrule=0.8pt, arc=2pt,
top=2pt, bottom=2pt, left=4pt, right=4pt
}
\newtcolorbox{principlebox}[1][]{
colback=blue!5!white,
colframe=seblue,
title=#1,
fonttitle=\bfseries\small,
boxrule=0.8pt, arc=2pt,
top=2pt, bottom=2pt, left=4pt, right=4pt
}
\title{\textcolor{seblue}{Code Analysis: Arithmetic Expression Calculator}\\[0.3em]
\large What Makes Code Bad and How to Fix It\\[0.3em]
\normalsize AISE501 -- AI in Software Engineering I}
\author{Dr.\ Florian Herzog}
\date{Spring Semester 2026}
\begin{document}
\maketitle
\tableofcontents
\newpage
% ============================================
\section{Overview}
% ============================================
This document analyses two implementations of the same program --- an arithmetic expression calculator that parses and evaluates strings like \texttt{"3 + 5 * 2"} without using Python's \texttt{eval()}.
Both produce correct results, but the first version (\texttt{calculator\_bad.py}) violates numerous PEP\,8 and clean code principles, while the second (\texttt{calculator\_good.py}) follows them consistently.
The analysis is structured by violation category, with side-by-side comparisons of the bad and good code and references to the specific PEP\,8 rules or clean code principles that apply.
% ============================================
\section{Violation 1: Unused and Poorly Formatted Imports}
% ============================================
\begin{badbox}
\begin{lstlisting}
import sys,os,re;from typing import *
\end{lstlisting}
\end{badbox}
\textbf{What is wrong:}
\begin{itemize}
\item \texttt{sys}, \texttt{os}, and \texttt{re} are imported but \textbf{never used} anywhere in the code.
\item Multiple imports are crammed onto \textbf{one line separated by commas}, violating PEP\,8's rule that imports should be on separate lines.
\item A \textbf{semicolon} joins two import statements on one line.
\item \texttt{from typing import *} is a \textbf{wildcard import} that pollutes the namespace.
\end{itemize}
\begin{goodbox}
The good version has \textbf{no imports at all} --- the calculator uses only built-in Python features.
\end{goodbox}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,8 -- Imports}: ``Imports should usually be on separate lines.'' Wildcard imports (\texttt{from X import *}) should be avoided.
\item \textbf{KISS}: Unused imports add unnecessary complexity.
\item \textbf{Clean Code}: Dead code (unused imports) confuses readers about dependencies.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 2: No Module Docstring or Documentation}
% ============================================
\begin{badbox}
\begin{lstlisting}
# calculator program
def scicalc(s):
\end{lstlisting}
The only ``documentation'' is a single vague comment. No module docstring, no function docstrings.
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
"""Simple arithmetic expression calculator with a recursive-descent parser.
Supported operations: +, -, *, / and parentheses.
Does NOT use Python's eval().
Grammar:
expression = term (('+' | '-') term)*
term = factor (('*' | '/') factor)*
factor = NUMBER | '(' expression ')'
"""
\end{lstlisting}
The good version opens with a module docstring that explains the purpose, supported operations, and even the formal grammar. Every function also has a docstring.
\end{goodbox}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,257}: All public modules, functions, classes, and methods should have docstrings.
\item \textbf{Clean Code -- Documentation}: Good documentation helps current and future developers understand the intent.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 3: Poor Naming Conventions}
% ============================================
\begin{badbox}
\begin{lstlisting}
def scicalc(s): # What does "scicalc" mean?
def doPlusMinus(s,a,b):# camelCase, not snake_case
def doMulDiv(s,a,b): # "do" is vague
def getNum(s, a,b): # inconsistent spacing
t=s[a:b] # "t" for what?
c=t[i] # "c" for what?
L=doPlusMinus(...) # uppercase "L" for a local variable
R=doMulDiv(...) # uppercase "R" for a local variable
r=doPlusMinus(...) # "r" for result?
\end{lstlisting}
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
def tokenize(expression_text):
def parse_expression(tokens, position):
def parse_term(tokens, position):
def parse_factor(tokens, position):
def calculate(expression_text):
character = expression_text[position]
operator = tokens[position]
right_value, position = parse_term(tokens, position)
result, final_position = parse_expression(tokens, 0)
\end{lstlisting}
\end{goodbox}
\textbf{What is wrong in the bad version:}
\begin{itemize}
\item Function names use \textbf{camelCase} (\texttt{doPlusMinus}) instead of \textbf{snake\_case}.
\item Variable names are \textbf{single letters} (\texttt{s}, \texttt{a}, \texttt{b}, \texttt{t}, \texttt{c}, \texttt{r}) --- impossible to understand without reading every line.
\item \texttt{L} and \texttt{R} use \textbf{uppercase} for local variables, which PEP\,8 reserves for constants.
\item Names like \texttt{scicalc} are \textbf{abbreviations} that are not pronounceable or self-explanatory.
\item The list of test data is called \texttt{Data} (capitalised like a class) and results \texttt{Res}.
\end{itemize}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,8 -- Naming}: Functions and variables use \texttt{lower\_case\_with\_underscores}. Constants use \texttt{UPPER\_CASE}.
\item \textbf{Clean Code -- Descriptive Names}: Names should reveal intent. A reader should know what a variable holds without tracing its assignment.
\item \textbf{Clean Code -- Pronounceable Names}: \texttt{scicalc} is not a word anyone would say in a conversation.
\item \textbf{Clean Code -- No Abbreviations}: \texttt{doPlusMinus} is better than \texttt{dPM}, but \texttt{parse\_expression} communicates the actual operation.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 4: Formatting and Whitespace}
% ============================================
\begin{badbox}
\begin{lstlisting}
def scicalc(s):
s=s.replace(' ','') # 2-space indent
if s=='':return 0 # no spaces around ==
r=doPlusMinus(s,0,len(s))
return r
def doPlusMinus(s,a,b):
t=s[a:b]; level=0; i=len(t)-1 # 4-space indent, semicolons
while i>=0: # no space around >=
if level==0 and(c=='*' or c=='/'): # missing space before (
L = doMulDiv(s,a,a+i); R = getNum(s,a+i+1,b)
\end{lstlisting}
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
def parse_expression(tokens, position):
result, position = parse_term(tokens, position)
while position < len(tokens) and tokens[position] in ("+", "-"):
operator = tokens[position]
position += 1
right_value, position = parse_term(tokens, position)
\end{lstlisting}
\end{goodbox}
\textbf{What is wrong:}
\begin{itemize}
\item \textbf{Inconsistent indentation}: \texttt{scicalc} uses 2 spaces, other functions use 4 spaces. PEP\,8 requires 4 spaces consistently.
\item \textbf{Semicolons} to put multiple statements on one line (\texttt{t=s[a:b]; level=0; i=len(t)-1}).
\item \textbf{Missing whitespace} around operators: \texttt{s=s.replace}, \texttt{i>=0}, \texttt{level==0 and(c==...}.
\item \textbf{No blank lines} between logical sections within functions or between function definitions. PEP\,8 requires two blank lines before and after top-level functions.
\item Multiple \texttt{return} or assignment statements \textbf{on the same line} as \texttt{if}: \texttt{if s=='':return 0}.
\end{itemize}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,8 -- Indentation}: Use 4 spaces per indentation level.
\item \textbf{PEP\,8 -- Whitespace}: Surround binary operators with single spaces. Avoid compound statements on one line.
\item \textbf{PEP\,8 -- Blank Lines}: Two blank lines around top-level definitions.
\item \textbf{Zen of Python}: ``Sparse is better than dense.''
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 5: Error Handling}
% ============================================
\begin{badbox}
\begin{lstlisting}
if R==0:print("ERROR division by zero!!!") ;return 0
\end{lstlisting}
\begin{lstlisting}
try:
x = float(t)
except:
print("bad number: "+t);x=0
return x
\end{lstlisting}
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
if right_value == 0:
raise ZeroDivisionError("Division by zero")
\end{lstlisting}
\begin{lstlisting}
try:
tokens = tokenize(expression_text)
result, final_position = parse_expression(tokens, 0)
...
except (ValueError, ZeroDivisionError) as error:
return f"Error: {error}"
\end{lstlisting}
\end{goodbox}
\textbf{What is wrong in the bad version:}
\begin{itemize}
\item \textbf{Bare \texttt{except}} catches every exception including \texttt{KeyboardInterrupt} and \texttt{SystemExit} --- masking real bugs.
\item Errors are handled by \textbf{printing and returning a dummy value} (0), which silently produces wrong results. The caller has no way to know an error occurred.
\item The error message style is inconsistent: \texttt{"ERROR division by zero!!!"} vs.\ \texttt{"bad number: ..."}.
\end{itemize}
\textbf{What the good version does:}
\begin{itemize}
\item Errors \textbf{raise specific exceptions} (\texttt{ValueError}, \texttt{ZeroDivisionError}) at the point of detection.
\item The top-level \texttt{calculate()} function catches \textbf{only expected exceptions} and returns a formatted error string.
\item Errors \textbf{propagate} rather than being silently swallowed.
\end{itemize}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{PEP\,8 -- Exceptions}: Catch specific exceptions, never use bare \texttt{except}.
\item \textbf{Zen of Python}: ``Errors should never pass silently. Unless explicitly silenced.''
\item \textbf{Clean Code -- Error Handling}: Anticipate errors and handle them gracefully. Returning magic values (0 for an error) is an anti-pattern.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 6: Function Structure and Single Responsibility}
% ============================================
\begin{badbox}
The bad version has three intertwined functions (\texttt{doPlusMinus}, \texttt{doMulDiv}, \texttt{getNum}) that each take the \textbf{entire string plus two index parameters} and internally slice the string. Parsing, tokenisation, and evaluation are all mixed together.
\begin{lstlisting}
def doPlusMinus(s,a,b):
t=s[a:b]; level=0; i=len(t)-1
while i>=0:
...
L=doPlusMinus(s,a,a+i);R=doMulDiv(s,a+i+1,b)
...
return doMulDiv(s,a,b)
\end{lstlisting}
\end{badbox}
\begin{goodbox}
The good version separates \textbf{tokenisation} from \textbf{parsing}:
\begin{lstlisting}
tokens = tokenize(expression_text) # Step 1: tokenise
result, position = parse_expression(tokens, 0) # Step 2: parse
\end{lstlisting}
Each parser function has a single, clear responsibility:
\begin{itemize}[nosep]
\item \texttt{tokenize()} -- converts text to tokens
\item \texttt{parse\_expression()} -- handles \texttt{+} and \texttt{-}
\item \texttt{parse\_term()} -- handles \texttt{*} and \texttt{/}
\item \texttt{parse\_factor()} -- handles numbers and parentheses
\item \texttt{calculate()} -- orchestrates the pipeline and error handling
\end{itemize}
\end{goodbox}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{SRP (Single Responsibility Principle)}: Each function should do one thing.
\item \textbf{SoC (Separation of Concerns)}: Tokenisation and parsing are different concerns.
\item \textbf{Clean Code -- Short Functions}: If a function takes more than a few minutes to comprehend, it should be refactored.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 7: Missing \texttt{\_\_main\_\_} Guard}
% ============================================
\begin{badbox}
\begin{lstlisting}
main()
\end{lstlisting}
The bad version calls \texttt{main()} at the module level. If another script imports this file, the calculator runs immediately as a side effect.
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
if __name__ == "__main__":
main()
\end{lstlisting}
The good version uses the standard \texttt{\_\_main\_\_} guard, so the module can be safely imported without executing the calculator.
\end{goodbox}
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{Clean Code -- Avoid Side Effects}: Importing a module should not trigger execution.
\item \textbf{Python Best Practice}: The \texttt{if \_\_name\_\_ == "\_\_main\_\_"} guard is standard for all runnable scripts.
\end{itemize}
\end{principlebox}
% ============================================
\section{Violation 8: String Concatenation Instead of f-Strings}
% ============================================
\begin{badbox}
\begin{lstlisting}
print(d+" = "+str(Res))
\end{lstlisting}
\end{badbox}
\begin{goodbox}
\begin{lstlisting}
print(f"{display_expr} = {result}")
\end{lstlisting}
\end{goodbox}
String concatenation with \texttt{+} and manual \texttt{str()} calls is harder to read than f-strings, which are the idiomatic Python 3.6+ way to format output.
\begin{principlebox}[Principles Violated]
\begin{itemize}[nosep]
\item \textbf{Pythonic Code}: Use f-strings for string formatting (readable, efficient).
\item \textbf{Clean Code -- Readability}: f-strings make the output format immediately visible.
\end{itemize}
\end{principlebox}
% ============================================
\section{Summary of Violations}
% ============================================
\begin{center}
\small
\begin{tabular}{@{}rp{5cm}p{5.5cm}@{}}
\toprule
\textbf{\#} & \textbf{Violation} & \textbf{Principle / PEP\,8 Rule} \\
\midrule
1 & Unused imports, wildcard import, one-line imports & PEP\,8 Imports, KISS \\
2 & No docstrings or documentation & PEP\,257, Clean Code Documentation \\
3 & camelCase names, single-letter variables, abbreviations & PEP\,8 Naming, Descriptive Names \\
4 & Inconsistent indent, semicolons, missing whitespace & PEP\,8 Indentation \& Whitespace \\
5 & Bare except, silent error swallowing & PEP\,8 Exceptions, Zen of Python \\
6 & Mixed concerns, long tangled functions & SRP, SoC, Short Functions \\
7 & No \texttt{\_\_main\_\_} guard & Avoid Side Effects \\
8 & String concatenation instead of f-strings & Pythonic Code, Readability \\
\bottomrule
\end{tabular}
\end{center}
\end{document}