\documentclass[12pt,a4paper]{article} \usepackage[utf8]{inputenc} \usepackage[T1]{fontenc} \usepackage[english]{babel} \usepackage{geometry} \geometry{margin=2.5cm} \usepackage{xcolor} \usepackage{tcolorbox} \usepackage{booktabs} \usepackage{hyperref} \usepackage{listings} \usepackage{enumitem} \definecolor{seblue}{rgb}{0.0,0.28,0.67} \definecolor{segreen}{rgb}{0.13,0.55,0.13} \definecolor{sered}{rgb}{0.7,0.13,0.13} \definecolor{backcolour}{rgb}{0.95,0.95,0.92} \definecolor{codegreen}{rgb}{0,0.6,0} \definecolor{codepurple}{rgb}{0.58,0,0.82} \lstdefinestyle{pystyle}{ backgroundcolor=\color{backcolour}, commentstyle=\color{codegreen}, keywordstyle=\color{blue}, stringstyle=\color{codepurple}, basicstyle=\ttfamily\footnotesize, breaklines=true, keepspaces=true, showstringspaces=false, tabsize=4, language=Python } \lstset{style=pystyle} \newtcolorbox{badbox}{ colback=red!5!white, colframe=sered, title=Bad Code, fonttitle=\bfseries\small, boxrule=0.8pt, arc=2pt, top=2pt, bottom=2pt, left=4pt, right=4pt } \newtcolorbox{goodbox}{ colback=green!5!white, colframe=segreen, title=Clean Code, fonttitle=\bfseries\small, boxrule=0.8pt, arc=2pt, top=2pt, bottom=2pt, left=4pt, right=4pt } \newtcolorbox{principlebox}[1][]{ colback=blue!5!white, colframe=seblue, title=#1, fonttitle=\bfseries\small, boxrule=0.8pt, arc=2pt, top=2pt, bottom=2pt, left=4pt, right=4pt } \title{\textcolor{seblue}{Code Analysis: Arithmetic Expression Calculator}\\[0.3em] \large What Makes Code Bad and How to Fix It\\[0.3em] \normalsize AISE501 -- AI in Software Engineering I} \author{Dr.\ Florian Herzog} \date{Spring Semester 2026} \begin{document} \maketitle \tableofcontents \newpage % ============================================ \section{Overview} % ============================================ This document analyses two implementations of the same program --- an arithmetic expression calculator that parses and evaluates strings like \texttt{"3 + 5 * 2"} without using Python's \texttt{eval()}. Both produce correct results, but the first version (\texttt{calculator\_bad.py}) violates numerous PEP\,8 and clean code principles, while the second (\texttt{calculator\_good.py}) follows them consistently. The analysis is structured by violation category, with side-by-side comparisons of the bad and good code and references to the specific PEP\,8 rules or clean code principles that apply. % ============================================ \section{Violation 1: Unused and Poorly Formatted Imports} % ============================================ \begin{badbox} \begin{lstlisting} import sys,os,re;from typing import * \end{lstlisting} \end{badbox} \textbf{What is wrong:} \begin{itemize} \item \texttt{sys}, \texttt{os}, and \texttt{re} are imported but \textbf{never used} anywhere in the code. \item Multiple imports are crammed onto \textbf{one line separated by commas}, violating PEP\,8's rule that imports should be on separate lines. \item A \textbf{semicolon} joins two import statements on one line. \item \texttt{from typing import *} is a \textbf{wildcard import} that pollutes the namespace. \end{itemize} \begin{goodbox} The good version has \textbf{no imports at all} --- the calculator uses only built-in Python features. \end{goodbox} \begin{principlebox}[Principles Violated] \begin{itemize}[nosep] \item \textbf{PEP\,8 -- Imports}: ``Imports should usually be on separate lines.'' Wildcard imports (\texttt{from X import *}) should be avoided. \item \textbf{KISS}: Unused imports add unnecessary complexity. \item \textbf{Clean Code}: Dead code (unused imports) confuses readers about dependencies. \end{itemize} \end{principlebox} % ============================================ \section{Violation 2: No Module Docstring or Documentation} % ============================================ \begin{badbox} \begin{lstlisting} # calculator program def scicalc(s): \end{lstlisting} The only ``documentation'' is a single vague comment. No module docstring, no function docstrings. \end{badbox} \begin{goodbox} \begin{lstlisting} """Simple arithmetic expression calculator with a recursive-descent parser. Supported operations: +, -, *, / and parentheses. Does NOT use Python's eval(). Grammar: expression = term (('+' | '-') term)* term = factor (('*' | '/') factor)* factor = NUMBER | '(' expression ')' """ \end{lstlisting} The good version opens with a module docstring that explains the purpose, supported operations, and even the formal grammar. Every function also has a docstring. \end{goodbox} \begin{principlebox}[Principles Violated] \begin{itemize}[nosep] \item \textbf{PEP\,257}: All public modules, functions, classes, and methods should have docstrings. \item \textbf{Clean Code -- Documentation}: Good documentation helps current and future developers understand the intent. \end{itemize} \end{principlebox} % ============================================ \section{Violation 3: Poor Naming Conventions} % ============================================ \begin{badbox} \begin{lstlisting} def scicalc(s): # What does "scicalc" mean? def doPlusMinus(s,a,b):# camelCase, not snake_case def doMulDiv(s,a,b): # "do" is vague def getNum(s, a,b): # inconsistent spacing t=s[a:b] # "t" for what? c=t[i] # "c" for what? L=doPlusMinus(...) # uppercase "L" for a local variable R=doMulDiv(...) # uppercase "R" for a local variable r=doPlusMinus(...) # "r" for result? \end{lstlisting} \end{badbox} \begin{goodbox} \begin{lstlisting} def tokenize(expression_text): def parse_expression(tokens, position): def parse_term(tokens, position): def parse_factor(tokens, position): def calculate(expression_text): character = expression_text[position] operator = tokens[position] right_value, position = parse_term(tokens, position) result, final_position = parse_expression(tokens, 0) \end{lstlisting} \end{goodbox} \textbf{What is wrong in the bad version:} \begin{itemize} \item Function names use \textbf{camelCase} (\texttt{doPlusMinus}) instead of \textbf{snake\_case}. \item Variable names are \textbf{single letters} (\texttt{s}, \texttt{a}, \texttt{b}, \texttt{t}, \texttt{c}, \texttt{r}) --- impossible to understand without reading every line. \item \texttt{L} and \texttt{R} use \textbf{uppercase} for local variables, which PEP\,8 reserves for constants. \item Names like \texttt{scicalc} are \textbf{abbreviations} that are not pronounceable or self-explanatory. \item The list of test data is called \texttt{Data} (capitalised like a class) and results \texttt{Res}. \end{itemize} \begin{principlebox}[Principles Violated] \begin{itemize}[nosep] \item \textbf{PEP\,8 -- Naming}: Functions and variables use \texttt{lower\_case\_with\_underscores}. Constants use \texttt{UPPER\_CASE}. \item \textbf{Clean Code -- Descriptive Names}: Names should reveal intent. A reader should know what a variable holds without tracing its assignment. \item \textbf{Clean Code -- Pronounceable Names}: \texttt{scicalc} is not a word anyone would say in a conversation. \item \textbf{Clean Code -- No Abbreviations}: \texttt{doPlusMinus} is better than \texttt{dPM}, but \texttt{parse\_expression} communicates the actual operation. \end{itemize} \end{principlebox} % ============================================ \section{Violation 4: Formatting and Whitespace} % ============================================ \begin{badbox} \begin{lstlisting} def scicalc(s): s=s.replace(' ','') # 2-space indent if s=='':return 0 # no spaces around == r=doPlusMinus(s,0,len(s)) return r def doPlusMinus(s,a,b): t=s[a:b]; level=0; i=len(t)-1 # 4-space indent, semicolons while i>=0: # no space around >= if level==0 and(c=='*' or c=='/'): # missing space before ( L = doMulDiv(s,a,a+i); R = getNum(s,a+i+1,b) \end{lstlisting} \end{badbox} \begin{goodbox} \begin{lstlisting} def parse_expression(tokens, position): result, position = parse_term(tokens, position) while position < len(tokens) and tokens[position] in ("+", "-"): operator = tokens[position] position += 1 right_value, position = parse_term(tokens, position) \end{lstlisting} \end{goodbox} \textbf{What is wrong:} \begin{itemize} \item \textbf{Inconsistent indentation}: \texttt{scicalc} uses 2 spaces, other functions use 4 spaces. PEP\,8 requires 4 spaces consistently. \item \textbf{Semicolons} to put multiple statements on one line (\texttt{t=s[a:b]; level=0; i=len(t)-1}). \item \textbf{Missing whitespace} around operators: \texttt{s=s.replace}, \texttt{i>=0}, \texttt{level==0 and(c==...}. \item \textbf{No blank lines} between logical sections within functions or between function definitions. PEP\,8 requires two blank lines before and after top-level functions. \item Multiple \texttt{return} or assignment statements \textbf{on the same line} as \texttt{if}: \texttt{if s=='':return 0}. \end{itemize} \begin{principlebox}[Principles Violated] \begin{itemize}[nosep] \item \textbf{PEP\,8 -- Indentation}: Use 4 spaces per indentation level. \item \textbf{PEP\,8 -- Whitespace}: Surround binary operators with single spaces. Avoid compound statements on one line. \item \textbf{PEP\,8 -- Blank Lines}: Two blank lines around top-level definitions. \item \textbf{Zen of Python}: ``Sparse is better than dense.'' \end{itemize} \end{principlebox} % ============================================ \section{Violation 5: Error Handling} % ============================================ \begin{badbox} \begin{lstlisting} if R==0:print("ERROR division by zero!!!") ;return 0 \end{lstlisting} \begin{lstlisting} try: x = float(t) except: print("bad number: "+t);x=0 return x \end{lstlisting} \end{badbox} \begin{goodbox} \begin{lstlisting} if right_value == 0: raise ZeroDivisionError("Division by zero") \end{lstlisting} \begin{lstlisting} try: tokens = tokenize(expression_text) result, final_position = parse_expression(tokens, 0) ... except (ValueError, ZeroDivisionError) as error: return f"Error: {error}" \end{lstlisting} \end{goodbox} \textbf{What is wrong in the bad version:} \begin{itemize} \item \textbf{Bare \texttt{except}} catches every exception including \texttt{KeyboardInterrupt} and \texttt{SystemExit} --- masking real bugs. \item Errors are handled by \textbf{printing and returning a dummy value} (0), which silently produces wrong results. The caller has no way to know an error occurred. \item The error message style is inconsistent: \texttt{"ERROR division by zero!!!"} vs.\ \texttt{"bad number: ..."}. \end{itemize} \textbf{What the good version does:} \begin{itemize} \item Errors \textbf{raise specific exceptions} (\texttt{ValueError}, \texttt{ZeroDivisionError}) at the point of detection. \item The top-level \texttt{calculate()} function catches \textbf{only expected exceptions} and returns a formatted error string. \item Errors \textbf{propagate} rather than being silently swallowed. \end{itemize} \begin{principlebox}[Principles Violated] \begin{itemize}[nosep] \item \textbf{PEP\,8 -- Exceptions}: Catch specific exceptions, never use bare \texttt{except}. \item \textbf{Zen of Python}: ``Errors should never pass silently. Unless explicitly silenced.'' \item \textbf{Clean Code -- Error Handling}: Anticipate errors and handle them gracefully. Returning magic values (0 for an error) is an anti-pattern. \end{itemize} \end{principlebox} % ============================================ \section{Violation 6: Function Structure and Single Responsibility} % ============================================ \begin{badbox} The bad version has three intertwined functions (\texttt{doPlusMinus}, \texttt{doMulDiv}, \texttt{getNum}) that each take the \textbf{entire string plus two index parameters} and internally slice the string. Parsing, tokenisation, and evaluation are all mixed together. \begin{lstlisting} def doPlusMinus(s,a,b): t=s[a:b]; level=0; i=len(t)-1 while i>=0: ... L=doPlusMinus(s,a,a+i);R=doMulDiv(s,a+i+1,b) ... return doMulDiv(s,a,b) \end{lstlisting} \end{badbox} \begin{goodbox} The good version separates \textbf{tokenisation} from \textbf{parsing}: \begin{lstlisting} tokens = tokenize(expression_text) # Step 1: tokenise result, position = parse_expression(tokens, 0) # Step 2: parse \end{lstlisting} Each parser function has a single, clear responsibility: \begin{itemize}[nosep] \item \texttt{tokenize()} -- converts text to tokens \item \texttt{parse\_expression()} -- handles \texttt{+} and \texttt{-} \item \texttt{parse\_term()} -- handles \texttt{*} and \texttt{/} \item \texttt{parse\_factor()} -- handles numbers and parentheses \item \texttt{calculate()} -- orchestrates the pipeline and error handling \end{itemize} \end{goodbox} \begin{principlebox}[Principles Violated] \begin{itemize}[nosep] \item \textbf{SRP (Single Responsibility Principle)}: Each function should do one thing. \item \textbf{SoC (Separation of Concerns)}: Tokenisation and parsing are different concerns. \item \textbf{Clean Code -- Short Functions}: If a function takes more than a few minutes to comprehend, it should be refactored. \end{itemize} \end{principlebox} % ============================================ \section{Violation 7: Missing \texttt{\_\_main\_\_} Guard} % ============================================ \begin{badbox} \begin{lstlisting} main() \end{lstlisting} The bad version calls \texttt{main()} at the module level. If another script imports this file, the calculator runs immediately as a side effect. \end{badbox} \begin{goodbox} \begin{lstlisting} if __name__ == "__main__": main() \end{lstlisting} The good version uses the standard \texttt{\_\_main\_\_} guard, so the module can be safely imported without executing the calculator. \end{goodbox} \begin{principlebox}[Principles Violated] \begin{itemize}[nosep] \item \textbf{Clean Code -- Avoid Side Effects}: Importing a module should not trigger execution. \item \textbf{Python Best Practice}: The \texttt{if \_\_name\_\_ == "\_\_main\_\_"} guard is standard for all runnable scripts. \end{itemize} \end{principlebox} % ============================================ \section{Violation 8: String Concatenation Instead of f-Strings} % ============================================ \begin{badbox} \begin{lstlisting} print(d+" = "+str(Res)) \end{lstlisting} \end{badbox} \begin{goodbox} \begin{lstlisting} print(f"{display_expr} = {result}") \end{lstlisting} \end{goodbox} String concatenation with \texttt{+} and manual \texttt{str()} calls is harder to read than f-strings, which are the idiomatic Python 3.6+ way to format output. \begin{principlebox}[Principles Violated] \begin{itemize}[nosep] \item \textbf{Pythonic Code}: Use f-strings for string formatting (readable, efficient). \item \textbf{Clean Code -- Readability}: f-strings make the output format immediately visible. \end{itemize} \end{principlebox} % ============================================ \section{Summary of Violations} % ============================================ \begin{center} \small \begin{tabular}{@{}rp{5cm}p{5.5cm}@{}} \toprule \textbf{\#} & \textbf{Violation} & \textbf{Principle / PEP\,8 Rule} \\ \midrule 1 & Unused imports, wildcard import, one-line imports & PEP\,8 Imports, KISS \\ 2 & No docstrings or documentation & PEP\,257, Clean Code Documentation \\ 3 & camelCase names, single-letter variables, abbreviations & PEP\,8 Naming, Descriptive Names \\ 4 & Inconsistent indent, semicolons, missing whitespace & PEP\,8 Indentation \& Whitespace \\ 5 & Bare except, silent error swallowing & PEP\,8 Exceptions, Zen of Python \\ 6 & Mixed concerns, long tangled functions & SRP, SoC, Short Functions \\ 7 & No \texttt{\_\_main\_\_} guard & Avoid Side Effects \\ 8 & String concatenation instead of f-strings & Pythonic Code, Readability \\ \bottomrule \end{tabular} \end{center} \end{document}