68 lines
2.1 KiB
Python
68 lines
2.1 KiB
Python
"""
|
||
analyze_me.py – A data-processing script used in Exercise 2
|
||
==============================================================
|
||
This file contains several realistic bugs and style issues.
|
||
Do NOT fix them manually — in Exercise 2 the LLM will help you find them!
|
||
|
||
Can you spot the issues yourself before asking the LLM?
|
||
"""
|
||
|
||
|
||
def calculate_statistics(numbers):
|
||
total = 0
|
||
for n in numbers:
|
||
total = total + n
|
||
average = total / len(numbers) # Bug 1: ZeroDivisionError when list is empty
|
||
|
||
min_val = numbers[0] # Bug 2: IndexError when list is empty
|
||
max_val = numbers[0]
|
||
for n in numbers:
|
||
if n < min_val:
|
||
min_val = n
|
||
if n > max_val:
|
||
max_val = n
|
||
|
||
variance = 0
|
||
for n in numbers:
|
||
variance = variance + (n - average) ** 2
|
||
variance = variance / len(numbers) # Bug 3: population variance (÷N), not sample variance (÷N-1)
|
||
|
||
return {
|
||
"count": len(numbers),
|
||
"sum": total,
|
||
"average": average,
|
||
"min": min_val,
|
||
"max": max_val,
|
||
"variance": variance,
|
||
}
|
||
|
||
|
||
def process_data(filename):
|
||
numbers = []
|
||
f = open(filename) # Bug 4: no context manager (file may not be closed on error)
|
||
for line in f:
|
||
numbers.append(int(line.strip())) # Bug 5: int() crashes on floats and blank lines
|
||
f.close()
|
||
|
||
result = calculate_statistics(numbers)
|
||
print("Statistics:", result)
|
||
return result
|
||
|
||
|
||
def normalize(numbers, method="minmax"):
|
||
if method == "minmax":
|
||
mn = min(numbers)
|
||
mx = max(numbers)
|
||
return [(x - mn) / mx - mn for x in numbers] # Bug 6: operator-precedence error
|
||
elif method == "zscore":
|
||
stats = calculate_statistics(numbers)
|
||
std = stats["variance"] ** 0.5
|
||
return [(x - stats["average"]) / std for x in numbers]
|
||
else:
|
||
print("Unknown normalisation method") # Bug 7: should raise ValueError, not just print
|
||
|
||
|
||
if __name__ == "__main__":
|
||
sample = [4, 8, 15, 16, 23, 42]
|
||
print(calculate_statistics(sample))
|