AISE1_CLASS/Prompting Exercise/analyze_me_fixed.py

216 lines
7.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
analyze_me.py - A data-processing script used in Exercise 2
==============================================================
This file contains several realistic bugs and style issues.
Do NOT fix them manually — in Exercise 2 the LLM will help you find them!
Can you spot the issues yourself before asking the LLM?
"""
def calculate_statistics(numbers: list[float]) -> dict[str, float]:
    """
    Calculate basic statistical measures for a list of numbers.

    Computes the count, sum, average, minimum, maximum, and sample
    variance (divisor ``n - 1``) of the provided values.

    Parameters
    ----------
    numbers : list[float]
        A list of numeric values to analyze. May be empty.

    Returns
    -------
    dict[str, float]
        A dictionary containing the following keys:
        - 'count': The number of elements in the list.
        - 'sum': The sum of all elements.
        - 'average': The arithmetic mean of the elements.
        - 'min': The minimum value in the list.
        - 'max': The maximum value in the list.
        - 'variance': The sample variance of the elements; 0.0 when the
          list holds fewer than two values.

        For an empty list, 'count' is 0 and every other entry is 0.0;
        no exception is raised.
    """
    # An empty input has no meaningful mean/min/max; report zeros instead of
    # letting the division or min()/max() below fail.
    if not numbers:
        return {
            "count": 0,
            "sum": 0.0,
            "average": 0.0,
            "min": 0.0,
            "max": 0.0,
            "variance": 0.0,
        }

    count = len(numbers)
    total = sum(numbers)
    average = total / count

    # Sample variance divides by (count - 1); a single value has no spread,
    # so it is reported as 0.0 rather than dividing by zero.
    if count > 1:
        squared_deviations = sum((n - average) ** 2 for n in numbers)
        variance = squared_deviations / (count - 1)
    else:
        variance = 0.0

    return {
        "count": count,
        "sum": total,
        "average": average,
        "min": min(numbers),
        "max": max(numbers),
        "variance": variance,
    }
# Step 4: Define type hints and docstrings for process_data
def process_data(filename: str) -> dict[str, float]:
    """
    Read numeric data from a file and compute statistics.

    Opens a text file, parses each non-blank line as a float, and collects
    the values into a list. Blank lines and lines that cannot be parsed as
    a number are skipped. The list is passed to calculate_statistics, and
    the resulting summary is printed and returned.

    Parameters
    ----------
    filename : str
        The path to the text file containing one number per line.

    Returns
    -------
    dict[str, float]
        The statistical measures computed from the numeric lines of the file.

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.
    """
    numbers: list[float] = []
    # The context manager closes the file even if reading fails part-way.
    with open(filename) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                continue
            # float() handles both integer and decimal notation.
            # NOTE: it also accepts "nan" and "inf", which are kept as values.
            try:
                value = float(stripped)
            except ValueError:
                # Deliberate best-effort: non-numeric lines are ignored.
                continue
            numbers.append(value)

    result = calculate_statistics(numbers)
    print("Statistics:", result)
    return result
# Step 6: Define type hints and docstrings for normalize
def normalize(numbers: list[float], method: str = "minmax") -> list[float]:
    """
    Normalize a list of numbers using the specified method.

    Applies either 'minmax' scaling or 'zscore' standardization to the
    input list of numbers.

    Parameters
    ----------
    numbers : list[float]
        A list of numeric values to normalize. May be empty.
    method : str, optional
        The normalization method to use. Options are:
        - 'minmax': Scales values to the range [0, 1].
        - 'zscore': Subtracts the average and divides by the square root
          of the variance reported by calculate_statistics.
        Default is 'minmax'.

    Returns
    -------
    list[float]
        A list of normalized values, in the same order as the input.
        An empty input yields an empty list for either method.

    Raises
    ------
    ValueError
        If an unknown normalization method is provided.
    ZeroDivisionError
        If 'minmax' is used on a non-empty list where all values are
        identical (range is 0), or if 'zscore' is used on a non-empty list
        whose variance is 0 (including a single-element list).

    Examples
    --------
    >>> normalize([1, 2, 3, 4, 5])
    [0.0, 0.25, 0.5, 0.75, 1.0]
    """
    # Validate the method first so a bad name is reported even for empty input.
    if method not in ("minmax", "zscore"):
        raise ValueError(f"Unknown normalisation method: {method}")

    # Nothing to scale; this also keeps min()/max() from failing on [].
    if not numbers:
        return []

    if method == "minmax":
        mn = min(numbers)
        mx = max(numbers)
        span = mx - mn
        return [(x - mn) / span for x in numbers]

    # method == "zscore"
    stats = calculate_statistics(numbers)
    mean = stats["average"]
    std = stats["variance"] ** 0.5
    return [(x - mean) / std for x in numbers]
if __name__ == "__main__":
    # Self-test run when the file is executed as a script: each section
    # asserts the expected behaviour of one function on a small fixed input.

    # --- calculate_statistics on a known sample ---
    sample = [4, 8, 15, 16, 23, 42]
    expected_count = 6
    expected_sum = 4 + 8 + 15 + 16 + 23 + 42
    expected_avg = expected_sum / expected_count

    stats = calculate_statistics(sample)
    assert stats["count"] == expected_count, f"Count mismatch: {stats['count']} != {expected_count}"
    assert stats["sum"] == expected_sum, f"Sum mismatch: {stats['sum']} != {expected_sum}"
    assert abs(stats["average"] - expected_avg) < 1e-9, f"Average mismatch: {stats['average']} != {expected_avg}"
    assert stats["min"] == 4, f"Min mismatch: {stats['min']} != 4"
    assert stats["max"] == 42, f"Max mismatch: {stats['max']} != 42"

    # --- calculate_statistics on an empty list: every entry must be zero ---
    empty_stats = calculate_statistics([])
    empty_expectations = (
        ("count", 0, "Empty list count should be 0"),
        ("sum", 0.0, "Empty list sum should be 0.0"),
        ("average", 0.0, "Empty list average should be 0.0"),
        ("min", 0.0, "Empty list min should be 0.0"),
        ("max", 0.0, "Empty list max should be 0.0"),
        ("variance", 0.0, "Empty list variance should be 0.0"),
    )
    for key, wanted, message in empty_expectations:
        assert empty_stats[key] == wanted, message

    # --- normalize with the default (minmax) method ---
    expected_normalized = [0.0, 0.25, 0.5, 0.75, 1.0]
    normalized = normalize([1, 2, 3, 4, 5])
    assert len(normalized) == 5, "Normalized list length mismatch"
    for i, (actual, wanted) in enumerate(zip(normalized, expected_normalized)):
        assert abs(actual - wanted) < 1e-9, f"Normalized value mismatch at index {i}"

    print("All sanity checks passed!")