""" analyze_me.py – A data-processing script used in Exercise 2 ============================================================== This file contains several realistic bugs and style issues. Do NOT fix them manually — in Exercise 2 the LLM will help you find them! Can you spot the issues yourself before asking the LLM? """ def calculate_statistics(numbers: list[float]) -> dict[str, float]: """ Calculate basic statistical measures for a list of numbers. This function computes the count, sum, average, minimum, maximum, and variance (population variance) of the provided list of numbers. Parameters ---------- numbers : list[float] A list of numeric values to analyze. Returns ------- dict[str, float] A dictionary containing the following keys: - 'count': The number of elements in the list. - 'sum': The sum of all elements. - 'average': The arithmetic mean of the elements. - 'min': The minimum value in the list. - 'max': The maximum value in the list. - 'variance': The population variance of the elements. Raises ------ ZeroDivisionError If the input list is empty, division by zero will occur when calculating the average and variance. IndexError If the input list is empty, accessing the first element for min/max will raise an error. """ # Step 2 – Implement empty list handling in calculate_statistics if not numbers: return { "count": 0, "sum": 0.0, "average": 0.0, "min": 0.0, "max": 0.0, "variance": 0.0, } total = 0 for n in numbers: total = total + n average = total / len(numbers) # Bug 1: ZeroDivisionError when list is empty min_val = numbers[0] # Bug 2: IndexError when list is empty max_val = numbers[0] for n in numbers: if n < min_val: min_val = n if n > max_val: max_val = n variance = 0 for n in numbers: variance = variance + (n - average) ** 2 # Step 3 – Correct variance calculation to use sample variance count = len(numbers) if count > 1: variance = variance / (count - 1) else: variance = 0.0 return { "count": len(numbers), "sum": total, "average": average, "min": min_val, "max": max_val, "variance": variance, } # Step 4 – Define type hints and docstrings for process_data def process_data(filename: str) -> dict[str, float]: """ Read numeric data from a file and compute statistics. This function opens a text file, reads each line, converts it to an integer, and collects the values into a list. It then passes this list to calculate_statistics to compute and return the statistical summary. Parameters ---------- filename : str The path to the text file containing one number per line. Returns ------- dict[str, float] A dictionary containing the statistical measures computed from the file data. Raises ------ FileNotFoundError If the specified file does not exist. ValueError If a line in the file cannot be converted to an integer. """ numbers = [] # Step 5 – Implement context manager and robust line parsing in process_data with open(filename) as f: for line in f: stripped = line.strip() if not stripped: continue try: # Attempt to convert to float first to handle both ints and floats value = float(stripped) numbers.append(value) except ValueError: # Skip lines that cannot be converted to a number continue result = calculate_statistics(numbers) print("Statistics:", result) return result # Step 6 – Define type hints and docstrings for normalize def normalize(numbers: list[float], method: str = "minmax") -> list[float]: """ Normalize a list of numbers using the specified method. This function applies either 'minmax' scaling or 'zscore' standardization to the input list of numbers. Parameters ---------- numbers : list[float] A list of numeric values to normalize. method : str, optional The normalization method to use. Options are: - 'minmax': Scales values to the range [0, 1]. - 'zscore': Standardizes values to have mean 0 and standard deviation 1. Default is 'minmax'. Returns ------- list[float] A list of normalized values. Raises ------ ValueError If an unknown normalization method is provided. ZeroDivisionError If 'minmax' is used on a list where all values are identical (range is 0), or if 'zscore' is used on a list with zero standard deviation. Examples -------- >>> normalize([1, 2, 3, 4, 5]) [0.0, 0.25, 0.5, 0.75, 1.0] """ if method == "minmax": mn = min(numbers) mx = max(numbers) # Step 7 – Fix operator precedence bug in minmax normalization return [(x - mn) / (mx - mn) for x in numbers] elif method == "zscore": stats = calculate_statistics(numbers) std = stats["variance"] ** 0.5 return [(x - stats["average"]) / std for x in numbers] else: # Step 8 – Replace print statement with ValueError for unknown methods raise ValueError(f"Unknown normalisation method: {method}") if __name__ == "__main__": # Step 9 – Implement and verify main block sanity checks sample = [4, 8, 15, 16, 23, 42] stats = calculate_statistics(sample) # Verify expected values for sample data expected_sum = 4 + 8 + 15 + 16 + 23 + 42 expected_count = 6 expected_avg = expected_sum / expected_count assert stats["count"] == expected_count, f"Count mismatch: {stats['count']} != {expected_count}" assert stats["sum"] == expected_sum, f"Sum mismatch: {stats['sum']} != {expected_sum}" assert abs(stats["average"] - expected_avg) < 1e-9, f"Average mismatch: {stats['average']} != {expected_avg}" assert stats["min"] == 4, f"Min mismatch: {stats['min']} != 4" assert stats["max"] == 42, f"Max mismatch: {stats['max']} != 42" # Test empty list handling empty_stats = calculate_statistics([]) assert empty_stats["count"] == 0, "Empty list count should be 0" assert empty_stats["sum"] == 0.0, "Empty list sum should be 0.0" assert empty_stats["average"] == 0.0, "Empty list average should be 0.0" assert empty_stats["min"] == 0.0, "Empty list min should be 0.0" assert empty_stats["max"] == 0.0, "Empty list max should be 0.0" assert empty_stats["variance"] == 0.0, "Empty list variance should be 0.0" # Test normalization normalized = normalize([1, 2, 3, 4, 5]) expected_normalized = [0.0, 0.25, 0.5, 0.75, 1.0] assert len(normalized) == 5, "Normalized list length mismatch" for i, val in enumerate(normalized): assert abs(val - expected_normalized[i]) < 1e-9, f"Normalized value mismatch at index {i}" print("All sanity checks passed!")