"""
analyze_me.py – A data-processing script used in Exercise 2
==============================================================

This module provides robust functions for calculating statistics,
processing data files, and normalizing numeric lists.

All functions include PEP-484 type hints and NumPy-style docstrings.
"""

from typing import List, Dict, Union, Any


def calculate_statistics(numbers: List[Union[int, float]]) -> Dict[str, Any]:
    """
    Calculate basic statistics for a list of numbers.

    Parameters
    ----------
    numbers : List[Union[int, float]]
        The list of numeric values to analyze.

    Returns
    -------
    Dict[str, Any]
        A dictionary with the keys ``count``, ``sum``, ``average``, ``min``,
        ``max`` and ``variance``. If the input list is empty, ``count`` is 0
        and every other field is 0.0.

    Notes
    -----
    - Variance is the sample variance (sum of squared deviations divided
      by N-1).
    - Sample variance is undefined for fewer than two values; in that case
      ``variance`` is reported as 0.0 instead of dividing by zero.
    """
    count = len(numbers)
    if count == 0:
        # Return early: min()/max() raise on an empty sequence and the
        # average would divide by zero.
        return {
            "count": 0,
            "sum": 0.0,
            "average": 0.0,
            "min": 0.0,
            "max": 0.0,
            "variance": 0.0,
        }

    total = sum(numbers)
    average = total / count

    if count > 1:
        # Sample variance (divide by N-1).
        squared_deviations = sum((n - average) ** 2 for n in numbers)
        variance = squared_deviations / (count - 1)
    else:
        # A single observation has no spread; N-1 would be zero here.
        variance = 0.0

    return {
        "count": count,
        "sum": total,
        "average": average,
        "min": min(numbers),
        "max": max(numbers),
        "variance": variance,
    }


def process_data(filename: str) -> Dict[str, Any]:
    """
    Read numeric data from a file and calculate statistics.

    The resulting statistics are also printed to standard output.

    Parameters
    ----------
    filename : str
        Path to the input file containing one number per line.
        Blank lines and non-numeric lines are skipped.

    Returns
    -------
    Dict[str, Any]
        The statistics dictionary returned by calculate_statistics().

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.
    OSError
        If the file exists but cannot be opened or read (``IOError`` is an
        alias of ``OSError``).
    ValueError
        If the file contains no valid numbers.
    """
    numbers: List[Union[int, float]] = []

    try:
        with open(filename, 'r') as f:
            for line in f:
                stripped = line.strip()
                if not stripped:
                    continue
                try:
                    # Parse as float so both "3" and "3.5" are accepted.
                    numbers.append(float(stripped))
                except ValueError:
                    # Skip non-numeric lines.
                    continue
    except FileNotFoundError as err:
        # Chain the original error so errno and traceback are not lost.
        raise FileNotFoundError(f"File not found: {filename}") from err
    except OSError as err:
        raise IOError(f"Error reading file {filename}: {err}") from err

    if not numbers:
        raise ValueError(f"No valid numeric data found in {filename}")

    result = calculate_statistics(numbers)
    print("Statistics:", result)
    return result


def normalize(numbers: List[Union[int, float]], method: str = "minmax") -> List[float]:
    """
    Normalize a list of numbers using the specified method.

    Parameters
    ----------
    numbers : List[Union[int, float]]
        The list of numeric values to normalize.
    method : str, optional
        The normalization method to use. Options are:
        - "minmax": Min-Max normalization to [0, 1]
        - "zscore": Z-score normalization (standardization) using the
          sample standard deviation

    Returns
    -------
    List[float]
        The normalized list of numbers. When all values are identical
        (zero range for "minmax", zero standard deviation for "zscore",
        which includes a single-element list) a list of zeros of the same
        length is returned.

    Raises
    ------
    ValueError
        If an unknown normalization method is provided, or if the list
        is empty.
    """
    if not numbers:
        raise ValueError("Cannot normalize an empty list.")

    if method == "minmax":
        mn = min(numbers)
        range_val = max(numbers) - mn
        if range_val == 0:
            # All values are the same: avoid dividing by a zero range.
            return [0.0 for _ in numbers]
        return [(x - mn) / range_val for x in numbers]

    if method == "zscore":
        stats = calculate_statistics(numbers)
        mean = stats["average"]
        std = stats["variance"] ** 0.5
        if std == 0:
            # Zero spread: avoid dividing by a zero standard deviation.
            return [0.0 for _ in numbers]
        return [(x - mean) / std for x in numbers]

    raise ValueError(f"Unknown normalization method: '{method}'. "
                     f"Supported methods: 'minmax', 'zscore'.")


if __name__ == "__main__":
    # Basic sanity checks
    sample = [4, 8, 15, 16, 23, 42]

    print("Testing calculate_statistics:")
    stats = calculate_statistics(sample)
    print(stats)

    print("\nTesting normalize (minmax):")
    normalized_minmax = normalize(sample, "minmax")
    print(normalized_minmax)

    print("\nTesting normalize (zscore):")
    normalized_zscore = normalize(sample, "zscore")
    print(normalized_zscore)

    print("\nTesting empty list handling:")
    empty_stats = calculate_statistics([])
    print(empty_stats)

    print("\nTesting unknown method error:")
    try:
        normalize(sample, "unknown")
    except ValueError as e:
        print(f"Caught expected error: {e}")

    print("\nAll sanity checks passed!")