192 lines
5.5 KiB
Python
192 lines
5.5 KiB
Python
"""
analyze_me.py – A data-processing script used in Exercise 2
==============================================================
This module provides robust functions for calculating statistics,
processing data files, and normalizing numeric lists.

All functions include PEP-484 type hints and NumPy-style docstrings.
"""
|
||
|
||
from typing import List, Dict, Union, Any
|
||
|
||
|
||
def calculate_statistics(numbers: List[Union[int, float]]) -> Dict[str, Any]:
    """
    Calculate basic statistics for a list of numbers.

    Parameters
    ----------
    numbers : List[Union[int, float]]
        The list of numeric values to analyze.

    Returns
    -------
    Dict[str, Any]
        A dictionary with the keys ``count``, ``sum``, ``average``, ``min``,
        ``max`` and ``variance``. If the input list is empty, every field
        is zero.

    Notes
    -----
    - Variance is the sample variance (sum of squared deviations from the
      mean, divided by N-1).
    - Sample variance is undefined for a single value; in that case
      ``variance`` is reported as 0.0 rather than dividing by zero.
    - An empty list returns early so that ``min``/``max`` and the average
      are never evaluated on no data.
    """
    count = len(numbers)

    if count == 0:
        return {
            "count": 0,
            "sum": 0.0,
            "average": 0.0,
            "min": 0.0,
            "max": 0.0,
            "variance": 0.0,
        }

    total = sum(numbers)
    average = total / count

    if count > 1:
        squared_deviations = sum((n - average) ** 2 for n in numbers)
        variance = squared_deviations / (count - 1)
    else:
        # N-1 would be zero here; a lone value has no spread to report.
        variance = 0.0

    return {
        "count": count,
        "sum": total,
        "average": average,
        "min": min(numbers),
        "max": max(numbers),
        "variance": variance,
    }
|
||
|
||
|
||
def process_data(filename: str) -> Dict[str, Any]:
    """
    Read numeric data from a file and calculate statistics.

    Parameters
    ----------
    filename : str
        Path to the input file containing one number per line.
        Blank lines and lines that ``float()`` cannot parse are skipped.

    Returns
    -------
    Dict[str, Any]
        The statistics dictionary returned by calculate_statistics().
        The same dictionary is also printed to standard output.

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.
    OSError
        If the file exists but cannot be opened or read (``IOError`` is
        an alias of ``OSError``).
    ValueError
        If the file contains no valid numbers.
    """
    numbers: List[Union[int, float]] = []

    try:
        with open(filename, 'r') as f:
            for line in f:
                stripped = line.strip()
                if not stripped:
                    continue
                try:
                    # float() handles both integer and decimal notation.
                    numbers.append(float(stripped))
                except ValueError:
                    # Skip non-numeric lines
                    continue
    except FileNotFoundError as e:
        # Chain the original error so errno/filename details stay reachable.
        raise FileNotFoundError(f"File not found: {filename}") from e
    except IOError as e:
        raise IOError(f"Error reading file {filename}: {e}") from e

    if not numbers:
        raise ValueError(f"No valid numeric data found in {filename}")

    result = calculate_statistics(numbers)
    print("Statistics:", result)
    return result
|
||
|
||
|
||
def normalize(numbers: List[Union[int, float]], method: str = "minmax") -> List[float]:
    """
    Normalize a list of numbers using the specified method.

    Parameters
    ----------
    numbers : List[Union[int, float]]
        The list of numeric values to normalize.
    method : str, optional
        The normalization method to use. Options are:
        - "minmax": Min-Max normalization to [0, 1]
        - "zscore": Z-score normalization (standardization), using the
          sample standard deviation (N-1) from calculate_statistics()

    Returns
    -------
    List[float]
        The normalized list of numbers, in the same order as the input.
        When the values have no spread (zero range for "minmax", zero
        standard deviation or fewer than two values for "zscore"), a list
        of 0.0 of the same length is returned.

    Raises
    ------
    ValueError
        If an unknown normalization method is provided, or if the list is empty.
    """
    if not numbers:
        raise ValueError("Cannot normalize an empty list.")

    if method == "minmax":
        lowest = min(numbers)
        highest = max(numbers)
        span = highest - lowest
        if span == 0:
            # All values are identical; map them to 0.0 instead of dividing by zero.
            return [0.0 for _ in numbers]
        return [(x - lowest) / span for x in numbers]

    if method == "zscore":
        if len(numbers) < 2:
            # Sample variance (N-1) is undefined for one value; treat it as
            # "no spread" without relying on the statistics helper.
            return [0.0 for _ in numbers]
        stats = calculate_statistics(numbers)
        std = stats["variance"] ** 0.5
        if std == 0:
            # Zero standard deviation means all values are the same.
            return [0.0 for _ in numbers]
        return [(x - stats["average"]) / std for x in numbers]

    raise ValueError(f"Unknown normalization method: '{method}'. "
                     f"Supported methods: 'minmax', 'zscore'.")
|
||
|
||
|
||
if __name__ == "__main__":
    # Smoke-test each public helper on a small fixed data set and print
    # the results for manual inspection.
    values = [4, 8, 15, 16, 23, 42]

    print("Testing calculate_statistics:")
    print(calculate_statistics(values))

    print("\nTesting normalize (minmax):")
    print(normalize(values, "minmax"))

    print("\nTesting normalize (zscore):")
    print(normalize(values, "zscore"))

    print("\nTesting empty list handling:")
    print(calculate_statistics([]))

    print("\nTesting unknown method error:")
    try:
        normalize(values, "unknown")
    except ValueError as err:
        print(f"Caught expected error: {err}")

    print("\nAll sanity checks passed!")