AISE1_CLASS/Prompting Exercise/analyze_me_direct.py

192 lines
5.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
analyze_me.py - A data-processing script used in Exercise 2
==============================================================
This module provides robust functions for calculating statistics,
processing data files, and normalizing numeric lists.
All functions include PEP-484 type hints and NumPy-style docstrings.
"""
from typing import List, Dict, Union, Any
def calculate_statistics(numbers: List[Union[int, float]]) -> Dict[str, Any]:
    """
    Calculate basic statistics for a list of numbers.

    Parameters
    ----------
    numbers : List[Union[int, float]]
        The list of numeric values to analyze.

    Returns
    -------
    Dict[str, Any]
        A dictionary with the keys ``count``, ``sum``, ``average``, ``min``,
        ``max`` and ``variance``. For an empty list, ``count`` is 0 and
        every other field is 0.0.

    Notes
    -----
    - Variance is the sample variance (sum of squared deviations from the
      mean, divided by N-1).
    - Sample variance is undefined for a single value (N-1 == 0). In that
      case ``variance`` is reported as 0.0 instead of raising
      ZeroDivisionError.
    """
    count = len(numbers)
    if count == 0:
        # Nothing to aggregate: return a fully populated all-zero result so
        # callers can rely on every key being present.
        return {
            "count": 0,
            "sum": 0.0,
            "average": 0.0,
            "min": 0.0,
            "max": 0.0,
            "variance": 0.0,
        }

    total = sum(numbers)
    average = total / count

    if count > 1:
        squared_deviations = sum((n - average) ** 2 for n in numbers)
        variance = squared_deviations / (count - 1)
    else:
        # A single observation has no spread; dividing by N-1 == 0 would
        # raise ZeroDivisionError, so report 0.0.
        variance = 0.0

    return {
        "count": count,
        "sum": total,
        "average": average,
        "min": min(numbers),
        "max": max(numbers),
        "variance": variance,
    }
def process_data(filename: str) -> Dict[str, Any]:
    """
    Read numeric data from a file and calculate statistics.

    The resulting statistics dictionary is also printed to standard output.

    Parameters
    ----------
    filename : str
        Path to the input file containing one number per line.
        Blank lines and lines that ``float()`` cannot parse are skipped.

    Returns
    -------
    Dict[str, Any]
        The statistics dictionary returned by calculate_statistics().

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.
    OSError
        If the file exists but cannot be opened or read (``IOError`` is an
        alias of ``OSError``).
    ValueError
        If the file contains no valid numbers.

    Notes
    -----
    Every value is parsed with ``float()``, so any spelling ``float()``
    accepts (including ``"nan"`` and ``"inf"``) counts as a number.
    """
    numbers: List[Union[int, float]] = []
    try:
        with open(filename, 'r') as f:
            for line in f:
                stripped = line.strip()
                if not stripped:
                    continue
                try:
                    # Parse as float so both integer and decimal lines work.
                    numbers.append(float(stripped))
                except ValueError:
                    # Non-numeric lines are skipped by design.
                    continue
    except FileNotFoundError as err:
        # Re-raise with a clearer message, keeping the original error as the
        # explicit cause so errno/filename details stay reachable.
        raise FileNotFoundError(f"File not found: {filename}") from err
    except IOError as err:
        raise IOError(f"Error reading file {filename}: {err}") from err

    if not numbers:
        raise ValueError(f"No valid numeric data found in {filename}")

    result = calculate_statistics(numbers)
    print("Statistics:", result)
    return result
def normalize(numbers: List[Union[int, float]], method: str = "minmax") -> List[float]:
    """
    Normalize a list of numbers using the specified method.

    Parameters
    ----------
    numbers : List[Union[int, float]]
        The list of numeric values to normalize.
    method : str, optional
        The normalization method to use. Options are:
        - "minmax": Min-Max normalization to [0, 1]
        - "zscore": Z-score normalization (standardization) using the
          sample standard deviation (N-1 denominator)

    Returns
    -------
    List[float]
        The normalized list of numbers. When the data has no spread (all
        values equal, or a single value), a list of 0.0 of the same length
        is returned instead of dividing by zero.

    Raises
    ------
    ValueError
        If the list is empty, or if an unknown normalization method is
        provided.
    """
    if not numbers:
        raise ValueError("Cannot normalize an empty list.")

    if method == "minmax":
        mn = min(numbers)
        mx = max(numbers)
        range_val = mx - mn
        if range_val == 0:
            # All values are identical: there is no range to scale by.
            return [0.0 for _ in numbers]
        return [(x - mn) / range_val for x in numbers]

    if method == "zscore":
        if len(numbers) < 2:
            # Sample standard deviation is undefined for one value; treat it
            # like the zero-spread case rather than dividing by N-1 == 0.
            return [0.0 for _ in numbers]
        stats = calculate_statistics(numbers)
        std = stats["variance"] ** 0.5
        if std == 0:
            # Zero standard deviation means all values are the same.
            return [0.0 for _ in numbers]
        return [(x - stats["average"]) / std for x in numbers]

    raise ValueError(f"Unknown normalization method: '{method}'. "
                     f"Supported methods: 'minmax', 'zscore'.")
if __name__ == "__main__":
    # Smoke test: exercise each public function once and print the results.
    sample = [4, 8, 15, 16, 23, 42]

    print("Testing calculate_statistics:")
    print(calculate_statistics(sample))

    # Run both supported normalization methods through the same steps.
    normalize_runs = (
        ("\nTesting normalize (minmax):", "minmax"),
        ("\nTesting normalize (zscore):", "zscore"),
    )
    for heading, method_name in normalize_runs:
        print(heading)
        print(normalize(sample, method_name))

    print("\nTesting empty list handling:")
    print(calculate_statistics([]))

    # An unsupported method name is expected to raise ValueError.
    print("\nTesting unknown method error:")
    try:
        normalize(sample, "unknown")
    except ValueError as exc:
        print(f"Caught expected error: {exc}")

    print("\nAll sanity checks passed!")