# AISE1_CLASS/Prompting Exercise/analyze_me_direct.py

"""
analyze_me.py – A data-processing script used in Exercise 2
==============================================================
This module provides robust functions for calculating statistics,
processing data files, and normalizing numeric lists.

All functions include PEP-484 type hints and NumPy-style docstrings.
"""

from typing import List, Dict, Union, Any


def calculate_statistics(numbers: List[Union[int, float]]) -> Dict[str, Any]:
    """
    Calculate basic statistics for a list of numbers.

    Parameters
    ----------
    numbers : List[Union[int, float]]
        The list of numeric values to analyze.

    Returns
    -------
    Dict[str, Any]
        A dictionary with the keys ``count``, ``sum``, ``average``, ``min``,
        ``max`` and ``variance``.
        If the input list is empty, ``count`` is 0 and every other field
        is 0.0.

    Notes
    -----
    - Variance is the sample variance (sum of squared deviations divided
      by N-1).
    - Sample variance is undefined for a single observation; in that case
      ``variance`` is reported as 0.0 instead of dividing by zero.
    - An empty list returns early so that ``min``/``max`` and the average
      are never evaluated on no data.
    """
    count = len(numbers)

    if count == 0:
        return {
            "count": 0,
            "sum": 0.0,
            "average": 0.0,
            "min": 0.0,
            "max": 0.0,
            "variance": 0.0,
        }

    total = sum(numbers)
    average = total / count

    if count > 1:
        # Sample variance: divide by N-1 (Bessel's correction).
        squared_deviations = sum((n - average) ** 2 for n in numbers)
        variance = squared_deviations / (count - 1)
    else:
        # N-1 would be zero for a single value; there is no spread to report.
        variance = 0.0

    return {
        "count": count,
        "sum": total,
        "average": average,
        "min": min(numbers),
        "max": max(numbers),
        "variance": variance,
    }


def process_data(filename: str) -> Dict[str, Any]:
    """
    Read numeric data from a file and calculate statistics.

    The resulting statistics dictionary is also printed to standard output.

    Parameters
    ----------
    filename : str
        Path to the input file containing one number per line.
        Blank lines and lines that ``float()`` cannot parse are skipped.

    Returns
    -------
    Dict[str, Any]
        The statistics dictionary returned by calculate_statistics().

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.
    OSError
        If the file exists but cannot be opened or read (``IOError`` is an
        alias of ``OSError``).
    ValueError
        If the file contains no valid numbers.
    """
    numbers: List[Union[int, float]] = []

    try:
        with open(filename, 'r') as f:
            for line in f:
                stripped = line.strip()
                if not stripped:
                    continue
                try:
                    # float() accepts both integer and decimal notation.
                    numbers.append(float(stripped))
                except ValueError:
                    # Skip non-numeric lines
                    continue
    except FileNotFoundError as e:
        # Chain explicitly so the original OS error stays attached as the cause.
        raise FileNotFoundError(f"File not found: {filename}") from e
    except IOError as e:
        raise IOError(f"Error reading file {filename}: {e}") from e

    if not numbers:
        raise ValueError(f"No valid numeric data found in {filename}")

    result = calculate_statistics(numbers)
    print("Statistics:", result)
    return result


def normalize(numbers: List[Union[int, float]], method: str = "minmax") -> List[float]:
    """
    Normalize a list of numbers using the specified method.

    Parameters
    ----------
    numbers : List[Union[int, float]]
        The list of numeric values to normalize.
    method : str, optional
        The normalization method to use. Options are:
        - "minmax": Min-Max normalization to [0, 1]
        - "zscore": Z-score normalization (standardization), using the
          sample standard deviation derived from calculate_statistics()

    Returns
    -------
    List[float]
        The normalized list of numbers, the same length as the input.
        When the values have no spread (zero range for "minmax", zero
        standard deviation or fewer than two values for "zscore"), a list
        of 0.0 values is returned instead of dividing by zero.

    Raises
    ------
    ValueError
        If the list is empty, or if an unknown normalization method is
        provided. The empty-list check runs first.
    """
    if not numbers:
        raise ValueError("Cannot normalize an empty list.")

    if method == "minmax":
        mn = min(numbers)
        mx = max(numbers)
        range_val = mx - mn
        if range_val == 0:
            # All values are identical; map each of them to 0.0.
            return [0.0 for _ in numbers]
        return [(x - mn) / range_val for x in numbers]

    elif method == "zscore":
        if len(numbers) < 2:
            # A single value has no spread; treat it like the zero-std case
            # rather than asking for an (N-1)-based variance of one sample.
            return [0.0 for _ in numbers]
        stats = calculate_statistics(numbers)
        std = stats["variance"] ** 0.5
        if std == 0:
            # Zero standard deviation means all values are the same.
            return [0.0 for _ in numbers]
        return [(x - stats["average"]) / std for x in numbers]

    else:
        raise ValueError(f"Unknown normalization method: '{method}'. "
                         f"Supported methods: 'minmax', 'zscore'.")


def _run_sanity_checks() -> None:
    """Print the results of a few manual smoke checks of this module."""
    values = [4, 8, 15, 16, 23, 42]

    print("Testing calculate_statistics:")
    print(calculate_statistics(values))

    print("\nTesting normalize (minmax):")
    print(normalize(values, "minmax"))

    print("\nTesting normalize (zscore):")
    print(normalize(values, "zscore"))

    print("\nTesting empty list handling:")
    print(calculate_statistics([]))

    print("\nTesting unknown method error:")
    try:
        normalize(values, "unknown")
    except ValueError as err:
        # An unsupported method name is expected to be rejected.
        print(f"Caught expected error: {err}")

    print("\nAll sanity checks passed!")


if __name__ == "__main__":
    # Only run the smoke checks when executed as a script.
    _run_sanity_checks()