# AISE1_CLASS/Prompting Exercise/analyze_me_fixed.py

"""
analyze_me.py  –  A data-processing script used in Exercise 2
==============================================================
This file contains several realistic bugs and style issues.
Do NOT fix them manually — in Exercise 2 the LLM will help you find them!

Can you spot the issues yourself before asking the LLM?
"""


def calculate_statistics(numbers: list[float]) -> dict[str, float]:
    """
    Calculate basic statistical measures for a list of numbers.

    This function computes the count, sum, average, minimum, maximum, and
    sample variance (divisor ``n - 1``) of the provided list of numbers.

    Parameters
    ----------
    numbers : list[float]
        A list of numeric values to analyze. May be empty.

    Returns
    -------
    dict[str, float]
        A dictionary containing the following keys:
        - 'count': The number of elements in the list.
        - 'sum': The sum of all elements.
        - 'average': The arithmetic mean of the elements.
        - 'min': The minimum value in the list.
        - 'max': The maximum value in the list.
        - 'variance': The sample variance of the elements, or 0.0 when the
          list holds fewer than two values.

    Notes
    -----
    An empty list does not raise. Every entry of the returned dictionary is
    zero in that case, so callers that need to tell "no data" apart from
    "all zeros" should check 'count'.
    """
    # Step 2 – Implement empty list handling in calculate_statistics
    if not numbers:
        return {
            "count": 0,
            "sum": 0.0,
            "average": 0.0,
            "min": 0.0,
            "max": 0.0,
            "variance": 0.0,
        }

    count = len(numbers)
    total = sum(numbers)
    average = total / count

    # Step 3 – Correct variance calculation to use sample variance
    # A single observation has no spread to estimate, and dividing by
    # (count - 1) would be a division by zero, so report 0.0 instead.
    if count > 1:
        squared_deviations = sum((n - average) ** 2 for n in numbers)
        variance = squared_deviations / (count - 1)
    else:
        variance = 0.0

    return {
        "count":    count,
        "sum":      total,
        "average":  average,
        "min":      min(numbers),
        "max":      max(numbers),
        "variance": variance,
    }


# Step 4 – Define type hints and docstrings for process_data
def process_data(filename: str) -> dict[str, float]:
    """
    Read numeric data from a file and compute statistics.

    This function opens a text file, reads it line by line, and converts each
    line to a float. Blank lines and lines that cannot be parsed as a number
    are skipped. The collected values are passed to calculate_statistics; the
    resulting summary is printed and returned.

    Parameters
    ----------
    filename : str
        The path to the text file containing one number per line.

    Returns
    -------
    dict[str, float]
        A dictionary containing the statistical measures computed from the
        file data.

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.

    Notes
    -----
    Unparseable lines are dropped silently rather than reported, so a file
    with no valid numbers yields the same result as an empty file.
    ``float()`` also accepts spellings such as "nan" and "inf"; such lines
    are kept and will propagate into the computed statistics.
    """
    numbers: list[float] = []
    # Step 5 – Implement context manager and robust line parsing in process_data
    with open(filename) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                continue
            # float() handles both integer and decimal notation. Only the
            # conversion sits inside the try so that nothing else can be
            # swallowed by the handler.
            try:
                value = float(stripped)
            except ValueError:
                # Skip lines that cannot be converted to a number
                continue
            numbers.append(value)

    result = calculate_statistics(numbers)
    print("Statistics:", result)
    return result


# Step 6 – Define type hints and docstrings for normalize
def normalize(numbers: list[float], method: str = "minmax") -> list[float]:
    """
    Normalize a list of numbers using the specified method.

    This function applies either 'minmax' scaling or 'zscore' standardization
    to the input list of numbers.

    Parameters
    ----------
    numbers : list[float]
        A list of numeric values to normalize. May be empty.
    method : str, optional
        The normalization method to use. Options are:
        - 'minmax': Scales values to the range [0, 1].
        - 'zscore': Subtracts the mean and divides by the standard deviation,
          taken as the square root of the variance reported by
          calculate_statistics.
        Default is 'minmax'.

    Returns
    -------
    list[float]
        A list of normalized values, in the same order as the input. An
        empty input yields an empty list for either method.

    Raises
    ------
    ValueError
        If an unknown normalization method is provided.
    ZeroDivisionError
        If 'minmax' is used on a non-empty list where all values are
        identical (range is 0), or if 'zscore' is used on a non-empty list
        with zero standard deviation (including a single-element list).

    Examples
    --------
    >>> normalize([1, 2, 3, 4, 5])
    [0.0, 0.25, 0.5, 0.75, 1.0]
    >>> normalize([])
    []
    """
    # Reject an unknown method before looking at the data, so the error is
    # reported even when the list is empty.
    if method not in ("minmax", "zscore"):
        # Step 8 – Replace print statement with ValueError for unknown methods
        raise ValueError(f"Unknown normalisation method: {method}")

    # Nothing to scale. Without this guard min()/max() would raise a
    # ValueError on the 'minmax' path while 'zscore' returned [].
    if not numbers:
        return []

    if method == "minmax":
        mn = min(numbers)
        mx = max(numbers)
        span = mx - mn
        if span == 0:
            raise ZeroDivisionError(
                "cannot min-max normalise: all values are identical"
            )
        # Step 7 – Fix operator precedence bug in minmax normalization
        return [(x - mn) / span for x in numbers]

    stats = calculate_statistics(numbers)
    std = stats["variance"] ** 0.5
    if std == 0:
        raise ZeroDivisionError(
            "cannot z-score normalise: standard deviation is zero"
        )
    return [(x - stats["average"]) / std for x in numbers]


if __name__ == "__main__":
    # Step 9 – Implement and verify main block sanity checks
    # Self-test run only when the file is executed as a script: each check
    # aborts with an AssertionError naming the first value that is off.
    sample = [4, 8, 15, 16, 23, 42]
    stats = calculate_statistics(sample)

    # Reference values for the sample, worked out independently of the
    # function under test.
    expected_count = 6
    expected_sum = 4 + 8 + 15 + 16 + 23 + 42
    expected_avg = expected_sum / expected_count

    assert stats["count"] == expected_count, f"Count mismatch: {stats['count']} != {expected_count}"
    assert stats["sum"] == expected_sum, f"Sum mismatch: {stats['sum']} != {expected_sum}"
    average_error = abs(stats["average"] - expected_avg)
    assert average_error < 1e-9, f"Average mismatch: {stats['average']} != {expected_avg}"
    assert stats["min"] == 4, f"Min mismatch: {stats['min']} != 4"
    assert stats["max"] == 42, f"Max mismatch: {stats['max']} != 42"

    # An empty list must yield an all-zero summary instead of raising.
    empty_stats = calculate_statistics([])
    empty_expectations = (
        ("count", 0, "Empty list count should be 0"),
        ("sum", 0.0, "Empty list sum should be 0.0"),
        ("average", 0.0, "Empty list average should be 0.0"),
        ("min", 0.0, "Empty list min should be 0.0"),
        ("max", 0.0, "Empty list max should be 0.0"),
        ("variance", 0.0, "Empty list variance should be 0.0"),
    )
    for key, wanted, message in empty_expectations:
        assert empty_stats[key] == wanted, message

    # Default (min-max) normalization of an evenly spaced list.
    expected_normalized = [0.0, 0.25, 0.5, 0.75, 1.0]
    normalized = normalize([1, 2, 3, 4, 5])
    assert len(normalized) == 5, "Normalized list length mismatch"
    for i, (val, wanted) in enumerate(zip(normalized, expected_normalized)):
        assert abs(val - wanted) < 1e-9, f"Normalized value mismatch at index {i}"

    print("All sanity checks passed!")