cds1011-ls1/heatmap.py

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# loading the seperate excel sheets for 2023-2025

def load_dataframe25():
    df2025 = pd.read_excel("Auswertung_DataScience_cons.xlsm", sheet_name=0, header=2, index_col=2)
    return df2025

def load_dataframe24():
    df2024 = pd.read_excel("Auswertung_DataScience_cons.xlsm", sheet_name=1, header=2, index_col=2)
    return df2024

def load_dataframe23():
    df2023 = pd.read_excel("Auswertung_DataScience_cons.xlsm", sheet_name=2, header=2, index_col=2)
    return df2023

# function to create a suitable pivot-table from the xlsm file
def generate_hm_df(df):
    # create a column "count" that contains the nr of (coordinate) duplicates
    cols = ['Aveg X', 'Aveg Y']
    df['count'] = df.groupby(cols)['Aveg X'].transform('size')
    print(df.columns)
    print(df)

    # drop coordinate duplicates
    df_nd = df.drop_duplicates(subset=cols)
    print(df_nd)

    # create a pivot table with the x-coordinates as columns and y-coordinates as rows, containing the nr. of duplicates = nr of shots
    pivot = df_nd.pivot_table(values='count', index='Aveg Y', columns='Aveg X', fill_value=0)
    print(pivot)

    # create a list with the 41 coordinates
    y=-10
    coordinates = []
    for x in range(41):
        coordinates.append(y)
        y = y + 0.5
    else:
        print(coordinates)


    # check if columns contain all coordinates from -10 to 10, create columns if necessary
    y=-10.0
    while (y <= 10) == True:
        if y in pivot.columns:
            print("no column added for", y)
            y = y + 0.5
        else:
            pivot.insert(29, y, 0.0)
            print("column added for", y)
            y = y + 0.5

    # check if rows contain all coordinates from -10 to 10, create rows if necessary
    y=-10.0
    while (y<=10) == True:
        if y in pivot.index:
            print("no row added for", y)
            y = y + 0.5
        else:
            pivot = pivot._append(pd.Series(name=y))
            pivot = pivot.fillna(value=0)
            print("row added for", y)
            y = y + 0.5

    # sort the columns and rows
    pivot = pivot.sort_index(axis=1)
    pivot = pivot.sort_index(axis=0, ascending=False)

    print(pivot.index)
    print(pivot.columns)

    # remove columns and rows <-10 and >10
    for x in pivot.columns:
        if (x < -10) or (x > 10):
            pivot.drop(labels=x, axis=1, inplace=True)
            print("COLUMN", x, "HAS BEEN DELETED.")
            continue
        else:
            print("Row", x, "has not been deleted.")
            continue

    for x in pivot.index:
        if (x < -10) or (x > 10):
            pivot.drop(labels=x, axis=0, inplace=True)
            print("ROW", x, "HAS BEEN DELETED.")
            continue
        else:
            print("Row", x, "has not been deleted.")
            continue

    print(pivot)

    return pivot

def main():

    # change the load_datframe23() to whatever year you want the heatmap from
    df = load_dataframe25()
    df_hm = generate_hm_df(df)

    # generate the heatmap, rename both axis
    pal = sns.cubehelix_palette(start=2, rot=0.3, dark=0, light=1, reverse=False, as_cmap=True)
    hm = sns.heatmap(df_hm, cmap=pal, annot=False, annot_kws={'size': 9}, square=True)
    hm.set(xlabel="X-Koordinaten", ylabel="Y-Koordinaten")
    plt.show()

if __name__ == "__main__":
    main()