"""Exploratory statistics on the Hardrock 100 (2022) results spreadsheet."""

import datetime

import matplotlib.pyplot as plt
import pandas as pd


def load_dataframe() -> pd.DataFrame:
    """Read the results workbook and return it as a DataFrame.

    The first spreadsheet column is used as the DataFrame index.
    """
    return pd.read_excel("datasets/hardrock100_results_2022.xlsx", index_col=0)


def time_str_to_seconds(time_object) -> int:
    """Return the time of day of *time_object* as seconds since midnight.

    Despite the name, the argument is not a string: it must expose ``hour``,
    ``minute`` and ``second`` attributes (``datetime.time`` and
    ``datetime.datetime`` both do). Any date part is ignored.
    """
    return (
        time_object.second
        + time_object.minute * 60
        + time_object.hour * 3600
    )


def calc_mean(df: pd.DataFrame) -> float:
    """Return the arithmetic mean of the ``age`` column, computed by hand.

    This is the manual counterpart of ``df["age"].mean()``; ``main`` prints
    both. The divisor is the total number of rows, and a NaN age propagates
    into the result.

    Returns NaN for a DataFrame without rows instead of dividing by zero.
    """
    row_count = len(df.index)
    if row_count == 0:
        return float("nan")
    return sum(df["age"]) / row_count


def _median_of_sorted(sorted_values):
    """Return the median of an already sorted list (NaN if it is empty)."""
    if not sorted_values:
        return float("nan")
    count = len(sorted_values)
    lower_middle = (count - 1) // 2
    if count % 2:
        # Odd number of values: the middle element is the median.
        return sorted_values[lower_middle]
    # Even number of values: average the two middle elements.
    return (sorted_values[lower_middle] + sorted_values[lower_middle + 1]) / 2


def main() -> None:
    """Load the results, print summary statistics and show a finish-time box plot."""
    df = load_dataframe()
    print(df.head(3))

    # Column datatypes. info() prints its report itself and returns None, so
    # it must not be wrapped in print().
    df.info()

    # Rows whose finish value is missing.
    print(df[df["finish"].isna()])

    number_of_rows = len(df.index)
    print(f"Number of rows: {number_of_rows}")

    # NOTE(review): iloc is positional and zero-based, so this selects the
    # 6th and 9th rows. If "place 5 & 8" refers to index labels, df.loc is
    # probably what was meant -- confirm against the spreadsheet.
    print(df.iloc[[5, 8]])

    # Mean age: manual calculation, then the pandas built-in for comparison.
    print(f" Mean age: {calc_mean(df)}")
    print(df["age"].mean())

    # Median age: manual calculation on the sorted column, then the pandas
    # built-in for comparison.
    sorted_age = sorted(df["age"])
    print(sorted_age)
    print(_median_of_sorted(sorted_age))
    print(df["age"].median())

    print(df["age"].describe())

    # Convert a sample clock time to seconds.
    time_str = "21:36:25"
    time_object = datetime.datetime.strptime(time_str, "%H:%M:%S")
    print(time_str_to_seconds(time_object))

    # NOTE(review): rows without a finish value are filled with 00:00 and
    # therefore enter the box plot as 0 seconds -- confirm this is intended
    # rather than excluding those rows.
    na_value = datetime.time(0, 0)
    df["finish"] = df["finish"].fillna(na_value)
    df["finish_seconds"] = df["finish"].apply(time_str_to_seconds)

    df.boxplot(column=["finish_seconds"])
    plt.title("Verteilung der Zielzeiten in Sekunden")
    plt.ylabel("Sekunden")
    plt.show()


if __name__ == "__main__":
    main()