65 lines
1.7 KiB
Python
65 lines
1.7 KiB
Python
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
import datetime
|
|
|
|
def load_dataframe(path="datasets/hardrock100_results_2022.xlsx"):
    """Read the results workbook into a DataFrame.

    Args:
        path: Location of the Excel file to read. Defaults to the workbook
            path this script has always used, so existing callers that pass
            no argument behave exactly as before.

    Returns:
        The DataFrame returned by ``pd.read_excel``; the first spreadsheet
        column is used as the index (``index_col=0``).
    """
    return pd.read_excel(path, index_col=0)
|
|
|
|
def time_str_to_seconds(time_object):
    """Convert a time of day into the number of seconds since 00:00:00.

    Args:
        time_object: Either an object exposing ``hour``, ``minute`` and
            ``second`` attributes (``datetime.time`` / ``datetime.datetime``)
            or a string in ``"%H:%M:%S"`` format such as ``"21:36:25"``.

    Returns:
        ``hour * 3600 + minute * 60 + second`` as an integer.

    Raises:
        ValueError: If a string is passed that does not match ``%H:%M:%S``.
    """
    # The function name promises string support, so parse strings first and
    # then fall through to the same attribute-based arithmetic.
    if isinstance(time_object, str):
        time_object = datetime.datetime.strptime(time_object, "%H:%M:%S")
    return (
        time_object.hour * 3600
        + time_object.minute * 60
        + time_object.second
    )
|
|
|
|
def calc_mean(df):
    """Return the arithmetic mean of the ``age`` column of *df*.

    The mean is computed with the vectorised ``Series.mean`` instead of a
    Python-level ``iterrows`` loop. Missing ages are skipped (pandas'
    default ``skipna=True``) rather than turning the whole result into NaN.

    Args:
        df: DataFrame containing a numeric ``age`` column.

    Returns:
        The mean age, or NaN when *df* has no rows (instead of raising
        ``ZeroDivisionError``).
    """
    # Guard the empty case explicitly so the result does not depend on the
    # dtype pandas infers for an empty column.
    if len(df.index) == 0:
        return float("nan")
    return df["age"].mean()
|
|
|
|
def main():
    """Print descriptive statistics of the results and plot finish times.

    Loads the results table, prints a few inspections of it (head, column
    info, rows without a finish value, row count, selected rows, mean,
    median and ``describe()`` of the ``age`` column), converts the
    ``finish`` column to seconds and shows a box plot of those values.
    """
    df = load_dataframe()
    print(df.head(3))

    # DataFrame.info() writes its report to stdout itself and returns None;
    # wrapping it in print() would add a stray "None" line to the output.
    df.info()  # get column datatype

    print(df[df["finish"].isna()])  # rows whose finish value is missing

    number_of_rows = len(df.index)  # get number of rows
    print(f"Number of rows: {number_of_rows}")

    # NOTE(review): iloc is positional and zero-based, so this selects the
    # 6th and 9th rows. If "place 5 & 8" was meant, confirm whether
    # df.loc[[5, 8]] or df.iloc[[4, 7]] is the intended lookup.
    print(df.iloc[[5, 8]])

    print(f" Mean age: {calc_mean(df)}")  # get mean age
    print(df["age"].mean())  # alt get mean age

    sorted_age = sorted(df["age"])  # sort column age
    print(sorted_age)

    # get median age by hand: the middle element for an odd count, the
    # average of the two middle elements for an even count
    median_index = (number_of_rows - 1) // 2
    if number_of_rows % 2:
        print(sorted_age[median_index])
    else:
        print((sorted_age[median_index] + sorted_age[median_index + 1]) / 2)

    print(df["age"].median())  # alt get median age

    print(df["age"].describe())  # summary statistics of the age column

    # get time in seconds (stand-alone demonstration with a fixed value)
    time_str = "21:36:25"
    time_object = datetime.datetime.strptime(time_str, "%H:%M:%S")
    print(time_str_to_seconds(time_object))

    # Convert only the finish times that exist. Rows without one stay NaN
    # in "finish_seconds" instead of being replaced by 00:00:00, so that
    # non-finishers do not enter the plot as a 0-second finish.
    df["finish_seconds"] = df["finish"].dropna().apply(time_str_to_seconds)

    # The box plot leaves out missing values.
    df.boxplot(column=["finish_seconds"])
    plt.title("Verteilung der Zielzeiten in Sekunden")
    plt.ylabel("Sekunden")
    plt.show()
|
|
|
|
# Run the analysis only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()