main.py updated

2025-11-26 22:03:23 +01:00 · 2025-11-26 22:03:23 +01:00 · deb2fb80ae
commit deb2fb80ae
parent 76eaa4aa4e
1 changed files with 81 additions and 1 deletions
--- a/main.py
+++ b/main.py
@@ -9,8 +9,88 @@ from sklearn.neighbors import KNeighborsClassifier
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.preprocessing import LabelEncoder
 # Columns read from the shots CSV: "points" is the target, "x"/"y" the inputs.
 FEATURES = ["points", "x", "y"]
 def load_dataframe(path="data/shots.csv"):
    """Load the shots CSV into a DataFrame, keeping only complete rows.

    Only the columns listed in ``FEATURES`` are read, and every row with a
    null value in any of them is dropped.

    Args:
        path: Location of the CSV file. Defaults to the full data set;
            pass e.g. ``"data/shots_dev.csv"`` to load another file.

    Returns:
        A ``pandas.DataFrame`` holding the ``FEATURES`` columns, no nulls.

    Raises:
        SystemExit: If ``path`` does not exist. The message is the original
            ``FileNotFoundError`` text, so the interpreter prints it to
            stderr and exits with a non-zero status.
    """
    try:
        # Keep the try body minimal: only the read raises FileNotFoundError.
        frame = pd.read_csv(path, usecols=FEATURES)
    except FileNotFoundError as error:
        # quit() is an interactive-shell helper injected by ``site`` and
        # exits with status 0; SystemExit works everywhere and signals failure.
        raise SystemExit(str(error)) from error
    return frame.dropna()
 def calc_f1_macro(y_true, y_pred):
    """Return the macro F1: the unweighted mean of the per-column F1 scores.

    Every column label of ``y_true`` is looked up in both frames, the pair
    is scored with ``calc_f1_score`` and the scores are averaged.

    Args:
        y_true: DataFrame with one indicator column per class.
        y_pred: Predictions, indexable by the same column labels.

    Returns:
        The mean of the per-column scores as computed by ``np.mean``.
    """
    per_label = [
        calc_f1_score(y_true[label].values, y_pred[label])
        for label in y_true
    ]
    return np.mean(per_label)
 def calc_f1_score(y_true, y_pred):
    """Return the binary F1 score of ``y_pred`` measured against ``y_true``.

    Args:
        y_true: Ground-truth labels, booleans or 0/1, one per sample.
        y_pred: Predicted labels, booleans or 0/1, same length as ``y_true``.

    Returns:
        The harmonic mean of precision and recall as a float, or ``0.0``
        when there is no true positive.
    """
    actual = np.asarray(y_true).astype(bool)
    predicted = np.asarray(y_pred).astype(bool)
    tp = np.count_nonzero(predicted & actual)
    # With no true positive, precision or recall is zero or undefined (0/0).
    # Returning early keeps a never-predicted class from turning the score
    # into NaN, which would slip past a ``!= 0`` check and poison the mean.
    if tp == 0:
        return 0.0
    fp = np.count_nonzero(predicted & ~actual)
    fn = np.count_nonzero(~predicted & actual)
    # tp > 0 guarantees both denominators below are non-zero.
    precision = calc_precision(tp, fp)
    recall = calc_recall(tp, fn)
    return (2 * precision * recall) / (precision + recall)
 def calc_precision(tp, fp):
    """Return precision, ``tp / (tp + fp)``.

    Args:
        tp: Number of true positives.
        fp: Number of false positives.

    Returns:
        The fraction of predicted positives that are correct, or ``0.0``
        when nothing was predicted positive (the ratio is then undefined).
    """
    predicted_positive = tp + fp
    # Avoid 0/0: ZeroDivisionError for Python ints, NaN for numpy integers.
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive
 def calc_recall(tp, fn):
    """Return recall, ``tp / (tp + fn)``.

    Args:
        tp: Number of true positives.
        fn: Number of false negatives.

    Returns:
        The fraction of actual positives that were found, or ``0.0`` when
        there are no actual positives (the ratio is then undefined).
    """
    actual_positive = tp + fn
    # Avoid 0/0: ZeroDivisionError for Python ints, NaN for numpy integers.
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive
 def main():
    """Train three classifiers on shot coordinates and print their macro F1.

    Loads the data with ``load_dataframe``, one-hot encodes the ``points``
    column as the target, fits a random forest, a decision tree and a
    k-nearest-neighbours model on a 60/40 train/test split, then prints
    the hand-written macro F1 next to scikit-learn's for each model.
    """
    df = load_dataframe()
    # Optional exploratory plots, kept for reference:
    #   sns.countplot(x=df["points"]); plt.show()
    #   sns.heatmap(df.corr(), annot=True, cmap='coolwarm'); plt.show()
    #   sns.scatterplot(x=df['x'], y=df['y'], hue=df['points']); plt.show()
    features = ["x", "y"]
    X = df[features]
    # One indicator column per distinct "points" value.
    y = pd.get_dummies(df['points'])
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=0
    )
    models = {
        "Random Forest Classifier": RandomForestClassifier(
            n_estimators=700, random_state=0
        ),
        "Decision Tree Classifier": DecisionTreeClassifier(random_state=0),
        "K-Neighbors": KNeighborsClassifier(n_neighbors=5),
    }
    for name, model in models.items():
        model.fit(X_train.values, y_train.values)
        pred = model.predict(X_test.values)
        # calc_f1_macro looks columns up by y_test's labels, so the
        # prediction frame must carry those labels, not the default 0..k-1.
        pred_frame = pd.DataFrame(pred, columns=y_test.columns)
        my_f1_macro_score = calc_f1_macro(y_test, pred_frame)
        print(f'My F1 score of {name} is {my_f1_macro_score}')
        f1_sklearn = f1_score(y_test.values, pred, average='macro')
        print(f'Sklearn F1 score of {name} is {f1_sklearn}')
 # Run the model comparison only when executed as a script, not on import.
 if __name__ == "__main__":
    main()