diff --git a/main.py b/main.py
index 27d5407..92ef346 100644
--- a/main.py
+++ b/main.py
@@ -9,8 +9,88 @@ from sklearn.neighbors import KNeighborsClassifier
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.preprocessing import LabelEncoder
 
+FEATURES = ["points", "x", "y"]
+
+# Create a dataframe from the CSV and drop any rows with null values
+def load_dataframe():
+    try:
+        column_list = FEATURES
+        #df = pd.read_csv("data/shots_dev.csv", usecols=column_list).dropna()
+        df = pd.read_csv("data/shots.csv", usecols=column_list).dropna()
+        return df
+    except FileNotFoundError as error:
+        print(error)
+        quit()
+
+def calc_f1_macro(y_true, y_pred):
+    f1_scores = []
+    for column in y_true:
+        score = calc_f1_score(y_true[column].values, y_pred[column])
+        f1_scores.append(score)
+    return np.mean(f1_scores)
+
+def calc_f1_score(y_true, y_pred):
+    tp = np.sum(np.multiply([i == True for i in y_pred], y_true))
+    tn = np.sum(np.multiply([i == False for i in y_pred], [not j for j in y_true]))
+    fp = np.sum(np.multiply([i == True for i in y_pred], [not j for j in y_true]))
+    fn = np.sum(np.multiply([i == False for i in y_pred], y_true))
+    precision = calc_precision(tp, fp)
+    recall = calc_recall(tp, fn)
+
+    if precision != 0 and recall != 0:
+        f1 = (2 * precision * recall) / (precision + recall)
+    else:
+        f1 = 0
+    return f1
+
+def calc_precision(tp, fp):
+    return tp / (tp + fp) if (tp + fp) > 0 else 0
+
+def calc_recall(tp, fn):
+    return tp / (tp + fn) if (tp + fn) > 0 else 0
+
 def main():
-    pass
+    df = load_dataframe()
+    #print(df.head())
+
+    '''sns.countplot(x=df["points"])
+    plt.show()
+
+    sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
+    plt.show()
+
+    sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
+    plt.show()'''
+
+    features = ["x", "y"]
+
+    X = df[features]
+    y = pd.get_dummies(df['points'])
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
+
+    random_forest = RandomForestClassifier(n_estimators=700, random_state=0)
+    decision_tree = DecisionTreeClassifier(random_state=0)
+    k_neighbors = KNeighborsClassifier(n_neighbors=5)
+
+    models = {
+        "Random Forest Classifier": random_forest,
+        "Decision Tree Classifier": decision_tree,
+        "K-Neighbors": k_neighbors
+    }
+
+    for model in models.values():
+        model.fit(X_train.values, y_train.values)
+
+    for name, model in models.items():
+        pred = model.predict(X_test.values)
+
+        my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred, columns=y_test.columns))
+        print(f'My F1 score of {name} is {my_f1_macro_score}')
+
+        f1_sklearn = f1_score(y_test.values, pred, average='macro')
+        print(f'Sklearn F1 score of {name} is {f1_sklearn}')
+
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
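
A quick way to validate the hand-rolled macro-F1 in this patch is to compare it against scikit-learn's `f1_score` on a tiny one-hot frame. The sketch below is illustrative only: the toy labels and the `from main import calc_f1_macro` import are assumptions, not part of the change.

```python
# Sanity check: compare the manual macro-F1 helper with sklearn's reference implementation.
# The toy one-hot labels below are made up; main.py (as patched above) is assumed importable.
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score

from main import calc_f1_macro  # helper introduced in this diff

# One-hot targets for two hypothetical point values (2 and 3) plus some predictions.
y_true = pd.DataFrame({2: [1, 0, 1, 0], 3: [0, 1, 0, 1]}, dtype=bool)
y_pred = np.array([[1, 0], [0, 1], [0, 1], [0, 1]], dtype=bool)

manual = calc_f1_macro(y_true, pd.DataFrame(y_pred, columns=y_true.columns))
reference = f1_score(y_true.values, y_pred, average='macro')

print(manual, reference)  # both should be ~0.7333
```

Matching column labels matter here: `calc_f1_macro` indexes the prediction frame by the columns of `y_true`, which is why `main()` above wraps the raw prediction array with `columns=y_test.columns`.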