"""Compare three scikit-learn classifiers on a CSV with `points`, `x`, `y`.

The script trains a random forest, a decision tree and a k-nearest-neighbours
classifier on one-hot encoded `points` labels, prints a hand-rolled macro F1
score next to scikit-learn's, and then predicts the label for a coordinate
pair entered on the command line.
"""
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

from py.arguments import Arguments

FEATURES = ["points", "x", "y"]

# Feature modes understood by main() and get_score_from_cli():
#   "v" - vector length of (x, y), "a" - absolute values, "c" - raw values.
_VALID_MODES = ("v", "a", "c")


def load_dataframe(file_path):
    """Read the FEATURES columns from a CSV file and drop rows with nulls.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A DataFrame restricted to the FEATURES columns, without null rows.

    Exits the process with status 1 (after printing the error) when the
    file does not exist.
    """
    try:
        df = pd.read_csv(file_path, usecols=FEATURES)
    except FileNotFoundError as error:
        print(error)
        # sys.exit instead of quit(): quit() is only injected by the site
        # module, and a missing file should not report success.
        sys.exit(1)
    return df.dropna()


def calc_f1_macro(y_true, y_pred):
    """Return the unweighted mean of the per-column F1 scores.

    Args:
        y_true: DataFrame of true indicator columns (one column per class).
        y_pred: DataFrame of predicted indicators; it is indexed with the
            column labels of `y_true`, rows are matched by position.

    Returns:
        The macro-averaged F1 score, or 0.0 when `y_true` has no columns.
    """
    scores = [
        calc_f1_score(y_true[column].values, y_pred[column])
        for column in y_true
    ]
    if not scores:
        return 0.0
    return np.mean(scores)


def calc_f1_score(y_true, y_pred):
    """Return the binary F1 score for one indicator column.

    Args:
        y_true: Sequence of true indicators (truthy means positive).
        y_pred: Sequence of predicted indicators, same length as `y_true`.

    Returns:
        The F1 score as a float; 0.0 when precision or recall is 0.
    """
    actual = np.asarray(y_true).astype(bool)
    predicted = np.asarray(y_pred).astype(bool)

    tp = int(np.count_nonzero(predicted & actual))
    fp = int(np.count_nonzero(predicted & ~actual))
    fn = int(np.count_nonzero(~predicted & actual))

    precision = calc_precision(tp, fp)
    recall = calc_recall(tp, fn)
    if precision == 0 or recall == 0:
        return 0.0
    return (2 * precision * recall) / (precision + recall)


def calc_precision(tp, fp):
    """Return tp / (tp + fp), or 0.0 when nothing was predicted positive."""
    predicted_positive = tp + fp
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive


def calc_recall(tp, fn):
    """Return tp / (tp + fn), or 0.0 when there are no actual positives."""
    actual_positive = tp + fn
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive


def get_score_from_cli(mode):
    """Prompt for an (x, y) pair and return it as a single-row feature array.

    Args:
        mode: Feature mode. "v" yields the vector length of (x, y) as the
            only feature, "a" yields the absolute values of x and y, any
            other value yields x and y unchanged.

    Returns:
        A numpy array of shape (1, 1) for mode "v" and (1, 2) otherwise, or
        None when the input is not a finite number.
    """
    try:
        x = float(input("x: "))
        y = float(input("y: "))
    except ValueError:
        print("Invalid input. Please enter numeric values.")
        return None
    # float() accepts "nan" and "inf"; treat them as invalid input as well.
    if not (np.isfinite(x) and np.isfinite(y)):
        print("Invalid input. Please enter numeric values.")
        return None

    if mode == "v":
        return np.array([np.sqrt(x**2 + y**2)]).reshape(1, -1)
    if mode == "a":
        return np.abs(np.array([x, y])).reshape(1, -1)
    return np.array([x, y]).reshape(1, -1)


def _parse_bool(text):
    """Interpret a typed answer as a boolean; anything unrecognised is False."""
    return text.strip().lower() in {"true", "t", "yes", "y", "1"}


def _build_features(df, mode):
    """Return the feature frame for `mode` (one of _VALID_MODES)."""
    if mode == "v":
        # use vector length of (x, y) as feature
        return np.sqrt(df["x"] ** 2 + df["y"] ** 2).to_frame("radius")
    if mode == "a":
        # use absolute values of (x, y) as features
        return df[["x", "y"]].abs()
    # use unaltered values of (x, y) as features
    return df[["x", "y"]]


def _show_information(df):
    """Print summary statistics, the first rows and the column overview."""
    print(df.describe())
    print(df.head())
    # info() prints by itself and returns None, so it is not wrapped in print.
    df.info()


def _show_graphs(df):
    """Display class counts, the correlation heatmap and an x/y scatter plot."""
    sns.countplot(x=df["points"])
    plt.show()
    sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
    plt.show()
    sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
    plt.show()


def main():
    """Run the interactive train / evaluate / predict loop.

    Requires the CSV path as the first command-line argument; prints a usage
    message and exits with status 1 when it is missing. The loop repeats
    until the user answers "exit" at the settings prompt.
    """
    if not sys.argv[1:]:
        print("Usage: python3 main.py ")
        sys.exit(1)

    args = Arguments(sys.argv[1], "v", False, False)
    args.set_mode("v")
    args.set_information(False)
    args.set_graph(False)

    while True:
        # Read the settings from args on every pass so that changes made in
        # a previous iteration are displayed.
        print("Currently selected setting:")
        print(f"File: {args.get_file_path()}")
        print(f"Mode: {args.get_mode()}")
        print(f"Display information: {args.get_information()}")
        print(f"Display graphs: {args.get_graph()}")

        prompt = input("Change settings [y / exit]: ")
        if prompt == "y":
            args.set_file_path(input("Change file : "))
            args.set_mode(input("Change mode [v, a, c]: "))
            # bool("False") is True, so the answers are parsed explicitly.
            args.set_information(
                _parse_bool(input("Display information [True / False]: "))
            )
            args.set_graph(
                _parse_bool(input("Display graphs [True / False]: "))
            )
        elif prompt == "exit":
            sys.exit(0)

        mode = args.get_mode()
        if mode not in _VALID_MODES:
            print(f"Unknown mode {mode!r}; choose one of v, a, c.")
            continue

        df = load_dataframe(args.get_file_path())
        if df.empty:
            print("The file contains no complete rows.")
            continue

        if args.get_information():
            _show_information(df)
        if args.get_graph():
            _show_graphs(df)

        X = _build_features(df, mode)
        y = pd.get_dummies(df['points'])
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.4, random_state=0
        )

        models = {
            "Random Forest Classifier": RandomForestClassifier(
                n_estimators=700, random_state=0
            ),
            "Decision Tree Classifier": DecisionTreeClassifier(random_state=0),
            "K-Neighbors": KNeighborsClassifier(n_neighbors=5),
        }
        for model in models.values():
            model.fit(X_train.values, y_train.values)

        for name, model in models.items():
            pred = model.predict(X_test.values)
            # calc_f1_macro looks predictions up by y_test's column labels,
            # so the prediction frame must carry the same labels.
            pred_frame = pd.DataFrame(pred, columns=y_test.columns)
            my_f1_macro_score = calc_f1_macro(y_test, pred_frame)
            print(f'My F1 score of {name} is {my_f1_macro_score}\n')
            f1_sklearn = f1_score(y_test.values, pred, average='macro')
            print(f'Sklearn F1 score of {name} is {f1_sklearn}\n')

        score = get_score_from_cli(mode)
        if score is None:
            continue

        # LabelEncoder and get_dummies both order the distinct labels by
        # sorting, so a column position maps back to its label.
        label_encoder = LabelEncoder().fit(df["points"])
        for name, model in models.items():
            indicators = np.atleast_2d(model.predict(score))[0]
            if not indicators.any():
                # An all-zero indicator row means no class was selected;
                # argmax would otherwise silently report the first class.
                print(f"{name}: no class predicted")
                continue
            position = int(np.argmax(indicators))
            points = label_encoder.inverse_transform([position])[0]
            print(f"{name}: {points} Punkte")


if __name__ == "__main__":
    main()