diff --git a/main.py b/main.py index f896e48..3477b2d 100644 --- a/main.py +++ b/main.py @@ -1,84 +1,11 @@ import sys -import pandas as pd -import numpy as np -import seaborn as sns -import matplotlib.pyplot as plt -from sklearn.metrics import f1_score -from sklearn.model_selection import train_test_split -from sklearn.ensemble import RandomForestClassifier -from sklearn.neighbors import KNeighborsClassifier -from sklearn.tree import DecisionTreeClassifier -from sklearn.preprocessing import LabelEncoder from py.arguments import Arguments +from py.modell import * if not sys.argv[1:]: print("Usage: python3 main.py ") sys.exit(1) -FEATURES = ["points", "x", "y"] - -# create dataframe from csv and drop any row with null values -def load_dataframe(file_path): - try: - colum_list = FEATURES - df = pd.read_csv(file_path, usecols = colum_list).dropna() - return df - except FileNotFoundError as error: - print(error) - quit() - -def calc_f1_macro(y_true, y_pred): - f1_scores = [] - for column in y_true: - score = calc_f1_score(y_true[column].values, y_pred[column]) - f1_scores.append(score) - return np.mean(f1_scores) - -def calc_f1_score(y_true, y_pred): - tp = np.sum(np.multiply([i==True for i in y_pred], y_true)) - tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true])) - fp = np.sum(np.multiply([i==True for i in y_pred], [not(j) for j in y_true])) - fn = np.sum(np.multiply([i==False for i in y_pred], y_true)) - - precision = calc_precision(tp, fp) - recall = calc_recall(tp, fn) - - ''' - if tp != 0 and fp != 0: - precision = calc_precision(tp, fp) - else: - precision = 0 - - if tp != 0 and fn != 0: - recall = calc_recall(tp, fn) - else: - recall = 0 - ''' - - if precision != 0 and recall != 0: - f1 = (2 * precision * recall) / (precision + recall) - else: - f1 = 0 - return f1 - -def calc_precision(tp, fp): - return tp / (tp + fp) - -def calc_recall(tp, fn): - return tp / (tp + fn) - -def get_score_from_cli(mode): - try: - x = float(input("x: ")) - y = float(input("y: ")) - if mode == "v": - abs_v = np.sqrt(x**2 + y**2) - return np.array([abs_v]).reshape(1, -1) - return np.array([x, y]).reshape(1, -1) - except ValueError: - print("Invalid input. 
Please enter numeric values.") - return None - def main(): repeat = True @@ -87,103 +14,50 @@ def main(): args.set_information(False) args.set_graph(False) - settings = { - "repeat": True, - "file": args.get_file_path(), - "mode": args.get_mode(), - "information": args.get_information(), - "graph": args.get_graph() - } - while repeat: print("Currently selected setting:") - print(f"File: {settings["file"]}") - print(f"Mode: {settings["mode"]}") - print(f"Display information: {settings["information"]}") - print(f"Display graphs: {settings["graph"]}") + print(f"File: {args.get_file_path()}") + print(f"Mode: {args.get_mode()}") + print(f"Display information: {args.get_information()}") + print(f"Display graphs: {args.get_graph()}") - prompt = input("Change settings [y / exit]: ") + prompt = input("Change settings [y / exit / blank]: ") if prompt == "y": - args.set_file_path(input("Change file : ")) - args.set_mode(input("Change mode [v, a, c]: ")) - args.set_information(bool(input("Display information [True / False]: "))) - args.set_graph(bool(input("Display graphs [True / False]: "))) + try: + args.set_file_path(input("Change file : ")) + args.set_mode(input("Change mode [v, a, c]: ")) + args.set_information(eval(input("Display information [True / False]: "))) + args.set_graph(eval(input("Display graphs [True / False]: "))) + except ValueError as error: + print(f"Value {error}") elif prompt == "exit": quit() - # load dataframe with argument [1] - df = load_dataframe(args.get_file_path()) - - # print dataframe information if argument [3] is true - if args.get_information(): - print(df.describe()) - print(df.head()) - print(df.head().info()) - - # display graphs if argument [4] is true - if args.get_graph(): - - sns.countplot(x = df["points"]) - plt.show() - - sns.heatmap(df.corr(), annot=True, cmap='coolwarm') - plt.show() - - sns.scatterplot(x=df['x'], y=df['y'], hue=df['points']) - plt.show() - # use verctor length of (x,y) as feature if args.get_mode() == "v": - df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2) - X = df[["radius"]] + load_dataframe = make_dataframe(lambda df: df) + features = make_features(radius) + score = make_score_function(lambda x, y: [np.sqrt(x**2 + y**2)]) # use absoult values of (x,y) as feature elif args.get_mode() == "a": - df_abs = df.copy().abs() - features = ["x", "y"] - X = df[features] + load_dataframe = make_dataframe(lambda df: df.abs()) + features = make_features(xy) + score = make_score_function(lambda x, y: [x, y]) # use unaltered values of (x,y) as feature elif args.get_mode() == "c": - features = ["x", "y"] - X = df[features] + load_dataframe = make_dataframe(lambda df: df) + features = make_features(xy) + score = make_score_function(lambda x, y: [x, y]) + # default use vector length + else: + load_dataframe = make_dataframe(lambda df: df) + features = make_features(radius) + score = make_score_function(lambda x, y: [np.sqrt(x**2 + y**2)]) + + print("\n") + + apply_model(load_dataframe(args.get_file_path()), features, score, args.get_information(), args.get_graph()) - y = pd.get_dummies(df['points']) - - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0) - - random_forest = RandomForestClassifier(n_estimators=700, random_state=0) - decision_tree = DecisionTreeClassifier(random_state=0) - k_neighbors = KNeighborsClassifier(n_neighbors=5) - - models = { - "Random Forest Classifier": random_forest, - "Decision Tree Classifier": decision_tree, - "K-Neighbors": k_neighbors - } - - for name, model in models.items(): - 
model.fit(X_train.values, y_train.values) - - for name, model in models.items(): - pred = model.predict(X_test.values) - - my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred)) - print(f'My F1 score of {name} is {my_f1_macro_score}\n') - - f1_sklearn = f1_score(y_test.values, pred, average='macro') - print(f'Sklearn F1 score of {name} is {f1_sklearn}\n') - - score = get_score_from_cli(args.get_mode) - - label_encoder = LabelEncoder() - df["points"] = label_encoder.fit_transform(df["points"]) - - for name, model in models.items(): - pred = model.predict(score) - points_number = pd.DataFrame(pred).idxmax(axis=1) - points = label_encoder.inverse_transform(points_number)[0] - print(f"{name}: {points} Punkte") - - if __name__ == "__main__": main() \ No newline at end of file diff --git a/py/absolut.py b/py/absolut.py index 06edbea..db094c8 100644 --- a/py/absolut.py +++ b/py/absolut.py @@ -1,64 +1,15 @@ -import pandas as pd -import numpy as np -import seaborn as sns -import matplotlib.pyplot as plt -from sklearn.metrics import f1_score -from sklearn.model_selection import train_test_split -from sklearn.ensemble import RandomForestClassifier -from sklearn.neighbors import KNeighborsClassifier -from sklearn.tree import DecisionTreeClassifier -from sklearn.preprocessing import LabelEncoder - -FEATURES = ["points", "x", "y"] +from py.my_functions import * # create dataframe from csv and drop any row with null values -def load_dataframe(): +def load_dataframe(file_path): try: colum_list = FEATURES - df = pd.read_csv("data/synthetic_data.csv", usecols = colum_list).dropna() + df = pd.read_csv(file_path, usecols = colum_list).dropna() return df.abs() except FileNotFoundError as error: print(error) quit() -def calc_f1_macro(y_true, y_pred): - f1_scores = [] - for column in y_true: - score = calc_f1_score(y_true[column].values, y_pred[column]) - f1_scores.append(score) - return np.mean(f1_scores) - -def calc_f1_score(y_true, y_pred): - tp = np.sum(np.multiply([i==True for i in y_pred], y_true)) - tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true])) - fp = np.sum(np.multiply([i==True for i in y_pred], [not(j) for j in y_true])) - fn = np.sum(np.multiply([i==False for i in y_pred], y_true)) - - precision = calc_precision(tp, fp) - recall = calc_recall(tp, fn) - - '''if tp != 0 and fp != 0: - precision = calc_precision(tp, fp) - else: - precision = 0 - - if tp != 0 and fn != 0: - recall = calc_recall(tp, fn) - else: - recall = 0''' - - if precision != 0 and recall != 0: - f1 = (2 * precision * recall) / (precision + recall) - else: - f1 = 0 - return f1 - -def calc_precision(tp, fp): - return tp / (tp + fp) - -def calc_recall(tp, fn): - return tp / (tp + fn) - def get_score_from_cli(): try: x = float(input("x: ")) @@ -68,24 +19,31 @@ def get_score_from_cli(): print("Invalid input. 
Please enter numeric values.") return None -def main(): - df = load_dataframe() - print(df.describe()) - print(df.head()) - print(df.head().info()) - - sns.countplot(x = df["points"]) - plt.show() +def absolut(file_path, inf, graph): - sns.heatmap(df.corr(), annot=True, cmap='coolwarm') - plt.show() + # load dataframe with argument [1] + df = load_dataframe(file_path) + + # print dataframe information if argument [3] is true + if inf: + print(df.describe()) + print(df.head()) + print(df.head().info()) - sns.scatterplot(x=df['x'], y=df['y'], hue=df['points']) - plt.show() + # display graphs if argument [4] is true + if graph: + sns.countplot(x = df["points"]) + plt.show() + + sns.heatmap(df.corr(), annot=True, cmap='coolwarm') + plt.show() + + sns.scatterplot(x=df['x'], y=df['y'], hue=df['points']) + plt.show() features = ["x", "y"] X = df[features] - + y = pd.get_dummies(df['points']) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0) @@ -107,10 +65,10 @@ def main(): pred = model.predict(X_test.values) my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred)) - print(f'My F1 score of {name} is {my_f1_macro_score}\n') + print(f'My F1 score of {name} is {my_f1_macro_score}') f1_sklearn = f1_score(y_test.values, pred, average='macro') - print(f'Sklearn F1 score of {name} is {f1_sklearn}\n') + print(f'Sklearn F1 score of {name} is {f1_sklearn}') score = get_score_from_cli() @@ -121,8 +79,4 @@ def main(): pred = model.predict(score) points_number = pd.DataFrame(pred).idxmax(axis=1) points = label_encoder.inverse_transform(points_number)[0] - print(f"{name}: {points} Punkte") - - -if __name__ == "__main__": - main() \ No newline at end of file + print(f"{name}: {points} Punkte") \ No newline at end of file diff --git a/py/arguments.py b/py/arguments.py index 63a7808..b813913 100644 --- a/py/arguments.py +++ b/py/arguments.py @@ -31,25 +31,16 @@ class Arguments: return self.mode.value def set_mode(self, value): - try: - self.mode = Mode(value) - except ValueError: - raise ValueError(f"Invalid mode '{value}'. Allowed values: {[m.value for m in Mode]}") - + self.mode = Mode(value) + def get_information(self): return self.information.value def set_information(self, value): - try: - self.information = Information(value) - except ValueError: - raise ValueError(f"Invalid information '{value}'. Allowed values: {[m.value for m in Information]}") + self.information = Information(value) def get_graph(self): return self.graph.value def set_graph(self, value): - try: - self.graph = Graph(value) - except ValueError: - raise ValueError(f"Invalid graph '{value}'. Allowed values: {[m.value for m in Graph]}") + self.graph = Graph(value) \ No newline at end of file diff --git a/py/cartesian.py b/py/cartesian.py index e69de29..b0a463e 100644 --- a/py/cartesian.py +++ b/py/cartesian.py @@ -0,0 +1,82 @@ +from py.my_functions import * + +# create dataframe from csv and drop any row with null values +def load_dataframe(file_path): + try: + colum_list = FEATURES + df = pd.read_csv(file_path, usecols = colum_list).dropna() + return df + except FileNotFoundError as error: + print(error) + quit() + +def get_score_from_cli(): + try: + x = float(input("x: ")) + y = float(input("y: ")) + return np.array([x, y]).reshape(1, -1) + except ValueError: + print("Invalid input. 
Please enter numeric values.") + return None + +def cartesian(file_path, inf, graph): + + # load dataframe with argument [1] + df = load_dataframe(file_path) + + # print dataframe information if argument [3] is true + if inf: + print(df.describe()) + print(df.head()) + print(df.head().info()) + + # display graphs if argument [4] is true + if graph: + sns.countplot(x = df["points"]) + plt.show() + + sns.heatmap(df.corr(), annot=True, cmap='coolwarm') + plt.show() + + sns.scatterplot(x=df['x'], y=df['y'], hue=df['points']) + plt.show() + + features = ["x", "y"] + X = df[features] + + y = pd.get_dummies(df['points']) + + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0) + + random_forest = RandomForestClassifier(n_estimators=700, random_state=0) + decision_tree = DecisionTreeClassifier(random_state=0) + k_neighbors = KNeighborsClassifier(n_neighbors=5) + + models = { + "Random Forest Classifier": random_forest, + "Decision Tree Classifier": decision_tree, + "K-Neighbors": k_neighbors + } + + for name, model in models.items(): + model.fit(X_train.values, y_train.values) + + for name, model in models.items(): + pred = model.predict(X_test.values) + + my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred)) + print(f'My F1 score of {name} is {my_f1_macro_score}') + + f1_sklearn = f1_score(y_test.values, pred, average='macro') + print(f'Sklearn F1 score of {name} is {f1_sklearn}') + + score = get_score_from_cli() + + label_encoder = LabelEncoder() + df["points"] = label_encoder.fit_transform(df["points"]) + + for name, model in models.items(): + pred = model.predict(score) + points_number = pd.DataFrame(pred).idxmax(axis=1) + points = label_encoder.inverse_transform(points_number)[0] + print(f"{name}: {points} Punkte") \ No newline at end of file diff --git a/py/functions.py b/py/functions.py deleted file mode 100644 index e69de29..0000000 diff --git a/py/modell.py b/py/modell.py new file mode 100644 index 0000000..cc019e5 --- /dev/null +++ b/py/modell.py @@ -0,0 +1,137 @@ +import pandas as pd +import numpy as np +import seaborn as sns +import matplotlib.pyplot as plt +from sklearn.metrics import f1_score +from sklearn.model_selection import train_test_split +from sklearn.ensemble import RandomForestClassifier +from sklearn.neighbors import KNeighborsClassifier +from sklearn.tree import DecisionTreeClassifier +from sklearn.preprocessing import LabelEncoder + +# low amounts of features will result in many zero devision in tp=0 and fp=0 +np.seterr(divide='ignore', invalid='ignore') + +FEATURES = ["points", "x", "y"] + +def make_dataframe(transform): + def load_dataframe(file_path): + try: + colum_list = FEATURES + df = pd.read_csv(file_path, usecols = colum_list).dropna() + return transform(df) + except FileNotFoundError as error: + print(error) + quit() + return load_dataframe + +def make_features(selector): + def select(df): + return df + return select(selector) + +def radius(df): + df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2) + return df[["radius"]] + +def xy(df): + features = ["x", "y"] + return df[features] + +def apply_model(df, features, score, inf, graph): + # print dataframe information + if inf: + print(df.describe()) + print(df.head()) + print(df.head().info()) + + # display graphs + if graph: + sns.countplot(x = df["points"]) + plt.show() + + sns.heatmap(df.corr(), annot=True, cmap='coolwarm') + plt.show() + + sns.scatterplot(x=df['x'], y=df['y'], hue=df['points']) + plt.show() + + y = pd.get_dummies(df['points']) + X = features(df) + + 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0) + + random_forest = RandomForestClassifier(n_estimators=700, random_state=0) + decision_tree = DecisionTreeClassifier(random_state=0) + k_neighbors = KNeighborsClassifier(n_neighbors=5) + + models = { + "Random Forest Classifier": random_forest, + "Decision Tree Classifier": decision_tree, + "K-Neighbors": k_neighbors + } + + for name, model in models.items(): + model.fit(X_train.values, y_train.values) + + for name, model in models.items(): + pred = model.predict(X_test.values) + + my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred)) + print(f'My F1 score of {name} is {my_f1_macro_score}') + + f1_sklearn = f1_score(y_test.values, pred, average='macro') + print(f'Sklearn F1 score of {name} is {f1_sklearn}') + + score = score() + + label_encoder = LabelEncoder() + df["points"] = label_encoder.fit_transform(df["points"]) + + for name, model in models.items(): + pred = model.predict(score) + points_number = pd.DataFrame(pred).idxmax(axis=1) + points = label_encoder.inverse_transform(points_number)[0] + print(f"{name}: {points} Punkte") + + input("\nPress any key to continue...\n") + +# calc f1 macro +def calc_f1_macro(y_true, y_pred): + f1_scores = [] + for column in y_true: + score = calc_f1_score(y_true[column].values, y_pred[column]) + f1_scores.append(score) + return np.mean(f1_scores) + +def calc_f1_score(y_true, y_pred): + tp = np.sum(np.multiply([i==True for i in y_pred], y_true)) + tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true])) + fp = np.sum(np.multiply([i==True for i in y_pred], [not(j) for j in y_true])) + fn = np.sum(np.multiply([i==False for i in y_pred], y_true)) + + precision = calc_precision(tp, fp) + recall = calc_recall(tp, fn) + + if precision != 0 and recall != 0: + f1 = (2 * precision * recall) / (precision + recall) + else: + f1 = 0 + return f1 + +def calc_precision(tp, fp): + return tp / (tp + fp) + +def calc_recall(tp, fn): + return tp / (tp + fn) + +def make_score_function(transform): + def get_score_from_cli(): + try: + x = float(input("x: ")) + y = float(input("y: ")) + return np.array([transform(x, y)]).reshape(1, -1) + except ValueError: + print("Invalid input. 
Please enter numeric values.") + return None + return get_score_from_cli \ No newline at end of file diff --git a/py/my_functions.py b/py/my_functions.py new file mode 100644 index 0000000..303b57b --- /dev/null +++ b/py/my_functions.py @@ -0,0 +1,88 @@ +import pandas as pd +import numpy as np +import seaborn as sns +import matplotlib.pyplot as plt +from sklearn.metrics import f1_score +from sklearn.model_selection import train_test_split +from sklearn.ensemble import RandomForestClassifier +from sklearn.neighbors import KNeighborsClassifier +from sklearn.tree import DecisionTreeClassifier +from sklearn.preprocessing import LabelEncoder + +np.seterr(divide='ignore', invalid='ignore') + +FEATURES = ["points", "x", "y"] + +def make_dataframe(transform): + def load_dataframe(file_path): + try: + colum_list = FEATURES + df = pd.read_csv(file_path, usecols = colum_list).dropna() + return transform(df) + except FileNotFoundError as error: + print(error) + quit() + +def calc_f1_macro(y_true, y_pred): + f1_scores = [] + for column in y_true: + score = calc_f1_score(y_true[column].values, y_pred[column]) + f1_scores.append(score) + return np.mean(f1_scores) + +def calc_f1_score(y_true, y_pred): + tp = np.sum(np.multiply([i==True for i in y_pred], y_true)) + tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true])) + fp = np.sum(np.multiply([i==True for i in y_pred], [not(j) for j in y_true])) + fn = np.sum(np.multiply([i==False for i in y_pred], y_true)) + + precision = calc_precision(tp, fp) + recall = calc_recall(tp, fn) + + if precision != 0 and recall != 0: + f1 = (2 * precision * recall) / (precision + recall) + else: + f1 = 0 + return f1 + +def calc_precision(tp, fp): + return tp / (tp + fp) + +def calc_recall(tp, fn): + return tp / (tp + fn) + +def apply_model(X, y): + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0) + + random_forest = RandomForestClassifier(n_estimators=700, random_state=0) + decision_tree = DecisionTreeClassifier(random_state=0) + k_neighbors = KNeighborsClassifier(n_neighbors=5) + + models = { + "Random Forest Classifier": random_forest, + "Decision Tree Classifier": decision_tree, + "K-Neighbors": k_neighbors + } + + for name, model in models.items(): + model.fit(X_train.values, y_train.values) + + for name, model in models.items(): + pred = model.predict(X_test.values) + + my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred)) + print(f'My F1 score of {name} is {my_f1_macro_score}') + + f1_sklearn = f1_score(y_test.values, pred, average='macro') + print(f'Sklearn F1 score of {name} is {f1_sklearn}') + +def make_score_function(transform): + def get_score_from_cli(): + try: + x = float(input("x: ")) + y = float(input("y: ")) + return np.array([transform(x, y)]).reshape(1, -1) + except ValueError: + print("Invalid input. 
Please enter numeric values.") + return None + return get_score_from_cli \ No newline at end of file diff --git a/py/vector.py b/py/vector.py index 13d713b..94c6b07 100644 --- a/py/vector.py +++ b/py/vector.py @@ -1,64 +1,15 @@ -import pandas as pd -import numpy as np -import seaborn as sns -import matplotlib.pyplot as plt -from sklearn.metrics import f1_score -from sklearn.model_selection import train_test_split -from sklearn.ensemble import RandomForestClassifier -from sklearn.neighbors import KNeighborsClassifier -from sklearn.tree import DecisionTreeClassifier -from sklearn.preprocessing import LabelEncoder - -FEATURES = ["points", "x", "y"] +from my_functions import * # create dataframe from csv and drop any row with null values -def load_dataframe(): +def load_dataframe(file_path): try: colum_list = FEATURES - df = pd.read_csv("data/synthetic_data.csv", usecols = colum_list).dropna() + df = pd.read_csv(file_path, usecols = colum_list).dropna() return df except FileNotFoundError as error: print(error) quit() -def calc_f1_macro(y_true, y_pred): - f1_scores = [] - for column in y_true: - score = calc_f1_score(y_true[column].values, y_pred[column]) - f1_scores.append(score) - return np.mean(f1_scores) - -def calc_f1_score(y_true, y_pred): - tp = np.sum(np.multiply([i==True for i in y_pred], y_true)) - tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true])) - fp = np.sum(np.multiply([i==True for i in y_pred], [not(j) for j in y_true])) - fn = np.sum(np.multiply([i==False for i in y_pred], y_true)) - - precision = calc_precision(tp, fp) - recall = calc_recall(tp, fn) - - '''if tp != 0 and fp != 0: - precision = calc_precision(tp, fp) - else: - precision = 0 - - if tp != 0 and fn != 0: - recall = calc_recall(tp, fn) - else: - recall = 0''' - - if precision != 0 and recall != 0: - f1 = (2 * precision * recall) / (precision + recall) - else: - f1 = 0 - return f1 - -def calc_precision(tp, fp): - return tp / (tp + fp) - -def calc_recall(tp, fn): - return tp / (tp + fn) - def get_score_from_cli(): try: x = float(input("x: ")) @@ -69,18 +20,27 @@ def get_score_from_cli(): print("Invalid input. 
Please enter numeric values.") return None -def main(): - df = load_dataframe() - print(df.head()) +def vector(file_path, inf, graph): + + # load dataframe + df = load_dataframe(file_path) + + # print dataframe information + if inf: + print(df.describe()) + print(df.head()) + print(df.head().info()) - sns.countplot(x = df["points"]) - plt.show() + # display graphs + if graph: + sns.countplot(x = df["points"]) + plt.show() - sns.heatmap(df.corr(), annot=True, cmap='coolwarm') - plt.show() + sns.heatmap(df.corr(), annot=True, cmap='coolwarm') + plt.show() - sns.scatterplot(x=df['x'], y=df['y'], hue=df['points']) - plt.show() + sns.scatterplot(x=df['x'], y=df['y'], hue=df['points']) + plt.show() df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2) X = df[["radius"]] @@ -89,6 +49,9 @@ def main(): X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0) + print(X_train) + quit() + random_forest = RandomForestClassifier(n_estimators=700, random_state=0) decision_tree = DecisionTreeClassifier(random_state=0) k_neighbors = KNeighborsClassifier(n_neighbors=5) @@ -106,10 +69,10 @@ def main(): pred = model.predict(X_test.values) my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred)) - print(f'My F1 score of {name} is {my_f1_macro_score}\n') + print(f'My F1 score of {name} is {my_f1_macro_score}') f1_sklearn = f1_score(y_test.values, pred, average='macro') - print(f'Sklearn F1 score of {name} is {f1_sklearn}\n') + print(f'Sklearn F1 score of {name} is {f1_sklearn}') score = get_score_from_cli() @@ -122,6 +85,4 @@ def main(): points = label_encoder.inverse_transform(points_number)[0] print(f"{name}: {points} Punkte") - -if __name__ == "__main__": - main() \ No newline at end of file +vector("data/shots.csv", False, False) \ No newline at end of file
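
For reference, a minimal, self-contained sketch of the closure-factory pattern that py/modell.py introduces (make_dataframe, the radius/xy selectors, make_score_function), assuming the FEATURES columns and the data/shots.csv path used above. It deliberately leaves out the make_features wrapper, the plotting code, and the model training, so it is an illustration of the pattern rather than the exact module code.

import numpy as np
import pandas as pd

FEATURES = ["points", "x", "y"]

def make_dataframe(transform):
    # Returns a loader that reads only the FEATURES columns, drops rows with
    # null values, and applies a mode-specific transform (identity or df.abs()).
    def load_dataframe(file_path):
        try:
            df = pd.read_csv(file_path, usecols=FEATURES).dropna()
            return transform(df)
        except FileNotFoundError as error:
            print(error)
            raise SystemExit(1)
    return load_dataframe

def radius(df):
    # Selector for mode "v": vector length of (x, y) as the single feature.
    df = df.copy()
    df["radius"] = np.sqrt(df["x"] ** 2 + df["y"] ** 2)
    return df[["radius"]]

def xy(df):
    # Selector for modes "a" and "c": the raw (x, y) columns.
    return df[["x", "y"]]

def make_score_function(transform):
    # Returns a CLI prompt that maps user input into the same feature space
    # that the selector produced for training.
    def get_score_from_cli():
        try:
            x = float(input("x: "))
            y = float(input("y: "))
            return np.array([transform(x, y)]).reshape(1, -1)
        except ValueError:
            print("Invalid input. Please enter numeric values.")
            return None
    return get_score_from_cli

# Example wiring for mode "v":
# load_dataframe = make_dataframe(lambda df: df)
# X = radius(load_dataframe("data/shots.csv"))
# score = make_score_function(lambda x, y: [np.sqrt(x ** 2 + y ** 2)])

The point of the factories is that main.py can pick one loader, one selector, and one CLI transform per mode and hand all three to apply_model, instead of duplicating the whole pipeline across vector.py, absolut.py, and cartesian.py.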
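The hand-rolled macro-F1 in calc_f1_macro / calc_f1_score can be sanity-checked against sklearn on a tiny one-hot example. The snippet below mirrors the tp/fp/fn counting but folds the zero-division handling into a single tp check, so it is an approximation of the module code, not a copy; the toy labels are made up for illustration.

import numpy as np
import pandas as pd
from sklearn.metrics import f1_score

def f1_per_column(y_true, y_pred):
    # Per-class F1 from boolean columns, counting tp/fp/fn like calc_f1_score.
    tp = np.sum(y_pred & y_true)
    fp = np.sum(y_pred & ~y_true)
    fn = np.sum(~y_pred & y_true)
    if tp == 0:
        return 0.0
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * precision * recall / (precision + recall)

y_true = pd.get_dummies(pd.Series([2, 3, 2, 4, 3]))  # one-hot ground truth
y_pred = pd.get_dummies(pd.Series([2, 3, 3, 4, 3]))  # one-hot predictions

manual = np.mean([f1_per_column(y_true[c].values.astype(bool),
                                y_pred[c].values.astype(bool)) for c in y_true])
reference = f1_score(y_true.values, y_pred.values, average="macro")
print(manual, reference)

Both values should agree (about 0.822 for this toy input), which is the same comparison the patch prints per model ("My F1 score ..." vs "Sklearn F1 score ...").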