diff --git a/main.py b/main.py index 3477b2d..76378f1 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,8 @@ import sys from py.arguments import Arguments -from py.modell import * +from py.model import * +# CLI argument for file path necessary if not sys.argv[1:]: print("Usage: python3 main.py ") sys.exit(1) @@ -15,6 +16,7 @@ def main(): args.set_graph(False) while repeat: + # Display settings print("Currently selected setting:") print(f"File: {args.get_file_path()}") print(f"Mode: {args.get_mode()}") @@ -57,6 +59,7 @@ def main(): print("\n") + # apply model, function in py/model.py apply_model(load_dataframe(args.get_file_path()), features, score, args.get_information(), args.get_graph()) if __name__ == "__main__": diff --git a/py/arguments.py b/py/arguments.py index b813913..628b508 100644 --- a/py/arguments.py +++ b/py/arguments.py @@ -1,46 +1,59 @@ from enum import Enum +# Possible modes v = vector length of (x, y) / a = absolute values, reduce spread / c = cartesian, empirical values class Mode(Enum): V = "v" A = "a" C = "c" +# Display pandas dataframe description, head and info class Information(Enum): DISABLED = False ENABLED = True +# Display countplot, heatmap and scatterplot graphs class Graph(Enum): DISABLED = False ENABLED = True +# Class Arguments for accepted values class Arguments: + # Constructor. 
Default params: "filepath", v, False, False def __init__(self, file_path, mode, information, graph): self.file_path = file_path self.mode = mode self.information = information self.graph = graph + # Filepath getter def get_file_path(self): return self.file_path + # Filepath setter def set_file_path(self, value): self.file_path = value + # Mode getter def get_mode(self): return self.mode.value + # Mode setter def set_mode(self, value): self.mode = Mode(value) + # Information getter def get_information(self): return self.information.value + # Information setter def set_information(self, value): self.information = Information(value) - + + # Graph getter def get_graph(self): return self.graph.value + # Graph setter def set_graph(self, value): self.graph = Graph(value) \ No newline at end of file diff --git a/py/generate_synthetic_shots.py b/py/generate_synthetic_shots.py index f93e76e..b728ecb 100644 --- a/py/generate_synthetic_shots.py +++ b/py/generate_synthetic_shots.py @@ -10,16 +10,17 @@ Export the dataset to "data/synthetic_shots.csv" ---------------------------------------------------''' import sys -import pandas as pd import numpy as np import csv args = sys.argv[1:] +# CLI argument for amount necessary if not args: print("Usage: python3 generate_synthetic_shots.py ") sys.exit(1) +# Amount to generate n = int(args[0]) # Area circle @@ -36,14 +37,13 @@ A1 = 10.5 ** 2 * np.pi possible_values = np.linspace(-10, 10, 41) # fromn -10 to 10 with step 0.5 -xy = [(np.random.choice(possible_values), np.random.choice(possible_values)) for _ in range(n)] +xy = [(np.random.choice(possible_values), np.random.choice(possible_values)) for _ in range(n)] # numpy generates n amounts of (x, y) coordinates dataset = [] +# Apply score to coordinate based on area comparison for i in xy: A = (i[0]**2 + i[1]**2) * np.pi - - #print(A) if A <= A10: dataset.append([10, i[0], i[1]]) @@ -67,7 +67,8 @@ for i in xy: dataset.append([1, i[0], i[1]]) elif A > A1: dataset.append([0, i[0], i[1]]) 
- + +# Write synthetic dataset in data/synthetic_shots.csv with open('data/synthetic_shots.csv', 'w', newline='') as csvfile: fieldnames = ['points', 'x', 'y'] writer = csv.writer(csvfile) diff --git a/py/modell.py b/py/model.py similarity index 81% rename from py/modell.py rename to py/model.py index cc019e5..9222e52 100644 --- a/py/modell.py +++ b/py/model.py @@ -14,6 +14,7 @@ np.seterr(divide='ignore', invalid='ignore') FEATURES = ["points", "x", "y"] +# create dataframe with csv file def make_dataframe(transform): def load_dataframe(file_path): try: @@ -25,20 +26,25 @@ def make_dataframe(transform): quit() return load_dataframe +# depending on mode, [x, y] coordinates are used as feature or length of vector (x, y) [radius] is used def make_features(selector): def select(df): return df return select(selector) +# Feature radius when mode = v def radius(df): df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2) return df[["radius"]] +# Feature ["x", "y"] when mode = a or c def xy(df): features = ["x", "y"] return df[features] +# apply model on dataframe. 
Params: df = dataframe, features = function make_features, inf = True or False, graph = True or False def apply_model(df, features, score, inf, graph): + # print dataframe information if inf: print(df.describe()) @@ -57,12 +63,18 @@ def apply_model(df, features, score, inf, graph): plt.show() y = pd.get_dummies(df['points']) - X = features(df) + X = features(df) # select which features to use radius or xy + # Split data into 60/40 (train/test) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0) + # Create a RandomForestClassifier with n_estimators=700 random_forest = RandomForestClassifier(n_estimators=700, random_state=0) + + # Create a DecisionTreeClassifier decision_tree = DecisionTreeClassifier(random_state=0) + + # Create a KNeighborsClassifier with n_neighbors=5 k_neighbors = KNeighborsClassifier(n_neighbors=5) models = { @@ -77,13 +89,15 @@ def apply_model(df, features, score, inf, graph): for name, model in models.items(): pred = model.predict(X_test.values) + # calculate f1 with own function my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred)) print(f'My F1 score of {name} is {my_f1_macro_score}') + # calculate f1 with sklearn function f1_sklearn = f1_score(y_test.values, pred, average='macro') print(f'Sklearn F1 score of {name} is {f1_sklearn}') - score = score() + score = score() # prompt for x, y coordinates and transform score based on mode label_encoder = LabelEncoder() df["points"] = label_encoder.fit_transform(df["points"]) @@ -104,6 +118,7 @@ def calc_f1_macro(y_true, y_pred): f1_scores.append(score) return np.mean(f1_scores) +# calc f1 score def calc_f1_score(y_true, y_pred): tp = np.sum(np.multiply([i==True for i in y_pred], y_true)) tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true])) @@ -119,12 +134,15 @@ def calc_f1_score(y_true, y_pred): f1 = 0 return f1 +# calc precision def calc_precision(tp, fp): return tp / (tp + fp) +# calc recall def calc_recall(tp, fn): return tp / 
(tp + fn) +# ask for x, y value and return transformed array based on mode def make_score_function(transform): def get_score_from_cli(): try: