commented code

2025-11-30 10:14:41 +01:00 · 2025-11-30 10:14:41 +01:00 · cafea4bab1
commit cafea4bab1
parent efe9a1afe3
4 changed files with 44 additions and 9 deletions
--- a/main.py
+++ b/main.py
@ -1,7 +1,8 @@
 import sys
 from py.arguments import Arguments
-from py.modell import *
+from py.model import *

+# A file path must be passed as a CLI argument
 if not sys.argv[1:]:
    print("Usage: python3 main.py <path to csv>")
    sys.exit(1)
@ -15,6 +16,7 @@ def main():
    args.set_graph(False)

    while repeat:
+        # Display settings
        print("Currently selected setting:")
        print(f"File: {args.get_file_path()}")
        print(f"Mode: {args.get_mode()}")
@ -57,6 +59,7 @@ def main():

        print("\n")

+        # apply model, function in py/model.py
        apply_model(load_dataframe(args.get_file_path()), features, score, args.get_information(), args.get_graph())
        
 if __name__ == "__main__":
--- a/py/arguments.py
+++ b/py/arguments.py
@ -1,46 +1,59 @@
 from enum import Enum

+# Possible modes: v = vector length of (x, y) / a = absolute values, reduce spread / c = cartesian, empirical values
 class Mode(Enum):
    V = "v"
    A = "a"
    C = "c"

+# Display pandas dataframe description, head and info
 class Information(Enum):
    DISABLED = False
    ENABLED = True

+# Display countplot, heatmap and scatterplot graphs
 class Graph(Enum):
    DISABLED = False
    ENABLED = True

+# Class Arguments for accepted values 
 class Arguments:

+    # Constructor. Default params: "filepath", v, False, False
    def __init__(self, file_path, mode, information, graph):
        self.file_path = file_path
        self.mode = mode
        self.information = information
        self.graph = graph

+    # Filepath getter
    def get_file_path(self):
        return self.file_path

+    # Filepath setter
    def set_file_path(self, value):
        self.file_path = value

+    # Mode getter
    def get_mode(self):
        return self.mode.value

+    # Mode setter
    def set_mode(self, value):
        self.mode = Mode(value)
        
+    # Information getter
    def get_information(self):
        return self.information.value

+    # Information setter
    def set_information(self, value):
        self.information = Information(value)

+    # Graph getter 
    def get_graph(self):
        return self.graph.value

+    # Graph setter
    def set_graph(self, value):
        self.graph = Graph(value)
--- a/py/generate_synthetic_shots.py
+++ b/py/generate_synthetic_shots.py
@ -10,16 +10,17 @@ Export the dataset to "data/synthetic_shots.csv"
 ---------------------------------------------------'''

 import sys
-import pandas as pd
 import numpy as np
 import csv

 args = sys.argv[1:]

+# The number of shots to generate must be passed as a CLI argument
 if not args:
    print("Usage: python3 generate_synthetic_shots.py <number of generated shots>")
    sys.exit(1)

+# Amount to generate
 n = int(args[0])

 # Area circle
@ -36,15 +37,14 @@ A1 = 10.5 ** 2 * np.pi

 possible_values = np.linspace(-10, 10, 41) # fromn -10 to 10 with step 0.5

-xy = [(np.random.choice(possible_values), np.random.choice(possible_values)) for _ in range(n)]
+xy = [(np.random.choice(possible_values), np.random.choice(possible_values)) for _ in range(n)] # numpy generates n amounts of (x, y) coordinates

 dataset = []

+# Apply score to coordinate based on area comparison
 for i in xy:
    A = (i[0]**2 + i[1]**2) * np.pi

-    #print(A)
-
    if A <= A10:
        dataset.append([10, i[0], i[1]])
    elif A > A10 and A <= A9:
@ -68,6 +68,7 @@ for i in xy:
    elif A > A1:
        dataset.append([0, i[0], i[1]])

+# Write the synthetic dataset to data/synthetic_shots.csv
 with open('data/synthetic_shots.csv', 'w', newline='') as csvfile:
    fieldnames = ['points', 'x', 'y']
    writer = csv.writer(csvfile)
--- a/py/modell.py
+++ b/py/modell.py
@ -14,6 +14,7 @@ np.seterr(divide='ignore', invalid='ignore')

 FEATURES = ["points", "x", "y"]

+# create dataframe with csv file
 def make_dataframe(transform):
    def load_dataframe(file_path):
        try:
@ -25,20 +26,25 @@ def make_dataframe(transform):
            quit()
    return load_dataframe

+# depending on mode, either the [x, y] coordinates or the length of the vector (x, y) [radius] is used as feature
 def make_features(selector):
    def select(df):
        return df
    return select(selector)

+# Feature radius when mode = v
 def radius(df):
    df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2)
    return df[["radius"]]

+# Feature ["x", "y"] when mode = a or c
 def xy(df):
    features = ["x", "y"]
    return df[features]

+# apply model on dataframe. Params: df = dataframe, features = function make_features, inf = True or False, graph = True or False
 def apply_model(df, features, score, inf, graph):
+    
    # print dataframe information
    if inf:
        print(df.describe())
@ -57,12 +63,18 @@ def apply_model(df, features, score, inf, graph):
        plt.show()
    
    y = pd.get_dummies(df['points'])
-    X = features(df)
+    X = features(df) # select which features to use radius or xy

+    # Split data into 60/40 (train/test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

+    # Create a RandomForestClassifier with n_estimators=700
    random_forest = RandomForestClassifier(n_estimators=700, random_state=0)
+
+    # Create a DecisionTreeClassifier
    decision_tree = DecisionTreeClassifier(random_state=0)
+
+    # Create a KNeighborsClassifier with n_neighbors=5
    k_neighbors = KNeighborsClassifier(n_neighbors=5)

    models = {
@ -77,13 +89,15 @@ def apply_model(df, features, score, inf, graph):
    for name, model in models.items():
        pred = model.predict(X_test.values)

+        # calculate f1 with own function
        my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
        print(f'My F1 score of {name} is {my_f1_macro_score}')
        
+        # calculate f1 with sklearn function
        f1_sklearn = f1_score(y_test.values, pred, average='macro')
        print(f'Sklearn F1 score of {name} is {f1_sklearn}')
    
-    score = score()
+    score = score() # prompt for x, y coordinates and transform score based on mode

    label_encoder = LabelEncoder()
    df["points"] = label_encoder.fit_transform(df["points"])
@ -104,6 +118,7 @@ def calc_f1_macro(y_true, y_pred):
        f1_scores.append(score)
    return np.mean(f1_scores)

+# calc f1 score
 def calc_f1_score(y_true, y_pred):
    tp = np.sum(np.multiply([i==True for i in y_pred], y_true))
    tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true]))
@ -119,12 +134,15 @@ def calc_f1_score(y_true, y_pred):
        f1 = 0
    return f1

+# calc precision
 def calc_precision(tp, fp):
    return tp / (tp + fp)

+# calc recall
 def calc_recall(tp, fn):
    return tp / (tp + fn)

+# ask for x, y value and return transformed array based on mode
 def make_score_function(transform):
    def get_score_from_cli():
        try: