commented code

Sandro Zimmermann 2025-11-30 10:14:41 +01:00
parent efe9a1afe3
commit cafea4bab1
4 changed files with 44 additions and 9 deletions

View File: main.py

@@ -1,7 +1,8 @@
 import sys
 from py.arguments import Arguments
-from py.modell import *
+from py.model import *
+# A CLI argument with the file path is required
 if not sys.argv[1:]:
     print("Usage: python3 main.py <path to csv>")
     sys.exit(1)
@@ -15,6 +16,7 @@ def main():
         args.set_graph(False)
     while repeat:
+        # Display the currently selected settings
         print("Currently selected setting:")
         print(f"File: {args.get_file_path()}")
         print(f"Mode: {args.get_mode()}")
@@ -57,6 +59,7 @@ def main():
     print("\n")
+    # Apply the model, function in py/model.py
     apply_model(load_dataframe(args.get_file_path()), features, score, args.get_information(), args.get_graph())
 if __name__ == "__main__":

View File: py/arguments.py

@@ -1,46 +1,59 @@
 from enum import Enum
+# Possible modes: v = vector length of (x, y) / a = absolute values, reduce spread / c = cartesian, empirical values
 class Mode(Enum):
     V = "v"
     A = "a"
     C = "c"
+# Display pandas dataframe description, head and info
 class Information(Enum):
     DISABLED = False
     ENABLED = True
+# Display countplot, heatmap and scatterplot graphs
 class Graph(Enum):
     DISABLED = False
     ENABLED = True
+# Class Arguments holds the accepted values
 class Arguments:
+    # Constructor. Default params: "filepath", v, False, False
     def __init__(self, file_path, mode, information, graph):
         self.file_path = file_path
         self.mode = mode
         self.information = information
         self.graph = graph
+    # Filepath getter
     def get_file_path(self):
         return self.file_path
+    # Filepath setter
     def set_file_path(self, value):
         self.file_path = value
+    # Mode getter
     def get_mode(self):
         return self.mode.value
+    # Mode setter
     def set_mode(self, value):
         self.mode = Mode(value)
+    # Information getter
     def get_information(self):
         return self.information.value
+    # Information setter
     def set_information(self, value):
         self.information = Information(value)
+    # Graph getter
     def get_graph(self):
         return self.graph.value
+    # Graph setter
     def set_graph(self, value):
         self.graph = Graph(value)

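As a reading aid (not part of the commit), here is a minimal sketch of how the Arguments class and its enums above might be used; the file path is only an example value, taken from the synthetic dataset written by generate_synthetic_shots.py:

    from py.arguments import Arguments, Mode, Information, Graph

    # Defaults as named in the constructor comment: "filepath", v, False, False
    args = Arguments("data/synthetic_shots.csv", Mode.V, Information.DISABLED, Graph.DISABLED)

    # The setters accept raw values and wrap them back into the enums
    args.set_mode("a")           # stored as Mode.A
    args.set_information(True)   # stored as Information.ENABLED
    print(args.get_mode())       # -> "a"
    print(args.get_graph())      # -> False
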
View File: generate_synthetic_shots.py

@@ -10,16 +10,17 @@ Export the dataset to "data/synthetic_shots.csv"
 ---------------------------------------------------'''
 import sys
-import pandas as pd
 import numpy as np
 import csv
 args = sys.argv[1:]
+# A CLI argument with the amount is required
 if not args:
     print("Usage: python3 generate_synthetic_shots.py <number of generated shots>")
     sys.exit(1)
+# Amount to generate
 n = int(args[0])
 # Area circle
@@ -36,15 +37,14 @@ A1 = 10.5 ** 2 * np.pi
 possible_values = np.linspace(-10, 10, 41)  # from -10 to 10 with step 0.5
-xy = [(np.random.choice(possible_values), np.random.choice(possible_values)) for _ in range(n)]
+xy = [(np.random.choice(possible_values), np.random.choice(possible_values)) for _ in range(n)]  # numpy generates n (x, y) coordinate pairs
 dataset = []
+# Apply a score to each coordinate based on area comparison
 for i in xy:
     A = (i[0]**2 + i[1]**2) * np.pi
-    #print(A)
     if A <= A10:
         dataset.append([10, i[0], i[1]])
     elif A > A10 and A <= A9:
@@ -68,6 +68,7 @@ for i in xy:
     elif A > A1:
         dataset.append([0, i[0], i[1]])
+# Write the synthetic dataset to data/synthetic_shots.csv
 with open('data/synthetic_shots.csv', 'w', newline='') as csvfile:
     fieldnames = ['points', 'x', 'y']
     writer = csv.writer(csvfile)

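For orientation (not part of the diff): the loop above scores each shot by comparing the area of the circle through its coordinates, A = (x² + y²) · π, against the ring areas A10 … A1, from the innermost ring (10 points) outwards; everything beyond A1 scores 0. A hedged sketch of the same check as a standalone function, with the ring areas passed in rather than hard-coded:

    import numpy as np

    def score_shot(x, y, ring_areas):
        # ring_areas is assumed to be [A10, A9, ..., A1], ordered from the
        # innermost (10 points) to the outermost (1 point) ring area
        A = (x**2 + y**2) * np.pi
        for points, ring_area in zip(range(10, 0, -1), ring_areas):
            if A <= ring_area:
                return points
        return 0  # outside A1, as in the final elif branch
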
View File: py/model.py

@@ -14,6 +14,7 @@ np.seterr(divide='ignore', invalid='ignore')
 FEATURES = ["points", "x", "y"]
+# Create the dataframe from the csv file
 def make_dataframe(transform):
     def load_dataframe(file_path):
         try:
@@ -25,20 +26,25 @@ def make_dataframe(transform):
             quit()
     return load_dataframe
+# Depending on mode, the [x, y] coordinates are used as features or the length of the vector (x, y) [radius] is used
 def make_features(selector):
     def select(df):
         return df
     return select(selector)
+# Feature radius when mode = v
 def radius(df):
     df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2)
     return df[["radius"]]
+# Feature ["x", "y"] when mode = a or c
 def xy(df):
     features = ["x", "y"]
     return df[features]
+# Apply the models to the dataframe. Params: df = dataframe, features = function from make_features, inf = True or False, graph = True or False
 def apply_model(df, features, score, inf, graph):
     # print dataframe information
     if inf:
         print(df.describe())
@@ -57,12 +63,18 @@ def apply_model(df, features, score, inf, graph):
         plt.show()
     y = pd.get_dummies(df['points'])
-    X = features(df)
+    X = features(df)  # select which features to use: radius or xy
+    # Split data into 60/40 (train/test)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
+    # Create a RandomForestClassifier with n_estimators=700
     random_forest = RandomForestClassifier(n_estimators=700, random_state=0)
+    # Create a DecisionTreeClassifier
     decision_tree = DecisionTreeClassifier(random_state=0)
+    # Create a KNeighborsClassifier with n_neighbors=5
     k_neighbors = KNeighborsClassifier(n_neighbors=5)
     models = {
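The hunk above cuts off right at the models dict, and the fit step is not visible in this diff. For context, the prediction loop in the next hunk suggests roughly the following glue code; the dict keys and the .fit call are assumptions, not part of the visible changes:

    # Assumed shape of the dict and the training step the hunk cuts off
    models = {
        "Random Forest": random_forest,
        "Decision Tree": decision_tree,
        "K Neighbors": k_neighbors,
    }
    for name, model in models.items():
        model.fit(X_train.values, y_train.values)  # assumed: trained on the 60% train split
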
@@ -77,13 +89,15 @@ def apply_model(df, features, score, inf, graph):
     for name, model in models.items():
         pred = model.predict(X_test.values)
+        # calculate f1 with our own function
         my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
         print(f'My F1 score of {name} is {my_f1_macro_score}')
+        # calculate f1 with the sklearn function
         f1_sklearn = f1_score(y_test.values, pred, average='macro')
         print(f'Sklearn F1 score of {name} is {f1_sklearn}')
-    score = score()
+    score = score()  # prompt for x, y coordinates and transform the score based on mode
     label_encoder = LabelEncoder()
     df["points"] = label_encoder.fit_transform(df["points"])
@@ -104,6 +118,7 @@ def calc_f1_macro(y_true, y_pred):
         f1_scores.append(score)
     return np.mean(f1_scores)
+# Calculate the f1 score
 def calc_f1_score(y_true, y_pred):
     tp = np.sum(np.multiply([i==True for i in y_pred], y_true))
     tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true]))
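As a quick sanity check (not part of the commit) of how the per-class score fits together with the calc_precision and calc_recall helpers in the next hunk, assuming calc_f1_score combines them with the standard formula F1 = 2 · precision · recall / (precision + recall):

    tp, fp, fn = 8, 2, 4
    precision = calc_precision(tp, fp)                   # 8 / 10 = 0.8
    recall = calc_recall(tp, fn)                         # 8 / 12 ≈ 0.667
    f1 = 2 * precision * recall / (precision + recall)   # ≈ 0.727

calc_f1_macro then averages these per-class F1 values, which is the macro average that the sklearn f1_score(..., average='macro') call also reports.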
@@ -119,12 +134,15 @@ def calc_f1_score(y_true, y_pred):
         f1 = 0
     return f1
+# Calculate precision
 def calc_precision(tp, fp):
     return tp / (tp + fp)
+# Calculate recall
 def calc_recall(tp, fn):
     return tp / (tp + fn)
+# Ask for x, y values and return the transformed array based on mode
 def make_score_function(transform):
     def get_score_from_cli():
         try: