commented code
This commit is contained in:
parent
efe9a1afe3
commit
cafea4bab1
5
main.py
5
main.py
@ -1,7 +1,8 @@
|
|||||||
import sys
|
import sys
|
||||||
from py.arguments import Arguments
|
from py.arguments import Arguments
|
||||||
from py.modell import *
|
from py.model import *
|
||||||
|
|
||||||
|
# CLI argument for file path necessary
|
||||||
if not sys.argv[1:]:
|
if not sys.argv[1:]:
|
||||||
print("Usage: python3 main.py <path to csv>")
|
print("Usage: python3 main.py <path to csv>")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
@ -15,6 +16,7 @@ def main():
|
|||||||
args.set_graph(False)
|
args.set_graph(False)
|
||||||
|
|
||||||
while repeat:
|
while repeat:
|
||||||
|
# Display settings
|
||||||
print("Currently selected setting:")
|
print("Currently selected setting:")
|
||||||
print(f"File: {args.get_file_path()}")
|
print(f"File: {args.get_file_path()}")
|
||||||
print(f"Mode: {args.get_mode()}")
|
print(f"Mode: {args.get_mode()}")
|
||||||
@ -57,6 +59,7 @@ def main():
|
|||||||
|
|
||||||
print("\n")
|
print("\n")
|
||||||
|
|
||||||
|
# apply model, function in py/model.py
|
||||||
apply_model(load_dataframe(args.get_file_path()), features, score, args.get_information(), args.get_graph())
|
apply_model(load_dataframe(args.get_file_path()), features, score, args.get_information(), args.get_graph())
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@ -1,46 +1,59 @@
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
|
# Possible modes: v = vector length of (x, y) / a = absolute values, reduce spread / c = cartesian, empirical values
|
||||||
class Mode(Enum):
|
class Mode(Enum):
|
||||||
V = "v"
|
V = "v"
|
||||||
A = "a"
|
A = "a"
|
||||||
C = "c"
|
C = "c"
|
||||||
|
|
||||||
|
# Display pandas dataframe description, head and info
|
||||||
class Information(Enum):
|
class Information(Enum):
|
||||||
DISABLED = False
|
DISABLED = False
|
||||||
ENABLED = True
|
ENABLED = True
|
||||||
|
|
||||||
|
# Display countplot, heatmap and scatterplot graphs
|
||||||
class Graph(Enum):
|
class Graph(Enum):
|
||||||
DISABLED = False
|
DISABLED = False
|
||||||
ENABLED = True
|
ENABLED = True
|
||||||
|
|
||||||
|
# Class Arguments for accepted values
|
||||||
class Arguments:
|
class Arguments:
|
||||||
|
|
||||||
|
# Constructor. Default params: "filepath", v, False, False
|
||||||
def __init__(self, file_path, mode, information, graph):
|
def __init__(self, file_path, mode, information, graph):
|
||||||
self.file_path = file_path
|
self.file_path = file_path
|
||||||
self.mode = mode
|
self.mode = mode
|
||||||
self.information = information
|
self.information = information
|
||||||
self.graph = graph
|
self.graph = graph
|
||||||
|
|
||||||
|
# Filepath getter
|
||||||
def get_file_path(self):
|
def get_file_path(self):
|
||||||
return self.file_path
|
return self.file_path
|
||||||
|
|
||||||
|
# Filepath setter
|
||||||
def set_file_path(self, value):
|
def set_file_path(self, value):
|
||||||
self.file_path = value
|
self.file_path = value
|
||||||
|
|
||||||
|
# Mode getter
|
||||||
def get_mode(self):
|
def get_mode(self):
|
||||||
return self.mode.value
|
return self.mode.value
|
||||||
|
|
||||||
|
# Mode setter
|
||||||
def set_mode(self, value):
|
def set_mode(self, value):
|
||||||
self.mode = Mode(value)
|
self.mode = Mode(value)
|
||||||
|
|
||||||
|
# Information getter
|
||||||
def get_information(self):
|
def get_information(self):
|
||||||
return self.information.value
|
return self.information.value
|
||||||
|
|
||||||
|
# Information setter
|
||||||
def set_information(self, value):
|
def set_information(self, value):
|
||||||
self.information = Information(value)
|
self.information = Information(value)
|
||||||
|
|
||||||
|
# Graph getter
|
||||||
def get_graph(self):
|
def get_graph(self):
|
||||||
return self.graph.value
|
return self.graph.value
|
||||||
|
|
||||||
|
# Graph setter
|
||||||
def set_graph(self, value):
|
def set_graph(self, value):
|
||||||
self.graph = Graph(value)
|
self.graph = Graph(value)
|
||||||
@ -10,16 +10,17 @@ Export the dataset to "data/synthetic_shots.csv"
|
|||||||
---------------------------------------------------'''
|
---------------------------------------------------'''
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import csv
|
import csv
|
||||||
|
|
||||||
args = sys.argv[1:]
|
args = sys.argv[1:]
|
||||||
|
|
||||||
|
# CLI argument for amount necessary
|
||||||
if not args:
|
if not args:
|
||||||
print("Usage: python3 generate_synthetic_shots.py <number of generated shots>")
|
print("Usage: python3 generate_synthetic_shots.py <number of generated shots>")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Amount to generate
|
||||||
n = int(args[0])
|
n = int(args[0])
|
||||||
|
|
||||||
# Area circle
|
# Area circle
|
||||||
@ -36,15 +37,14 @@ A1 = 10.5 ** 2 * np.pi
|
|||||||
|
|
||||||
possible_values = np.linspace(-10, 10, 41) # fromn -10 to 10 with step 0.5
|
possible_values = np.linspace(-10, 10, 41) # fromn -10 to 10 with step 0.5
|
||||||
|
|
||||||
xy = [(np.random.choice(possible_values), np.random.choice(possible_values)) for _ in range(n)]
|
xy = [(np.random.choice(possible_values), np.random.choice(possible_values)) for _ in range(n)] # numpy generates n amounts of (x, y) coordinates
|
||||||
|
|
||||||
dataset = []
|
dataset = []
|
||||||
|
|
||||||
|
# Apply score to coordinate based on area comparison
|
||||||
for i in xy:
|
for i in xy:
|
||||||
A = (i[0]**2 + i[1]**2) * np.pi
|
A = (i[0]**2 + i[1]**2) * np.pi
|
||||||
|
|
||||||
#print(A)
|
|
||||||
|
|
||||||
if A <= A10:
|
if A <= A10:
|
||||||
dataset.append([10, i[0], i[1]])
|
dataset.append([10, i[0], i[1]])
|
||||||
elif A > A10 and A <= A9:
|
elif A > A10 and A <= A9:
|
||||||
@ -68,6 +68,7 @@ for i in xy:
|
|||||||
elif A > A1:
|
elif A > A1:
|
||||||
dataset.append([0, i[0], i[1]])
|
dataset.append([0, i[0], i[1]])
|
||||||
|
|
||||||
|
# Write synthetic dataset in data/synthetic_shots.csv
|
||||||
with open('data/synthetic_shots.csv', 'w', newline='') as csvfile:
|
with open('data/synthetic_shots.csv', 'w', newline='') as csvfile:
|
||||||
fieldnames = ['points', 'x', 'y']
|
fieldnames = ['points', 'x', 'y']
|
||||||
writer = csv.writer(csvfile)
|
writer = csv.writer(csvfile)
|
||||||
|
|||||||
@ -14,6 +14,7 @@ np.seterr(divide='ignore', invalid='ignore')
|
|||||||
|
|
||||||
FEATURES = ["points", "x", "y"]
|
FEATURES = ["points", "x", "y"]
|
||||||
|
|
||||||
|
# create dataframe with csv file
|
||||||
def make_dataframe(transform):
|
def make_dataframe(transform):
|
||||||
def load_dataframe(file_path):
|
def load_dataframe(file_path):
|
||||||
try:
|
try:
|
||||||
@ -25,20 +26,25 @@ def make_dataframe(transform):
|
|||||||
quit()
|
quit()
|
||||||
return load_dataframe
|
return load_dataframe
|
||||||
|
|
||||||
|
# depending on mode, either the [x, y] coordinates or the length of the vector (x, y) [radius] is used as the feature
|
||||||
def make_features(selector):
|
def make_features(selector):
|
||||||
def select(df):
|
def select(df):
|
||||||
return df
|
return df
|
||||||
return select(selector)
|
return select(selector)
|
||||||
|
|
||||||
|
# Feature radius when mode = v
|
||||||
def radius(df):
|
def radius(df):
|
||||||
df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2)
|
df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2)
|
||||||
return df[["radius"]]
|
return df[["radius"]]
|
||||||
|
|
||||||
|
# Feature ["x", "y"] when mode = a or c
|
||||||
def xy(df):
|
def xy(df):
|
||||||
features = ["x", "y"]
|
features = ["x", "y"]
|
||||||
return df[features]
|
return df[features]
|
||||||
|
|
||||||
|
# apply model on dataframe. Params: df = dataframe, features = function make_features, inf = True or False, graph = True or False
|
||||||
def apply_model(df, features, score, inf, graph):
|
def apply_model(df, features, score, inf, graph):
|
||||||
|
|
||||||
# print dataframe information
|
# print dataframe information
|
||||||
if inf:
|
if inf:
|
||||||
print(df.describe())
|
print(df.describe())
|
||||||
@ -57,12 +63,18 @@ def apply_model(df, features, score, inf, graph):
|
|||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
y = pd.get_dummies(df['points'])
|
y = pd.get_dummies(df['points'])
|
||||||
X = features(df)
|
X = features(df) # select which features to use radius or xy
|
||||||
|
|
||||||
|
# Split data into 60/40 (train/test)
|
||||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
|
||||||
|
|
||||||
|
# Create a RandomForestClassifier with n_estimators=700
|
||||||
random_forest = RandomForestClassifier(n_estimators=700, random_state=0)
|
random_forest = RandomForestClassifier(n_estimators=700, random_state=0)
|
||||||
|
|
||||||
|
# Create a DecisionTreeClassifier
|
||||||
decision_tree = DecisionTreeClassifier(random_state=0)
|
decision_tree = DecisionTreeClassifier(random_state=0)
|
||||||
|
|
||||||
|
# Create a KNeighborsClassifier with n_neighbors=5
|
||||||
k_neighbors = KNeighborsClassifier(n_neighbors=5)
|
k_neighbors = KNeighborsClassifier(n_neighbors=5)
|
||||||
|
|
||||||
models = {
|
models = {
|
||||||
@ -77,13 +89,15 @@ def apply_model(df, features, score, inf, graph):
|
|||||||
for name, model in models.items():
|
for name, model in models.items():
|
||||||
pred = model.predict(X_test.values)
|
pred = model.predict(X_test.values)
|
||||||
|
|
||||||
|
# calculate f1 with own function
|
||||||
my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
|
my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
|
||||||
print(f'My F1 score of {name} is {my_f1_macro_score}')
|
print(f'My F1 score of {name} is {my_f1_macro_score}')
|
||||||
|
|
||||||
|
# calculate f1 with sklearn function
|
||||||
f1_sklearn = f1_score(y_test.values, pred, average='macro')
|
f1_sklearn = f1_score(y_test.values, pred, average='macro')
|
||||||
print(f'Sklearn F1 score of {name} is {f1_sklearn}')
|
print(f'Sklearn F1 score of {name} is {f1_sklearn}')
|
||||||
|
|
||||||
score = score()
|
score = score() # prompt for x, y coordinates and transform score based on mode
|
||||||
|
|
||||||
label_encoder = LabelEncoder()
|
label_encoder = LabelEncoder()
|
||||||
df["points"] = label_encoder.fit_transform(df["points"])
|
df["points"] = label_encoder.fit_transform(df["points"])
|
||||||
@ -104,6 +118,7 @@ def calc_f1_macro(y_true, y_pred):
|
|||||||
f1_scores.append(score)
|
f1_scores.append(score)
|
||||||
return np.mean(f1_scores)
|
return np.mean(f1_scores)
|
||||||
|
|
||||||
|
# calc f1 score
|
||||||
def calc_f1_score(y_true, y_pred):
|
def calc_f1_score(y_true, y_pred):
|
||||||
tp = np.sum(np.multiply([i==True for i in y_pred], y_true))
|
tp = np.sum(np.multiply([i==True for i in y_pred], y_true))
|
||||||
tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true]))
|
tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true]))
|
||||||
@ -119,12 +134,15 @@ def calc_f1_score(y_true, y_pred):
|
|||||||
f1 = 0
|
f1 = 0
|
||||||
return f1
|
return f1
|
||||||
|
|
||||||
|
# calc precision
|
||||||
def calc_precision(tp, fp):
|
def calc_precision(tp, fp):
|
||||||
return tp / (tp + fp)
|
return tp / (tp + fp)
|
||||||
|
|
||||||
|
# calc recall
|
||||||
def calc_recall(tp, fn):
|
def calc_recall(tp, fn):
|
||||||
return tp / (tp + fn)
|
return tp / (tp + fn)
|
||||||
|
|
||||||
|
# ask for x, y value and return transformed array based on mode
|
||||||
def make_score_function(transform):
|
def make_score_function(transform):
|
||||||
def get_score_from_cli():
|
def get_score_from_cli():
|
||||||
try:
|
try:
|
||||||
Loading…
x
Reference in New Issue
Block a user