refactoring: added currying

This commit is contained in:
Sandro Zimmermann 2025-11-29 23:55:33 +01:00
parent 2becf5823b
commit b08b3aba26
8 changed files with 395 additions and 308 deletions

188
main.py
View File

@ -1,84 +1,11 @@
import sys import sys
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from py.arguments import Arguments from py.arguments import Arguments
from py.modell import *
if not sys.argv[1:]: if not sys.argv[1:]:
print("Usage: python3 main.py <path to csv>") print("Usage: python3 main.py <path to csv>")
sys.exit(1) sys.exit(1)
FEATURES = ["points", "x", "y"]
# create dataframe from csv and drop any row with null values
def load_dataframe(file_path):
try:
colum_list = FEATURES
df = pd.read_csv(file_path, usecols = colum_list).dropna()
return df
except FileNotFoundError as error:
print(error)
quit()
def calc_f1_macro(y_true, y_pred):
f1_scores = []
for column in y_true:
score = calc_f1_score(y_true[column].values, y_pred[column])
f1_scores.append(score)
return np.mean(f1_scores)
def calc_f1_score(y_true, y_pred):
tp = np.sum(np.multiply([i==True for i in y_pred], y_true))
tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true]))
fp = np.sum(np.multiply([i==True for i in y_pred], [not(j) for j in y_true]))
fn = np.sum(np.multiply([i==False for i in y_pred], y_true))
precision = calc_precision(tp, fp)
recall = calc_recall(tp, fn)
'''
if tp != 0 and fp != 0:
precision = calc_precision(tp, fp)
else:
precision = 0
if tp != 0 and fn != 0:
recall = calc_recall(tp, fn)
else:
recall = 0
'''
if precision != 0 and recall != 0:
f1 = (2 * precision * recall) / (precision + recall)
else:
f1 = 0
return f1
def calc_precision(tp, fp):
return tp / (tp + fp)
def calc_recall(tp, fn):
return tp / (tp + fn)
def get_score_from_cli(mode):
try:
x = float(input("x: "))
y = float(input("y: "))
if mode == "v":
abs_v = np.sqrt(x**2 + y**2)
return np.array([abs_v]).reshape(1, -1)
return np.array([x, y]).reshape(1, -1)
except ValueError:
print("Invalid input. Please enter numeric values.")
return None
def main(): def main():
repeat = True repeat = True
@ -87,103 +14,50 @@ def main():
args.set_information(False) args.set_information(False)
args.set_graph(False) args.set_graph(False)
settings = {
"repeat": True,
"file": args.get_file_path(),
"mode": args.get_mode(),
"information": args.get_information(),
"graph": args.get_graph()
}
while repeat: while repeat:
print("Currently selected setting:") print("Currently selected setting:")
print(f"File: {settings["file"]}") print(f"File: {args.get_file_path()}")
print(f"Mode: {settings["mode"]}") print(f"Mode: {args.get_mode()}")
print(f"Display information: {settings["information"]}") print(f"Display information: {args.get_information()}")
print(f"Display graphs: {settings["graph"]}") print(f"Display graphs: {args.get_graph()}")
prompt = input("Change settings [y / exit]: ") prompt = input("Change settings [y / exit / blank]: ")
if prompt == "y": if prompt == "y":
args.set_file_path(input("Change file <path to file>: ")) try:
args.set_mode(input("Change mode [v, a, c]: ")) args.set_file_path(input("Change file <path to file>: "))
args.set_information(bool(input("Display information [True / False]: "))) args.set_mode(input("Change mode [v, a, c]: "))
args.set_graph(bool(input("Display graphs [True / False]: "))) args.set_information(eval(input("Display information [True / False]: ")))
args.set_graph(eval(input("Display graphs [True / False]: ")))
except ValueError as error:
print(f"Value {error}")
elif prompt == "exit": elif prompt == "exit":
quit() quit()
# load dataframe with argument [1]
df = load_dataframe(args.get_file_path())
# print dataframe information if argument [3] is true
if args.get_information():
print(df.describe())
print(df.head())
print(df.head().info())
# display graphs if argument [4] is true
if args.get_graph():
sns.countplot(x = df["points"])
plt.show()
sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
plt.show()
sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
plt.show()
# use vector length of (x,y) as feature # use vector length of (x,y) as feature
if args.get_mode() == "v": if args.get_mode() == "v":
df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2) load_dataframe = make_dataframe(lambda df: df)
X = df[["radius"]] features = make_features(radius)
score = make_score_function(lambda x, y: [np.sqrt(x**2 + y**2)])
# use absolute values of (x,y) as feature # use absolute values of (x,y) as feature
elif args.get_mode() == "a": elif args.get_mode() == "a":
df_abs = df.copy().abs() load_dataframe = make_dataframe(lambda df: df.abs())
features = ["x", "y"] features = make_features(xy)
X = df[features] score = make_score_function(lambda x, y: [x, y])
# use unaltered values of (x,y) as feature # use unaltered values of (x,y) as feature
elif args.get_mode() == "c": elif args.get_mode() == "c":
features = ["x", "y"] load_dataframe = make_dataframe(lambda df: df)
X = df[features] features = make_features(xy)
score = make_score_function(lambda x, y: [x, y])
# default use vector length
else:
load_dataframe = make_dataframe(lambda df: df)
features = make_features(radius)
score = make_score_function(lambda x, y: [np.sqrt(x**2 + y**2)])
print("\n")
apply_model(load_dataframe(args.get_file_path()), features, score, args.get_information(), args.get_graph())
y = pd.get_dummies(df['points'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
random_forest = RandomForestClassifier(n_estimators=700, random_state=0)
decision_tree = DecisionTreeClassifier(random_state=0)
k_neighbors = KNeighborsClassifier(n_neighbors=5)
models = {
"Random Forest Classifier": random_forest,
"Decision Tree Classifier": decision_tree,
"K-Neighbors": k_neighbors
}
for name, model in models.items():
model.fit(X_train.values, y_train.values)
for name, model in models.items():
pred = model.predict(X_test.values)
my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
print(f'My F1 score of {name} is {my_f1_macro_score}\n')
f1_sklearn = f1_score(y_test.values, pred, average='macro')
print(f'Sklearn F1 score of {name} is {f1_sklearn}\n')
score = get_score_from_cli(args.get_mode)
label_encoder = LabelEncoder()
df["points"] = label_encoder.fit_transform(df["points"])
for name, model in models.items():
pred = model.predict(score)
points_number = pd.DataFrame(pred).idxmax(axis=1)
points = label_encoder.inverse_transform(points_number)[0]
print(f"{name}: {points} Punkte")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -1,64 +1,15 @@
import pandas as pd from py.my_functions import *
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
FEATURES = ["points", "x", "y"]
# create dataframe from csv and drop any row with null values # create dataframe from csv and drop any row with null values
def load_dataframe(): def load_dataframe(file_path):
try: try:
colum_list = FEATURES colum_list = FEATURES
df = pd.read_csv("data/synthetic_data.csv", usecols = colum_list).dropna() df = pd.read_csv(file_path, usecols = colum_list).dropna()
return df.abs() return df.abs()
except FileNotFoundError as error: except FileNotFoundError as error:
print(error) print(error)
quit() quit()
def calc_f1_macro(y_true, y_pred):
f1_scores = []
for column in y_true:
score = calc_f1_score(y_true[column].values, y_pred[column])
f1_scores.append(score)
return np.mean(f1_scores)
def calc_f1_score(y_true, y_pred):
tp = np.sum(np.multiply([i==True for i in y_pred], y_true))
tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true]))
fp = np.sum(np.multiply([i==True for i in y_pred], [not(j) for j in y_true]))
fn = np.sum(np.multiply([i==False for i in y_pred], y_true))
precision = calc_precision(tp, fp)
recall = calc_recall(tp, fn)
'''if tp != 0 and fp != 0:
precision = calc_precision(tp, fp)
else:
precision = 0
if tp != 0 and fn != 0:
recall = calc_recall(tp, fn)
else:
recall = 0'''
if precision != 0 and recall != 0:
f1 = (2 * precision * recall) / (precision + recall)
else:
f1 = 0
return f1
def calc_precision(tp, fp):
return tp / (tp + fp)
def calc_recall(tp, fn):
return tp / (tp + fn)
def get_score_from_cli(): def get_score_from_cli():
try: try:
x = float(input("x: ")) x = float(input("x: "))
@ -68,24 +19,31 @@ def get_score_from_cli():
print("Invalid input. Please enter numeric values.") print("Invalid input. Please enter numeric values.")
return None return None
def main(): def absolut(file_path, inf, graph):
df = load_dataframe()
print(df.describe())
print(df.head())
print(df.head().info())
sns.countplot(x = df["points"])
plt.show()
sns.heatmap(df.corr(), annot=True, cmap='coolwarm') # load dataframe with argument [1]
plt.show() df = load_dataframe(file_path)
# print dataframe information if argument [3] is true
if inf:
print(df.describe())
print(df.head())
print(df.head().info())
sns.scatterplot(x=df['x'], y=df['y'], hue=df['points']) # display graphs if argument [4] is true
plt.show() if graph:
sns.countplot(x = df["points"])
plt.show()
sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
plt.show()
sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
plt.show()
features = ["x", "y"] features = ["x", "y"]
X = df[features] X = df[features]
y = pd.get_dummies(df['points']) y = pd.get_dummies(df['points'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
@ -107,10 +65,10 @@ def main():
pred = model.predict(X_test.values) pred = model.predict(X_test.values)
my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred)) my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
print(f'My F1 score of {name} is {my_f1_macro_score}\n') print(f'My F1 score of {name} is {my_f1_macro_score}')
f1_sklearn = f1_score(y_test.values, pred, average='macro') f1_sklearn = f1_score(y_test.values, pred, average='macro')
print(f'Sklearn F1 score of {name} is {f1_sklearn}\n') print(f'Sklearn F1 score of {name} is {f1_sklearn}')
score = get_score_from_cli() score = get_score_from_cli()
@ -121,8 +79,4 @@ def main():
pred = model.predict(score) pred = model.predict(score)
points_number = pd.DataFrame(pred).idxmax(axis=1) points_number = pd.DataFrame(pred).idxmax(axis=1)
points = label_encoder.inverse_transform(points_number)[0] points = label_encoder.inverse_transform(points_number)[0]
print(f"{name}: {points} Punkte") print(f"{name}: {points} Punkte")
if __name__ == "__main__":
main()

View File

@ -31,25 +31,16 @@ class Arguments:
return self.mode.value return self.mode.value
def set_mode(self, value): def set_mode(self, value):
try: self.mode = Mode(value)
self.mode = Mode(value)
except ValueError:
raise ValueError(f"Invalid mode '{value}'. Allowed values: {[m.value for m in Mode]}")
def get_information(self): def get_information(self):
return self.information.value return self.information.value
def set_information(self, value): def set_information(self, value):
try: self.information = Information(value)
self.information = Information(value)
except ValueError:
raise ValueError(f"Invalid information '{value}'. Allowed values: {[m.value for m in Information]}")
def get_graph(self): def get_graph(self):
return self.graph.value return self.graph.value
def set_graph(self, value): def set_graph(self, value):
try: self.graph = Graph(value)
self.graph = Graph(value)
except ValueError:
raise ValueError(f"Invalid graph '{value}'. Allowed values: {[m.value for m in Graph]}")

View File

@ -0,0 +1,82 @@
from py.my_functions import *
# create dataframe from csv and drop any row with null values
def load_dataframe(file_path):
    """Load the FEATURES columns of a CSV file and drop incomplete rows.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A DataFrame holding the FEATURES columns without rows that contain
        missing values.

    If the file does not exist, the error is printed and the program exits
    with status 1.
    """
    import sys  # local import: the surrounding module does not import sys

    try:
        df = pd.read_csv(file_path, usecols=FEATURES).dropna()
    except FileNotFoundError as error:
        print(error)
        # sys.exit instead of quit(): quit() is only installed by the site
        # module for interactive use and would report success (status 0).
        sys.exit(1)
    return df
def get_score_from_cli():
    """Ask the user for an x and a y value on the command line.

    Returns:
        A NumPy array of shape (1, 2) holding [x, y], or None when one of the
        entered values is not numeric (a hint is printed in that case).
    """
    try:
        # x is prompted first; a non-numeric x aborts before y is asked for
        coordinates = [float(input("x: ")), float(input("y: "))]
        return np.array(coordinates).reshape(1, -1)
    except ValueError:
        print("Invalid input. Please enter numeric values.")
    return None
def cartesian(file_path, inf, graph):
    """Train and evaluate the classifiers on the unaltered (x, y) coordinates.

    The CSV is loaded, optionally described and plotted, three classifiers
    are fitted on a 60/40 train/test split, their macro F1 scores are printed
    and finally one point entered by the user is classified by every model.

    Args:
        file_path: Path of the CSV file to load.
        inf: If truthy, print summary information about the data frame.
        graph: If truthy, show a count plot, a correlation heat map and a
            scatter plot of the data.
    """
    # load dataframe from the given csv path
    df = load_dataframe(file_path)

    # print dataframe information if requested
    if inf:
        print(df.describe())
        print(df.head())
        # info() prints its report itself and returns None, so wrapping it in
        # print() would only add a stray "None" line
        df.head().info()

    # display graphs if requested
    if graph:
        sns.countplot(x=df["points"])
        plt.show()
        sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
        plt.show()
        sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
        plt.show()

    features = ["x", "y"]
    X = df[features]
    # one indicator column per distinct "points" value
    y = pd.get_dummies(df['points'])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

    models = {
        "Random Forest Classifier": RandomForestClassifier(n_estimators=700, random_state=0),
        "Decision Tree Classifier": DecisionTreeClassifier(random_state=0),
        "K-Neighbors": KNeighborsClassifier(n_neighbors=5)
    }

    for model in models.values():
        model.fit(X_train.values, y_train.values)

    for name, model in models.items():
        pred = model.predict(X_test.values)

        my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
        print(f'My F1 score of {name} is {my_f1_macro_score}')

        f1_sklearn = f1_score(y_test.values, pred, average='macro')
        print(f'Sklearn F1 score of {name} is {f1_sklearn}')

    score = get_score_from_cli()
    if score is None:
        # invalid input: there is nothing to classify, so do not hand None to
        # the models (predict would fail on it)
        print("No valid point entered; skipping prediction.")
        return

    # The encoder is only fitted (not written back into df) so the caller's
    # data frame keeps its original "points" values.
    # NOTE(review): relies on get_dummies and LabelEncoder ordering the
    # classes identically (both sorted) - confirm for the data in use.
    label_encoder = LabelEncoder().fit(df["points"])

    for name, model in models.items():
        pred = model.predict(score)
        # position of the strongest indicator column -> original points label
        points_number = pd.DataFrame(pred).idxmax(axis=1)
        points = label_encoder.inverse_transform(points_number)[0]
        print(f"{name}: {points} Punkte")

View File

137
py/modell.py Normal file
View File

@ -0,0 +1,137 @@
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
# with only a few features many classes end up with tp = 0 and fp = 0, which causes divisions by zero; silence NumPy's warnings for them
np.seterr(divide='ignore', invalid='ignore')
FEATURES = ["points", "x", "y"]
def make_dataframe(transform):
    """Build a CSV loader that applies ``transform`` to the loaded frame.

    Args:
        transform: Callable that takes the loaded DataFrame and returns the
            frame the loader should hand back.

    Returns:
        A function ``load_dataframe(file_path)`` that reads the FEATURES
        columns of the CSV at ``file_path``, drops rows with missing values
        and returns ``transform(df)``. If the file does not exist, the error
        is printed and the program exits with status 1.
    """
    def load_dataframe(file_path):
        import sys  # local import: this module does not import sys at the top

        try:
            df = pd.read_csv(file_path, usecols=FEATURES).dropna()
        except FileNotFoundError as error:
            print(error)
            # sys.exit instead of quit(): quit() is only installed by the site
            # module for interactive use and would report success (status 0).
            sys.exit(1)
        # transform runs outside the try so that an error raised by it is not
        # mistaken for a missing input file
        return transform(df)

    return load_dataframe
def make_features(selector):
    """Return the feature selector that is later applied to the data frame.

    Args:
        selector: Callable that takes a DataFrame and returns its feature
            columns.

    Returns:
        The very same ``selector`` object; it is passed through unchanged.
    """
    return selector
def radius(df):
    """Add the vector length of (x, y) to ``df`` and return it as feature frame.

    Args:
        df: DataFrame with the columns "x" and "y". A "radius" column is
            written into this frame in place.

    Returns:
        A DataFrame consisting of the single column "radius".
    """
    squared_length = df["x"] ** 2 + df["y"] ** 2
    df["radius"] = np.sqrt(squared_length)
    feature_columns = ["radius"]
    return df[feature_columns]
def xy(df):
    """Return the "x" and "y" columns of ``df`` as the feature frame."""
    return df[["x", "y"]]
def apply_model(df, features, score, inf, graph):
    """Train the classifiers on ``df``, report F1 scores and classify one point.

    Three classifiers are fitted on a 60/40 train/test split, their macro F1
    scores (own implementation and sklearn) are printed, and one point
    obtained from ``score`` is classified by every model.

    Args:
        df: DataFrame with at least the columns "points", "x" and "y".
        features: Callable that takes ``df`` and returns the feature frame X.
        score: Zero-argument callable returning the point to classify as an
            array accepted by ``model.predict``, or None for invalid input.
        inf: If truthy, print summary information about ``df``.
        graph: If truthy, show a count plot, a correlation heat map and a
            scatter plot of the data.
    """
    # print dataframe information
    if inf:
        print(df.describe())
        print(df.head())
        # info() prints its report itself and returns None, so wrapping it in
        # print() would only add a stray "None" line
        df.head().info()

    # display graphs
    if graph:
        sns.countplot(x=df["points"])
        plt.show()
        sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
        plt.show()
        sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
        plt.show()

    # one indicator column per distinct "points" value
    y = pd.get_dummies(df['points'])
    X = features(df)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

    models = {
        "Random Forest Classifier": RandomForestClassifier(n_estimators=700, random_state=0),
        "Decision Tree Classifier": DecisionTreeClassifier(random_state=0),
        "K-Neighbors": KNeighborsClassifier(n_neighbors=5)
    }

    for model in models.values():
        model.fit(X_train.values, y_train.values)

    for name, model in models.items():
        pred = model.predict(X_test.values)

        my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
        print(f'My F1 score of {name} is {my_f1_macro_score}')

        f1_sklearn = f1_score(y_test.values, pred, average='macro')
        print(f'Sklearn F1 score of {name} is {f1_sklearn}')

    point = score()
    if point is None:
        # invalid input: there is nothing to classify, so do not hand None to
        # the models (predict would fail on it)
        print("No valid point entered; skipping prediction.")
    else:
        # The encoder is only fitted (not written back into df) so the
        # caller's data frame keeps its original "points" values.
        # NOTE(review): relies on get_dummies and LabelEncoder ordering the
        # classes identically (both sorted) - confirm for the data in use.
        label_encoder = LabelEncoder().fit(df["points"])

        for name, model in models.items():
            pred = model.predict(point)
            # position of the strongest indicator column -> original label
            points_number = pd.DataFrame(pred).idxmax(axis=1)
            points = label_encoder.inverse_transform(points_number)[0]
            print(f"{name}: {points} Punkte")

    input("\nPress any key to continue...\n")
# calc f1 macro
def calc_f1_macro(y_true, y_pred):
    """Return the macro-averaged F1 score over all class columns.

    Args:
        y_true: DataFrame of ground-truth indicators, one column per class.
        y_pred: DataFrame of predicted indicators with one column per class,
            in the same class order as ``y_true``.

    Returns:
        The unweighted mean of the per-column F1 scores.
    """
    # Predictions built with pd.DataFrame(pred) are labelled 0..k-1 while
    # y_true carries the class labels. A purely label-based lookup therefore
    # raises KeyError (or compares different classes) unless the labels
    # happen to be 0..k-1. Labels are used only when every y_true label
    # exists in y_pred; otherwise columns are paired by position.
    use_labels = set(y_true.columns) <= set(y_pred.columns)

    f1_scores = []
    for position, column in enumerate(y_true.columns):
        predicted = y_pred[column] if use_labels else y_pred.iloc[:, position]
        f1_scores.append(calc_f1_score(y_true[column].values, predicted))
    return np.mean(f1_scores)
def calc_f1_score(y_true, y_pred):
    """Return the F1 score of one binary indicator column.

    Args:
        y_true: Sequence of ground-truth indicators (truthy = class present).
        y_pred: Sequence of predicted indicators; entries equal to 1/True
            count as positive predictions, entries equal to 0/False as
            negative ones.

    Returns:
        2 * precision * recall / (precision + recall), or 0 when there is no
        true positive.
    """
    actual = np.asarray(y_true).astype(bool)
    predicted = np.asarray(y_pred)
    predicted_positive = predicted == 1
    predicted_negative = predicted == 0

    tp = np.count_nonzero(predicted_positive & actual)
    fp = np.count_nonzero(predicted_positive & ~actual)
    fn = np.count_nonzero(predicted_negative & actual)

    # Without a true positive precision or recall is 0 (or 0/0), so F1 is 0.
    # Returning early keeps a 0/0 from turning the score into NaN, which
    # would slip through a "!= 0" check and poison the macro average.
    if tp == 0:
        return 0

    precision = calc_precision(tp, fp)
    recall = calc_recall(tp, fn)
    return (2 * precision * recall) / (precision + recall)
def calc_precision(tp, fp):
    """Return the precision tp / (tp + fp).

    Args:
        tp: Number of true positives.
        fp: Number of false positives.

    Returns:
        The precision, or 0.0 when nothing was predicted positive
        (tp + fp == 0) instead of dividing by zero.
    """
    predicted_positive = tp + fp
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive
def calc_recall(tp, fn):
    """Return the recall tp / (tp + fn).

    Args:
        tp: Number of true positives.
        fn: Number of false negatives.

    Returns:
        The recall, or 0.0 when there are no actual positives
        (tp + fn == 0) instead of dividing by zero.
    """
    actual_positive = tp + fn
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive
def make_score_function(transform):
    """Build a command-line prompt that turns an (x, y) input into features.

    Args:
        transform: Callable taking the entered x and y as floats and
            returning the feature values for that point.

    Returns:
        A zero-argument function that asks for x and y, applies
        ``transform`` and returns the result as an array reshaped to one
        row. It returns None (after printing a hint) when a ValueError
        occurs, e.g. for non-numeric input.
    """
    def get_score_from_cli():
        try:
            x_value = float(input("x: "))
            y_value = float(input("y: "))
            point = np.array([transform(x_value, y_value)])
            return point.reshape(1, -1)
        except ValueError:
            print("Invalid input. Please enter numeric values.")
        return None

    return get_score_from_cli

88
py/my_functions.py Normal file
View File

@ -0,0 +1,88 @@
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
np.seterr(divide='ignore', invalid='ignore')
FEATURES = ["points", "x", "y"]
def make_dataframe(transform):
    """Build a CSV loader that applies ``transform`` to the loaded frame.

    Args:
        transform: Callable that takes the loaded DataFrame and returns the
            frame the loader should hand back.

    Returns:
        A function ``load_dataframe(file_path)`` that reads the FEATURES
        columns of the CSV at ``file_path``, drops rows with missing values
        and returns ``transform(df)``. If the file does not exist, the error
        is printed and the program exits with status 1.
    """
    def load_dataframe(file_path):
        import sys  # local import: this module does not import sys at the top

        try:
            df = pd.read_csv(file_path, usecols=FEATURES).dropna()
        except FileNotFoundError as error:
            print(error)
            # sys.exit instead of quit(): quit() is only installed by the site
            # module for interactive use and would report success (status 0).
            sys.exit(1)
        # transform runs outside the try so that an error raised by it is not
        # mistaken for a missing input file
        return transform(df)

    # hand the configured loader back to the caller; without this return the
    # factory would yield None
    return load_dataframe
def calc_f1_macro(y_true, y_pred):
    """Return the macro-averaged F1 score over all class columns.

    Args:
        y_true: DataFrame of ground-truth indicators, one column per class.
        y_pred: DataFrame of predicted indicators with one column per class,
            in the same class order as ``y_true``.

    Returns:
        The unweighted mean of the per-column F1 scores.
    """
    # Predictions built with pd.DataFrame(pred) are labelled 0..k-1 while
    # y_true carries the class labels. A purely label-based lookup therefore
    # raises KeyError (or compares different classes) unless the labels
    # happen to be 0..k-1. Labels are used only when every y_true label
    # exists in y_pred; otherwise columns are paired by position.
    use_labels = set(y_true.columns) <= set(y_pred.columns)

    f1_scores = []
    for position, column in enumerate(y_true.columns):
        predicted = y_pred[column] if use_labels else y_pred.iloc[:, position]
        f1_scores.append(calc_f1_score(y_true[column].values, predicted))
    return np.mean(f1_scores)
def calc_f1_score(y_true, y_pred):
    """Return the F1 score of one binary indicator column.

    Args:
        y_true: Sequence of ground-truth indicators (truthy = class present).
        y_pred: Sequence of predicted indicators; entries equal to 1/True
            count as positive predictions, entries equal to 0/False as
            negative ones.

    Returns:
        2 * precision * recall / (precision + recall), or 0 when there is no
        true positive.
    """
    actual = np.asarray(y_true).astype(bool)
    predicted = np.asarray(y_pred)
    predicted_positive = predicted == 1
    predicted_negative = predicted == 0

    tp = np.count_nonzero(predicted_positive & actual)
    fp = np.count_nonzero(predicted_positive & ~actual)
    fn = np.count_nonzero(predicted_negative & actual)

    # Without a true positive precision or recall is 0 (or 0/0), so F1 is 0.
    # Returning early keeps a 0/0 from turning the score into NaN, which
    # would slip through a "!= 0" check and poison the macro average.
    if tp == 0:
        return 0

    precision = calc_precision(tp, fp)
    recall = calc_recall(tp, fn)
    return (2 * precision * recall) / (precision + recall)
def calc_precision(tp, fp):
    """Return the precision tp / (tp + fp).

    Args:
        tp: Number of true positives.
        fp: Number of false positives.

    Returns:
        The precision, or 0.0 when nothing was predicted positive
        (tp + fp == 0) instead of dividing by zero.
    """
    predicted_positive = tp + fp
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive
def calc_recall(tp, fn):
    """Return the recall tp / (tp + fn).

    Args:
        tp: Number of true positives.
        fn: Number of false negatives.

    Returns:
        The recall, or 0.0 when there are no actual positives
        (tp + fn == 0) instead of dividing by zero.
    """
    actual_positive = tp + fn
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive
def apply_model(X, y):
    """Fit three classifiers on a split of (X, y) and print their macro F1 scores.

    40 % of the rows are held out for testing (fixed random state 0). For
    every model both the project's own macro F1 and sklearn's macro F1 on the
    test split are printed.

    Args:
        X: Feature frame; its ``.values`` are used for fitting and prediction.
        y: Target frame of class indicators; its ``.values`` are used as
            fitting targets.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=0
    )

    models = {
        "Random Forest Classifier": RandomForestClassifier(n_estimators=700, random_state=0),
        "Decision Tree Classifier": DecisionTreeClassifier(random_state=0),
        "K-Neighbors": KNeighborsClassifier(n_neighbors=5),
    }

    # fit every model first, evaluation happens in a second pass
    for classifier in models.values():
        classifier.fit(X_train.values, y_train.values)

    for name, classifier in models.items():
        pred = classifier.predict(X_test.values)

        own_f1 = calc_f1_macro(y_test, pd.DataFrame(pred))
        print(f'My F1 score of {name} is {own_f1}')

        reference_f1 = f1_score(y_test.values, pred, average='macro')
        print(f'Sklearn F1 score of {name} is {reference_f1}')
def make_score_function(transform):
    """Build a command-line prompt that turns an (x, y) input into features.

    Args:
        transform: Callable taking the entered x and y as floats and
            returning the feature values for that point.

    Returns:
        A zero-argument function that asks for x and y, applies
        ``transform`` and returns the result as an array reshaped to one
        row. It returns None (after printing a hint) when a ValueError
        occurs, e.g. for non-numeric input.
    """
    def get_score_from_cli():
        try:
            x_value = float(input("x: "))
            y_value = float(input("y: "))
            point = np.array([transform(x_value, y_value)])
            return point.reshape(1, -1)
        except ValueError:
            print("Invalid input. Please enter numeric values.")
        return None

    return get_score_from_cli

View File

@ -1,64 +1,15 @@
import pandas as pd from my_functions import *
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
FEATURES = ["points", "x", "y"]
# create dataframe from csv and drop any row with null values # create dataframe from csv and drop any row with null values
def load_dataframe(): def load_dataframe(file_path):
try: try:
colum_list = FEATURES colum_list = FEATURES
df = pd.read_csv("data/synthetic_data.csv", usecols = colum_list).dropna() df = pd.read_csv(file_path, usecols = colum_list).dropna()
return df return df
except FileNotFoundError as error: except FileNotFoundError as error:
print(error) print(error)
quit() quit()
def calc_f1_macro(y_true, y_pred):
f1_scores = []
for column in y_true:
score = calc_f1_score(y_true[column].values, y_pred[column])
f1_scores.append(score)
return np.mean(f1_scores)
def calc_f1_score(y_true, y_pred):
tp = np.sum(np.multiply([i==True for i in y_pred], y_true))
tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true]))
fp = np.sum(np.multiply([i==True for i in y_pred], [not(j) for j in y_true]))
fn = np.sum(np.multiply([i==False for i in y_pred], y_true))
precision = calc_precision(tp, fp)
recall = calc_recall(tp, fn)
'''if tp != 0 and fp != 0:
precision = calc_precision(tp, fp)
else:
precision = 0
if tp != 0 and fn != 0:
recall = calc_recall(tp, fn)
else:
recall = 0'''
if precision != 0 and recall != 0:
f1 = (2 * precision * recall) / (precision + recall)
else:
f1 = 0
return f1
def calc_precision(tp, fp):
return tp / (tp + fp)
def calc_recall(tp, fn):
return tp / (tp + fn)
def get_score_from_cli(): def get_score_from_cli():
try: try:
x = float(input("x: ")) x = float(input("x: "))
@ -69,18 +20,27 @@ def get_score_from_cli():
print("Invalid input. Please enter numeric values.") print("Invalid input. Please enter numeric values.")
return None return None
def main(): def vector(file_path, inf, graph):
df = load_dataframe()
print(df.head()) # load dataframe
df = load_dataframe(file_path)
# print dataframe information
if inf:
print(df.describe())
print(df.head())
print(df.head().info())
sns.countplot(x = df["points"]) # display graphs
plt.show() if graph:
sns.countplot(x = df["points"])
plt.show()
sns.heatmap(df.corr(), annot=True, cmap='coolwarm') sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
plt.show() plt.show()
sns.scatterplot(x=df['x'], y=df['y'], hue=df['points']) sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
plt.show() plt.show()
df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2) df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2)
X = df[["radius"]] X = df[["radius"]]
@ -89,6 +49,9 @@ def main():
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
print(X_train)
quit()
random_forest = RandomForestClassifier(n_estimators=700, random_state=0) random_forest = RandomForestClassifier(n_estimators=700, random_state=0)
decision_tree = DecisionTreeClassifier(random_state=0) decision_tree = DecisionTreeClassifier(random_state=0)
k_neighbors = KNeighborsClassifier(n_neighbors=5) k_neighbors = KNeighborsClassifier(n_neighbors=5)
@ -106,10 +69,10 @@ def main():
pred = model.predict(X_test.values) pred = model.predict(X_test.values)
my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred)) my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
print(f'My F1 score of {name} is {my_f1_macro_score}\n') print(f'My F1 score of {name} is {my_f1_macro_score}')
f1_sklearn = f1_score(y_test.values, pred, average='macro') f1_sklearn = f1_score(y_test.values, pred, average='macro')
print(f'Sklearn F1 score of {name} is {f1_sklearn}\n') print(f'Sklearn F1 score of {name} is {f1_sklearn}')
score = get_score_from_cli() score = get_score_from_cli()
@ -122,6 +85,4 @@ def main():
points = label_encoder.inverse_transform(points_number)[0] points = label_encoder.inverse_transform(points_number)[0]
print(f"{name}: {points} Punkte") print(f"{name}: {points} Punkte")
vector("data/shots.csv", False, False)
if __name__ == "__main__":
main()