refactoring: added currying
This commit is contained in:
parent 2becf5823b
commit b08b3aba26

188 main.py
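The commit replaces the per-mode branching in main.py with curried factories from py/modell.py (make_dataframe, make_features, make_score_function): each factory captures a mode-specific transform and hands back a closure that main() calls later. A minimal standalone sketch of that pattern, with input() swapped for plain parameters so it runs non-interactively; every name except make_score_function is illustrative:

import numpy as np

def make_score_function(transform):
    # capture the mode-specific transform and return a ready-to-call closure
    def get_score(x, y):
        return np.array([transform(x, y)]).reshape(1, -1)
    return get_score

# one curried score function per mode, as in the new main(): "v" uses the vector length
vector_score = make_score_function(lambda x, y: [np.sqrt(x**2 + y**2)])
cartesian_score = make_score_function(lambda x, y: [x, y])

print(vector_score(3.0, 4.0))      # [[5.]]
print(cartesian_score(3.0, 4.0))   # [[3. 4.]]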
@@ -1,84 +1,11 @@
 import sys
-import pandas as pd
-import numpy as np
-import seaborn as sns
-import matplotlib.pyplot as plt
-from sklearn.metrics import f1_score
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.preprocessing import LabelEncoder
 from py.arguments import Arguments
+from py.modell import *
 
 if not sys.argv[1:]:
     print("Usage: python3 main.py <path to csv>")
     sys.exit(1)
 
-FEATURES = ["points", "x", "y"]
-
-# create dataframe from csv and drop any row with null values
-def load_dataframe(file_path):
-    try:
-        colum_list = FEATURES
-        df = pd.read_csv(file_path, usecols = colum_list).dropna()
-        return df
-    except FileNotFoundError as error:
-        print(error)
-        quit()
-
-def calc_f1_macro(y_true, y_pred):
-    f1_scores = []
-    for column in y_true:
-        score = calc_f1_score(y_true[column].values, y_pred[column])
-        f1_scores.append(score)
-    return np.mean(f1_scores)
-
-def calc_f1_score(y_true, y_pred):
-    tp = np.sum(np.multiply([i==True for i in y_pred], y_true))
-    tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true]))
-    fp = np.sum(np.multiply([i==True for i in y_pred], [not(j) for j in y_true]))
-    fn = np.sum(np.multiply([i==False for i in y_pred], y_true))
-
-    precision = calc_precision(tp, fp)
-    recall = calc_recall(tp, fn)
-
-    '''
-    if tp != 0 and fp != 0:
-        precision = calc_precision(tp, fp)
-    else:
-        precision = 0
-
-    if tp != 0 and fn != 0:
-        recall = calc_recall(tp, fn)
-    else:
-        recall = 0
-    '''
-
-    if precision != 0 and recall != 0:
-        f1 = (2 * precision * recall) / (precision + recall)
-    else:
-        f1 = 0
-    return f1
-
-def calc_precision(tp, fp):
-    return tp / (tp + fp)
-
-def calc_recall(tp, fn):
-    return tp / (tp + fn)
-
-def get_score_from_cli(mode):
-    try:
-        x = float(input("x: "))
-        y = float(input("y: "))
-        if mode == "v":
-            abs_v = np.sqrt(x**2 + y**2)
-            return np.array([abs_v]).reshape(1, -1)
-        return np.array([x, y]).reshape(1, -1)
-    except ValueError:
-        print("Invalid input. Please enter numeric values.")
-        return None
-
 def main():
 
     repeat = True
@@ -87,103 +14,50 @@ def main():
     args.set_information(False)
     args.set_graph(False)
 
-    settings = {
-        "repeat": True,
-        "file": args.get_file_path(),
-        "mode": args.get_mode(),
-        "information": args.get_information(),
-        "graph": args.get_graph()
-    }
-
     while repeat:
         print("Currently selected setting:")
-        print(f"File: {settings["file"]}")
-        print(f"Mode: {settings["mode"]}")
-        print(f"Display information: {settings["information"]}")
-        print(f"Display graphs: {settings["graph"]}")
+        print(f"File: {args.get_file_path()}")
+        print(f"Mode: {args.get_mode()}")
+        print(f"Display information: {args.get_information()}")
+        print(f"Display graphs: {args.get_graph()}")
 
-        prompt = input("Change settings [y / exit]: ")
+        prompt = input("Change settings [y / exit / blank]: ")
 
         if prompt == "y":
-            args.set_file_path(input("Change file <path to file>: "))
-            args.set_mode(input("Change mode [v, a, c]: "))
-            args.set_information(bool(input("Display information [True / False]: ")))
-            args.set_graph(bool(input("Display graphs [True / False]: ")))
+            try:
+                args.set_file_path(input("Change file <path to file>: "))
+                args.set_mode(input("Change mode [v, a, c]: "))
+                args.set_information(eval(input("Display information [True / False]: ")))
+                args.set_graph(eval(input("Display graphs [True / False]: ")))
+            except ValueError as error:
+                print(f"Value {error}")
         elif prompt == "exit":
             quit()
 
-        # load dataframe with argument [1]
-        df = load_dataframe(args.get_file_path())
-
-        # print dataframe information if argument [3] is true
-        if args.get_information():
-            print(df.describe())
-            print(df.head())
-            print(df.head().info())
-
-        # display graphs if argument [4] is true
-        if args.get_graph():
-
-            sns.countplot(x = df["points"])
-            plt.show()
-
-            sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
-            plt.show()
-
-            sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
-            plt.show()
-
         # use verctor length of (x,y) as feature
         if args.get_mode() == "v":
-            df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2)
-            X = df[["radius"]]
+            load_dataframe = make_dataframe(lambda df: df)
+            features = make_features(radius)
+            score = make_score_function(lambda x, y: [np.sqrt(x**2 + y**2)])
         # use absoult values of (x,y) as feature
         elif args.get_mode() == "a":
-            df_abs = df.copy().abs()
-            features = ["x", "y"]
-            X = df[features]
+            load_dataframe = make_dataframe(lambda df: df.abs())
+            features = make_features(xy)
+            score = make_score_function(lambda x, y: [x, y])
         # use unaltered values of (x,y) as feature
         elif args.get_mode() == "c":
-            features = ["x", "y"]
-            X = df[features]
+            load_dataframe = make_dataframe(lambda df: df)
+            features = make_features(xy)
+            score = make_score_function(lambda x, y: [x, y])
+        # default use vector length
+        else:
+            load_dataframe = make_dataframe(lambda df: df)
+            features = make_features(radius)
+            score = make_score_function(lambda x, y: [np.sqrt(x**2 + y**2)])
 
+        print("\n")
+
+        apply_model(load_dataframe(args.get_file_path()), features, score, args.get_information(), args.get_graph())
 
-        y = pd.get_dummies(df['points'])
-
-        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
-
-        random_forest = RandomForestClassifier(n_estimators=700, random_state=0)
-        decision_tree = DecisionTreeClassifier(random_state=0)
-        k_neighbors = KNeighborsClassifier(n_neighbors=5)
-
-        models = {
-            "Random Forest Classifier": random_forest,
-            "Decision Tree Classifier": decision_tree,
-            "K-Neighbors": k_neighbors
-        }
-
-        for name, model in models.items():
-            model.fit(X_train.values, y_train.values)
-
-        for name, model in models.items():
-            pred = model.predict(X_test.values)
-
-            my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
-            print(f'My F1 score of {name} is {my_f1_macro_score}\n')
-
-            f1_sklearn = f1_score(y_test.values, pred, average='macro')
-            print(f'Sklearn F1 score of {name} is {f1_sklearn}\n')
-
-        score = get_score_from_cli(args.get_mode)
-
-        label_encoder = LabelEncoder()
-        df["points"] = label_encoder.fit_transform(df["points"])
-
-        for name, model in models.items():
-            pred = model.predict(score)
-            points_number = pd.DataFrame(pred).idxmax(axis=1)
-            points = label_encoder.inverse_transform(points_number)[0]
-            print(f"{name}: {points} Punkte")
-
-
 if __name__ == "__main__":
     main()
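For orientation, the curried pieces built above compose as below; a sketch assuming py/modell.py is importable and that a CSV with the points/x/y columns exists at the illustrative path:

import numpy as np
from py.modell import make_dataframe, make_features, make_score_function, apply_model, radius

load_dataframe = make_dataframe(lambda df: df)   # closure: file path -> cleaned DataFrame
features = make_features(radius)                 # selector: DataFrame -> feature matrix X
score = make_score_function(lambda x, y: [np.sqrt(x**2 + y**2)])   # closure: () -> scored CLI input

# same call main() makes for mode "v" (path and flags illustrative)
apply_model(load_dataframe("data/synthetic_data.csv"), features, score, True, False)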
@@ -1,64 +1,15 @@
-import pandas as pd
-import numpy as np
-import seaborn as sns
-import matplotlib.pyplot as plt
-from sklearn.metrics import f1_score
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.preprocessing import LabelEncoder
+from py.my_functions import *
 
-FEATURES = ["points", "x", "y"]
-
 # create dataframe from csv and drop any row with null values
-def load_dataframe():
+def load_dataframe(file_path):
     try:
         colum_list = FEATURES
-        df = pd.read_csv("data/synthetic_data.csv", usecols = colum_list).dropna()
+        df = pd.read_csv(file_path, usecols = colum_list).dropna()
         return df.abs()
     except FileNotFoundError as error:
         print(error)
         quit()
 
-def calc_f1_macro(y_true, y_pred):
-    f1_scores = []
-    for column in y_true:
-        score = calc_f1_score(y_true[column].values, y_pred[column])
-        f1_scores.append(score)
-    return np.mean(f1_scores)
-
-def calc_f1_score(y_true, y_pred):
-    tp = np.sum(np.multiply([i==True for i in y_pred], y_true))
-    tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true]))
-    fp = np.sum(np.multiply([i==True for i in y_pred], [not(j) for j in y_true]))
-    fn = np.sum(np.multiply([i==False for i in y_pred], y_true))
-
-    precision = calc_precision(tp, fp)
-    recall = calc_recall(tp, fn)
-
-    '''if tp != 0 and fp != 0:
-        precision = calc_precision(tp, fp)
-    else:
-        precision = 0
-
-    if tp != 0 and fn != 0:
-        recall = calc_recall(tp, fn)
-    else:
-        recall = 0'''
-
-    if precision != 0 and recall != 0:
-        f1 = (2 * precision * recall) / (precision + recall)
-    else:
-        f1 = 0
-    return f1
-
-def calc_precision(tp, fp):
-    return tp / (tp + fp)
-
-def calc_recall(tp, fn):
-    return tp / (tp + fn)
-
 def get_score_from_cli():
     try:
         x = float(input("x: "))
@@ -68,24 +19,31 @@ def get_score_from_cli():
         print("Invalid input. Please enter numeric values.")
         return None
 
-def main():
-    df = load_dataframe()
-    print(df.describe())
-    print(df.head())
-    print(df.head().info())
-
-    sns.countplot(x = df["points"])
-    plt.show()
-
-    sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
-    plt.show()
-
-    sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
-    plt.show()
+def absolut(file_path, inf, graph):
+
+    # load dataframe with argument [1]
+    df = load_dataframe(file_path)
+
+    # print dataframe information if argument [3] is true
+    if inf:
+        print(df.describe())
+        print(df.head())
+        print(df.head().info())
+
+    # display graphs if argument [4] is true
+    if graph:
+        sns.countplot(x = df["points"])
+        plt.show()
+
+        sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
+        plt.show()
+
+        sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
+        plt.show()
 
     features = ["x", "y"]
     X = df[features]
 
     y = pd.get_dummies(df['points'])
 
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
@@ -107,10 +65,10 @@ def main():
         pred = model.predict(X_test.values)
 
         my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
-        print(f'My F1 score of {name} is {my_f1_macro_score}\n')
+        print(f'My F1 score of {name} is {my_f1_macro_score}')
 
         f1_sklearn = f1_score(y_test.values, pred, average='macro')
-        print(f'Sklearn F1 score of {name} is {f1_sklearn}\n')
+        print(f'Sklearn F1 score of {name} is {f1_sklearn}')
 
     score = get_score_from_cli()
 
@@ -121,8 +79,4 @@ def main():
         pred = model.predict(score)
         points_number = pd.DataFrame(pred).idxmax(axis=1)
         points = label_encoder.inverse_transform(points_number)[0]
         print(f"{name}: {points} Punkte")
-
-
-if __name__ == "__main__":
-    main()
@@ -31,25 +31,16 @@ class Arguments:
         return self.mode.value
 
     def set_mode(self, value):
-        try:
-            self.mode = Mode(value)
-        except ValueError:
-            raise ValueError(f"Invalid mode '{value}'. Allowed values: {[m.value for m in Mode]}")
+        self.mode = Mode(value)
 
     def get_information(self):
         return self.information.value
 
    def set_information(self, value):
-        try:
-            self.information = Information(value)
-        except ValueError:
-            raise ValueError(f"Invalid information '{value}'. Allowed values: {[m.value for m in Information]}")
+        self.information = Information(value)
 
     def get_graph(self):
         return self.graph.value
 
     def set_graph(self, value):
-        try:
-            self.graph = Graph(value)
-        except ValueError:
-            raise ValueError(f"Invalid graph '{value}'. Allowed values: {[m.value for m in Graph]}")
+        self.graph = Graph(value)
@@ -0,0 +1,82 @@
+from py.my_functions import *
+
+# create dataframe from csv and drop any row with null values
+def load_dataframe(file_path):
+    try:
+        colum_list = FEATURES
+        df = pd.read_csv(file_path, usecols = colum_list).dropna()
+        return df
+    except FileNotFoundError as error:
+        print(error)
+        quit()
+
+def get_score_from_cli():
+    try:
+        x = float(input("x: "))
+        y = float(input("y: "))
+        return np.array([x, y]).reshape(1, -1)
+    except ValueError:
+        print("Invalid input. Please enter numeric values.")
+        return None
+
+def cartesian(file_path, inf, graph):
+
+    # load dataframe with argument [1]
+    df = load_dataframe(file_path)
+
+    # print dataframe information if argument [3] is true
+    if inf:
+        print(df.describe())
+        print(df.head())
+        print(df.head().info())
+
+    # display graphs if argument [4] is true
+    if graph:
+        sns.countplot(x = df["points"])
+        plt.show()
+
+        sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
+        plt.show()
+
+        sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
+        plt.show()
+
+    features = ["x", "y"]
+    X = df[features]
+
+    y = pd.get_dummies(df['points'])
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
+
+    random_forest = RandomForestClassifier(n_estimators=700, random_state=0)
+    decision_tree = DecisionTreeClassifier(random_state=0)
+    k_neighbors = KNeighborsClassifier(n_neighbors=5)
+
+    models = {
+        "Random Forest Classifier": random_forest,
+        "Decision Tree Classifier": decision_tree,
+        "K-Neighbors": k_neighbors
+    }
+
+    for name, model in models.items():
+        model.fit(X_train.values, y_train.values)
+
+    for name, model in models.items():
+        pred = model.predict(X_test.values)
+
+        my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
+        print(f'My F1 score of {name} is {my_f1_macro_score}')
+
+        f1_sklearn = f1_score(y_test.values, pred, average='macro')
+        print(f'Sklearn F1 score of {name} is {f1_sklearn}')
+
+    score = get_score_from_cli()
+
+    label_encoder = LabelEncoder()
+    df["points"] = label_encoder.fit_transform(df["points"])
+
+    for name, model in models.items():
+        pred = model.predict(score)
+        points_number = pd.DataFrame(pred).idxmax(axis=1)
+        points = label_encoder.inverse_transform(points_number)[0]
+        print(f"{name}: {points} Punkte")
137 py/modell.py Normal file
@@ -0,0 +1,137 @@
+import pandas as pd
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+from sklearn.metrics import f1_score
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.preprocessing import LabelEncoder
+
+# low amounts of features will result in many zero devision in tp=0 and fp=0
+np.seterr(divide='ignore', invalid='ignore')
+
+FEATURES = ["points", "x", "y"]
+
+def make_dataframe(transform):
+    def load_dataframe(file_path):
+        try:
+            colum_list = FEATURES
+            df = pd.read_csv(file_path, usecols = colum_list).dropna()
+            return transform(df)
+        except FileNotFoundError as error:
+            print(error)
+            quit()
+    return load_dataframe
+
+def make_features(selector):
+    def select(df):
+        return df
+    return select(selector)
+
+def radius(df):
+    df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2)
+    return df[["radius"]]
+
+def xy(df):
+    features = ["x", "y"]
+    return df[features]
+
+def apply_model(df, features, score, inf, graph):
+    # print dataframe information
+    if inf:
+        print(df.describe())
+        print(df.head())
+        print(df.head().info())
+
+    # display graphs
+    if graph:
+        sns.countplot(x = df["points"])
+        plt.show()
+
+        sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
+        plt.show()
+
+        sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
+        plt.show()
+
+    y = pd.get_dummies(df['points'])
+    X = features(df)
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
+
+    random_forest = RandomForestClassifier(n_estimators=700, random_state=0)
+    decision_tree = DecisionTreeClassifier(random_state=0)
+    k_neighbors = KNeighborsClassifier(n_neighbors=5)
+
+    models = {
+        "Random Forest Classifier": random_forest,
+        "Decision Tree Classifier": decision_tree,
+        "K-Neighbors": k_neighbors
+    }
+
+    for name, model in models.items():
+        model.fit(X_train.values, y_train.values)
+
+    for name, model in models.items():
+        pred = model.predict(X_test.values)
+
+        my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
+        print(f'My F1 score of {name} is {my_f1_macro_score}')
+
+        f1_sklearn = f1_score(y_test.values, pred, average='macro')
+        print(f'Sklearn F1 score of {name} is {f1_sklearn}')
+
+    score = score()
+
+    label_encoder = LabelEncoder()
+    df["points"] = label_encoder.fit_transform(df["points"])
+
+    for name, model in models.items():
+        pred = model.predict(score)
+        points_number = pd.DataFrame(pred).idxmax(axis=1)
+        points = label_encoder.inverse_transform(points_number)[0]
+        print(f"{name}: {points} Punkte")
+
+    input("\nPress any key to continue...\n")
+
+# calc f1 macro
+def calc_f1_macro(y_true, y_pred):
+    f1_scores = []
+    for column in y_true:
+        score = calc_f1_score(y_true[column].values, y_pred[column])
+        f1_scores.append(score)
+    return np.mean(f1_scores)
+
+def calc_f1_score(y_true, y_pred):
+    tp = np.sum(np.multiply([i==True for i in y_pred], y_true))
+    tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true]))
+    fp = np.sum(np.multiply([i==True for i in y_pred], [not(j) for j in y_true]))
+    fn = np.sum(np.multiply([i==False for i in y_pred], y_true))
+
+    precision = calc_precision(tp, fp)
+    recall = calc_recall(tp, fn)
+
+    if precision != 0 and recall != 0:
+        f1 = (2 * precision * recall) / (precision + recall)
+    else:
+        f1 = 0
+    return f1
+
+def calc_precision(tp, fp):
+    return tp / (tp + fp)
+
+def calc_recall(tp, fn):
+    return tp / (tp + fn)
+
+def make_score_function(transform):
+    def get_score_from_cli():
+        try:
+            x = float(input("x: "))
+            y = float(input("y: "))
+            return np.array([transform(x, y)]).reshape(1, -1)
+        except ValueError:
+            print("Invalid input. Please enter numeric values.")
+            return None
+    return get_score_from_cli
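Note how make_features resolves in this module: the inner select returns its argument unchanged, so select(selector) evaluates to the selector itself and make_features(radius) is simply radius; apply_model then calls features(df) to build X. A quick check of that equivalence, assuming py/modell.py is on the import path:

import pandas as pd
from py.modell import make_features, radius, xy

df = pd.DataFrame({"points": [1, 2], "x": [3.0, -4.0], "y": [4.0, 3.0]})

features = make_features(radius)
print(features is radius)      # True: select() hands the selector straight back
print(features(df))            # the derived "radius" column, sqrt(x**2 + y**2)
print(make_features(xy)(df))   # the plain x/y columns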
88 py/my_functions.py Normal file
@@ -0,0 +1,88 @@
+import pandas as pd
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+from sklearn.metrics import f1_score
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.preprocessing import LabelEncoder
+
+np.seterr(divide='ignore', invalid='ignore')
+
+FEATURES = ["points", "x", "y"]
+
+def make_dataframe(transform):
+    def load_dataframe(file_path):
+        try:
+            colum_list = FEATURES
+            df = pd.read_csv(file_path, usecols = colum_list).dropna()
+            return transform(df)
+        except FileNotFoundError as error:
+            print(error)
+            quit()
+
+def calc_f1_macro(y_true, y_pred):
+    f1_scores = []
+    for column in y_true:
+        score = calc_f1_score(y_true[column].values, y_pred[column])
+        f1_scores.append(score)
+    return np.mean(f1_scores)
+
+def calc_f1_score(y_true, y_pred):
+    tp = np.sum(np.multiply([i==True for i in y_pred], y_true))
+    tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true]))
+    fp = np.sum(np.multiply([i==True for i in y_pred], [not(j) for j in y_true]))
+    fn = np.sum(np.multiply([i==False for i in y_pred], y_true))
+
+    precision = calc_precision(tp, fp)
+    recall = calc_recall(tp, fn)
+
+    if precision != 0 and recall != 0:
+        f1 = (2 * precision * recall) / (precision + recall)
+    else:
+        f1 = 0
+    return f1
+
+def calc_precision(tp, fp):
+    return tp / (tp + fp)
+
+def calc_recall(tp, fn):
+    return tp / (tp + fn)
+
+def apply_model(X, y):
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
+
+    random_forest = RandomForestClassifier(n_estimators=700, random_state=0)
+    decision_tree = DecisionTreeClassifier(random_state=0)
+    k_neighbors = KNeighborsClassifier(n_neighbors=5)
+
+    models = {
+        "Random Forest Classifier": random_forest,
+        "Decision Tree Classifier": decision_tree,
+        "K-Neighbors": k_neighbors
+    }
+
+    for name, model in models.items():
+        model.fit(X_train.values, y_train.values)
+
+    for name, model in models.items():
+        pred = model.predict(X_test.values)
+
+        my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
+        print(f'My F1 score of {name} is {my_f1_macro_score}')
+
+        f1_sklearn = f1_score(y_test.values, pred, average='macro')
+        print(f'Sklearn F1 score of {name} is {f1_sklearn}')
+
+def make_score_function(transform):
+    def get_score_from_cli():
+        try:
+            x = float(input("x: "))
+            y = float(input("y: "))
+            return np.array([transform(x, y)]).reshape(1, -1)
+        except ValueError:
+            print("Invalid input. Please enter numeric values.")
+            return None
+    return get_score_from_cli
93 py/vector.py
@@ -1,64 +1,15 @@
-import pandas as pd
-import numpy as np
-import seaborn as sns
-import matplotlib.pyplot as plt
-from sklearn.metrics import f1_score
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.preprocessing import LabelEncoder
+from my_functions import *
 
-FEATURES = ["points", "x", "y"]
-
 # create dataframe from csv and drop any row with null values
-def load_dataframe():
+def load_dataframe(file_path):
     try:
         colum_list = FEATURES
-        df = pd.read_csv("data/synthetic_data.csv", usecols = colum_list).dropna()
+        df = pd.read_csv(file_path, usecols = colum_list).dropna()
         return df
     except FileNotFoundError as error:
         print(error)
         quit()
 
-def calc_f1_macro(y_true, y_pred):
-    f1_scores = []
-    for column in y_true:
-        score = calc_f1_score(y_true[column].values, y_pred[column])
-        f1_scores.append(score)
-    return np.mean(f1_scores)
-
-def calc_f1_score(y_true, y_pred):
-    tp = np.sum(np.multiply([i==True for i in y_pred], y_true))
-    tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true]))
-    fp = np.sum(np.multiply([i==True for i in y_pred], [not(j) for j in y_true]))
-    fn = np.sum(np.multiply([i==False for i in y_pred], y_true))
-
-    precision = calc_precision(tp, fp)
-    recall = calc_recall(tp, fn)
-
-    '''if tp != 0 and fp != 0:
-        precision = calc_precision(tp, fp)
-    else:
-        precision = 0
-
-    if tp != 0 and fn != 0:
-        recall = calc_recall(tp, fn)
-    else:
-        recall = 0'''
-
-    if precision != 0 and recall != 0:
-        f1 = (2 * precision * recall) / (precision + recall)
-    else:
-        f1 = 0
-    return f1
-
-def calc_precision(tp, fp):
-    return tp / (tp + fp)
-
-def calc_recall(tp, fn):
-    return tp / (tp + fn)
-
 def get_score_from_cli():
     try:
         x = float(input("x: "))
@@ -69,18 +20,27 @@ def get_score_from_cli():
         print("Invalid input. Please enter numeric values.")
         return None
 
-def main():
-    df = load_dataframe()
-    print(df.head())
-
-    sns.countplot(x = df["points"])
-    plt.show()
-
-    sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
-    plt.show()
-
-    sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
-    plt.show()
+def vector(file_path, inf, graph):
+
+    # load dataframe
+    df = load_dataframe(file_path)
+
+    # print dataframe information
+    if inf:
+        print(df.describe())
+        print(df.head())
+        print(df.head().info())
+
+    # display graphs
+    if graph:
+        sns.countplot(x = df["points"])
+        plt.show()
+
+        sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
+        plt.show()
+
+        sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
+        plt.show()
 
     df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2)
     X = df[["radius"]]
@@ -89,6 +49,9 @@ def main():
 
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
 
+    print(X_train)
+    quit()
+
     random_forest = RandomForestClassifier(n_estimators=700, random_state=0)
     decision_tree = DecisionTreeClassifier(random_state=0)
     k_neighbors = KNeighborsClassifier(n_neighbors=5)
@@ -106,10 +69,10 @@ def main():
         pred = model.predict(X_test.values)
 
         my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred))
-        print(f'My F1 score of {name} is {my_f1_macro_score}\n')
+        print(f'My F1 score of {name} is {my_f1_macro_score}')
 
         f1_sklearn = f1_score(y_test.values, pred, average='macro')
-        print(f'Sklearn F1 score of {name} is {f1_sklearn}\n')
+        print(f'Sklearn F1 score of {name} is {f1_sklearn}')
 
     score = get_score_from_cli()
 
@@ -122,6 +85,4 @@ def main():
         points = label_encoder.inverse_transform(points_number)[0]
         print(f"{name}: {points} Punkte")
 
-
-if __name__ == "__main__":
-    main()
+vector("data/shots.csv", False, False)