diff --git a/py/absolut.py b/py/absolut.py deleted file mode 100644 index db094c8..0000000 --- a/py/absolut.py +++ /dev/null @@ -1,82 +0,0 @@ -from py.my_functions import * - -# create dataframe from csv and drop any row with null values -def load_dataframe(file_path): - try: - colum_list = FEATURES - df = pd.read_csv(file_path, usecols = colum_list).dropna() - return df.abs() - except FileNotFoundError as error: - print(error) - quit() - -def get_score_from_cli(): - try: - x = float(input("x: ")) - y = float(input("y: ")) - return np.array([x, y]).reshape(1, -1) - except ValueError: - print("Invalid input. Please enter numeric values.") - return None - -def absolut(file_path, inf, graph): - - # load dataframe with argument [1] - df = load_dataframe(file_path) - - # print dataframe information if argument [3] is true - if inf: - print(df.describe()) - print(df.head()) - print(df.head().info()) - - # display graphs if argument [4] is true - if graph: - sns.countplot(x = df["points"]) - plt.show() - - sns.heatmap(df.corr(), annot=True, cmap='coolwarm') - plt.show() - - sns.scatterplot(x=df['x'], y=df['y'], hue=df['points']) - plt.show() - - features = ["x", "y"] - X = df[features] - - y = pd.get_dummies(df['points']) - - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0) - - random_forest = RandomForestClassifier(n_estimators=700, random_state=0) - decision_tree = DecisionTreeClassifier(random_state=0) - k_neighbors = KNeighborsClassifier(n_neighbors=5) - - models = { - "Random Forest Classifier": random_forest, - "Decision Tree Classifier": decision_tree, - "K-Neighbors": k_neighbors - } - - for name, model in models.items(): - model.fit(X_train.values, y_train.values) - - for name, model in models.items(): - pred = model.predict(X_test.values) - - my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred)) - print(f'My F1 score of {name} is {my_f1_macro_score}') - - f1_sklearn = f1_score(y_test.values, pred, average='macro') - print(f'Sklearn F1 score of {name} is {f1_sklearn}') - - score = get_score_from_cli() - - label_encoder = LabelEncoder() - df["points"] = label_encoder.fit_transform(df["points"]) - - for name, model in models.items(): - pred = model.predict(score) - points_number = pd.DataFrame(pred).idxmax(axis=1) - points = label_encoder.inverse_transform(points_number)[0] - print(f"{name}: {points} Punkte") \ No newline at end of file diff --git a/py/cartesian.py b/py/cartesian.py deleted file mode 100644 index b0a463e..0000000 --- a/py/cartesian.py +++ /dev/null @@ -1,82 +0,0 @@ -from py.my_functions import * - -# create dataframe from csv and drop any row with null values -def load_dataframe(file_path): - try: - colum_list = FEATURES - df = pd.read_csv(file_path, usecols = colum_list).dropna() - return df - except FileNotFoundError as error: - print(error) - quit() - -def get_score_from_cli(): - try: - x = float(input("x: ")) - y = float(input("y: ")) - return np.array([x, y]).reshape(1, -1) - except ValueError: - print("Invalid input. Please enter numeric values.") - return None - -def cartesian(file_path, inf, graph): - - # load dataframe with argument [1] - df = load_dataframe(file_path) - - # print dataframe information if argument [3] is true - if inf: - print(df.describe()) - print(df.head()) - print(df.head().info()) - - # display graphs if argument [4] is true - if graph: - sns.countplot(x = df["points"]) - plt.show() - - sns.heatmap(df.corr(), annot=True, cmap='coolwarm') - plt.show() - - sns.scatterplot(x=df['x'], y=df['y'], hue=df['points']) - plt.show() - - features = ["x", "y"] - X = df[features] - - y = pd.get_dummies(df['points']) - - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0) - - random_forest = RandomForestClassifier(n_estimators=700, random_state=0) - decision_tree = DecisionTreeClassifier(random_state=0) - k_neighbors = KNeighborsClassifier(n_neighbors=5) - - models = { - "Random Forest Classifier": random_forest, - "Decision Tree Classifier": decision_tree, - "K-Neighbors": k_neighbors - } - - for name, model in models.items(): - model.fit(X_train.values, y_train.values) - - for name, model in models.items(): - pred = model.predict(X_test.values) - - my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred)) - print(f'My F1 score of {name} is {my_f1_macro_score}') - - f1_sklearn = f1_score(y_test.values, pred, average='macro') - print(f'Sklearn F1 score of {name} is {f1_sklearn}') - - score = get_score_from_cli() - - label_encoder = LabelEncoder() - df["points"] = label_encoder.fit_transform(df["points"]) - - for name, model in models.items(): - pred = model.predict(score) - points_number = pd.DataFrame(pred).idxmax(axis=1) - points = label_encoder.inverse_transform(points_number)[0] - print(f"{name}: {points} Punkte") \ No newline at end of file diff --git a/py/my_functions.py b/py/my_functions.py deleted file mode 100644 index 303b57b..0000000 --- a/py/my_functions.py +++ /dev/null @@ -1,88 +0,0 @@ -import pandas as pd -import numpy as np -import seaborn as sns -import matplotlib.pyplot as plt -from sklearn.metrics import f1_score -from sklearn.model_selection import train_test_split -from sklearn.ensemble import RandomForestClassifier -from sklearn.neighbors import KNeighborsClassifier -from sklearn.tree import DecisionTreeClassifier -from sklearn.preprocessing import LabelEncoder - -np.seterr(divide='ignore', invalid='ignore') - -FEATURES = ["points", "x", "y"] - -def make_dataframe(transform): - def load_dataframe(file_path): - try: - colum_list = FEATURES - df = pd.read_csv(file_path, usecols = colum_list).dropna() - return transform(df) - except FileNotFoundError as error: - print(error) - quit() - -def calc_f1_macro(y_true, y_pred): - f1_scores = [] - for column in y_true: - score = calc_f1_score(y_true[column].values, y_pred[column]) - f1_scores.append(score) - return np.mean(f1_scores) - -def calc_f1_score(y_true, y_pred): - tp = np.sum(np.multiply([i==True for i in y_pred], y_true)) - tn = np.sum(np.multiply([i==False for i in y_pred], [not(j) for j in y_true])) - fp = np.sum(np.multiply([i==True for i in y_pred], [not(j) for j in y_true])) - fn = np.sum(np.multiply([i==False for i in y_pred], y_true)) - - precision = calc_precision(tp, fp) - recall = calc_recall(tp, fn) - - if precision != 0 and recall != 0: - f1 = (2 * precision * recall) / (precision + recall) - else: - f1 = 0 - return f1 - -def calc_precision(tp, fp): - return tp / (tp + fp) - -def calc_recall(tp, fn): - return tp / (tp + fn) - -def apply_model(X, y): - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0) - - random_forest = RandomForestClassifier(n_estimators=700, random_state=0) - decision_tree = DecisionTreeClassifier(random_state=0) - k_neighbors = KNeighborsClassifier(n_neighbors=5) - - models = { - "Random Forest Classifier": random_forest, - "Decision Tree Classifier": decision_tree, - "K-Neighbors": k_neighbors - } - - for name, model in models.items(): - model.fit(X_train.values, y_train.values) - - for name, model in models.items(): - pred = model.predict(X_test.values) - - my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred)) - print(f'My F1 score of {name} is {my_f1_macro_score}') - - f1_sklearn = f1_score(y_test.values, pred, average='macro') - print(f'Sklearn F1 score of {name} is {f1_sklearn}') - -def make_score_function(transform): - def get_score_from_cli(): - try: - x = float(input("x: ")) - y = float(input("y: ")) - return np.array([transform(x, y)]).reshape(1, -1) - except ValueError: - print("Invalid input. Please enter numeric values.") - return None - return get_score_from_cli \ No newline at end of file diff --git a/py/vector.py b/py/vector.py deleted file mode 100644 index 94c6b07..0000000 --- a/py/vector.py +++ /dev/null @@ -1,88 +0,0 @@ -from my_functions import * - -# create dataframe from csv and drop any row with null values -def load_dataframe(file_path): - try: - colum_list = FEATURES - df = pd.read_csv(file_path, usecols = colum_list).dropna() - return df - except FileNotFoundError as error: - print(error) - quit() - -def get_score_from_cli(): - try: - x = float(input("x: ")) - y = float(input("y: ")) - abs_v = np.sqrt(x**2 + y**2) - return np.array([abs_v]).reshape(1, -1) - except ValueError: - print("Invalid input. Please enter numeric values.") - return None - -def vector(file_path, inf, graph): - - # load dataframe - df = load_dataframe(file_path) - - # print dataframe information - if inf: - print(df.describe()) - print(df.head()) - print(df.head().info()) - - # display graphs - if graph: - sns.countplot(x = df["points"]) - plt.show() - - sns.heatmap(df.corr(), annot=True, cmap='coolwarm') - plt.show() - - sns.scatterplot(x=df['x'], y=df['y'], hue=df['points']) - plt.show() - - df["radius"] = np.sqrt(df["x"]**2 + df["y"]**2) - X = df[["radius"]] - - y = pd.get_dummies(df['points']) - - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0) - - print(X_train) - quit() - - random_forest = RandomForestClassifier(n_estimators=700, random_state=0) - decision_tree = DecisionTreeClassifier(random_state=0) - k_neighbors = KNeighborsClassifier(n_neighbors=5) - - models = { - "Random Forest Classifier": random_forest, - "Decision Tree Classifier": decision_tree, - "K-Neighbors": k_neighbors - } - - for name, model in models.items(): - model.fit(X_train.values, y_train.values) - - for name, model in models.items(): - pred = model.predict(X_test.values) - - my_f1_macro_score = calc_f1_macro(y_test, pd.DataFrame(pred)) - print(f'My F1 score of {name} is {my_f1_macro_score}') - - f1_sklearn = f1_score(y_test.values, pred, average='macro') - print(f'Sklearn F1 score of {name} is {f1_sklearn}') - - score = get_score_from_cli() - - label_encoder = LabelEncoder() - df["points"] = label_encoder.fit_transform(df["points"]) - - for name, model in models.items(): - pred = model.predict(score) - points_number = pd.DataFrame(pred).idxmax(axis=1) - points = label_encoder.inverse_transform(points_number)[0] - print(f"{name}: {points} Punkte") - -vector("data/shots.csv", False, False) \ No newline at end of file