cds1011-ls2/py/modell.py
2025-11-29 23:55:33 +01:00

137 lines
4.1 KiB
Python

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
# A small number of features leads to many zero divisions in the hand-written
# metrics (tp = 0 and fp = 0), so NumPy's warnings for division by zero and
# for invalid (0/0) floating-point operations are silenced.
np.seterr(divide='ignore', invalid='ignore')
# Columns that are read from the input CSV file.
FEATURES = ["points", "x", "y"]
def make_dataframe(transform):
    """Build a CSV loader that post-processes the loaded data with *transform*.

    Args:
        transform: Callable that receives the loaded ``DataFrame`` and returns
            the object the loader should hand back.

    Returns:
        A function ``load_dataframe(file_path)`` that reads the ``FEATURES``
        columns from the CSV at *file_path*, drops rows containing missing
        values and returns ``transform(df)``.
    """

    def load_dataframe(file_path):
        """Load *file_path*; print the error and exit if the file is missing.

        Raises:
            SystemExit: With status 1 when *file_path* does not exist.
        """
        # Only the read is guarded, so a FileNotFoundError raised inside
        # ``transform`` is not mistaken for a missing input file.
        try:
            df = pd.read_csv(file_path, usecols=FEATURES).dropna()
        except FileNotFoundError as error:
            print(error)
            # ``quit()`` is meant for the interactive shell and exits with
            # status 0; signal the failure with a non-zero exit status instead.
            raise SystemExit(1) from error
        return transform(df)

    return load_dataframe
def make_features(selector):
    """Return the feature selector used to build the model input.

    The selector is handed back unchanged, i.e. ``make_features(f) is f``.
    """
    return selector
def radius(df):
    """Add a ``radius`` column to *df* and return it as a one-column frame.

    The radius is the Euclidean distance of each ``(x, y)`` pair from the
    origin. Note that *df* is modified in place: the new column is stored on it.
    """
    x_squared = df["x"] ** 2
    y_squared = df["y"] ** 2
    df["radius"] = np.sqrt(x_squared + y_squared)
    only_radius = ["radius"]
    return df[only_radius]
def xy(df):
    """Return the ``x`` and ``y`` columns of *df* as the feature frame."""
    return df[["x", "y"]]
def apply_model(df, features, score, inf, graph):
    """Train three classifiers on *df*, report F1 scores and classify one sample.

    Args:
        df: DataFrame providing the columns ``points`` (target), ``x`` and ``y``.
        features: Callable mapping *df* to the feature frame used for training.
        score: Zero-argument callable returning the sample to classify (passed
            straight to ``model.predict``), or ``None`` if no valid sample
            could be obtained.
        inf: If true, print summary information about *df*.
        graph: If true, show a count plot, a correlation heatmap and a scatter
            plot of the data.
    """
    # print dataframe information
    if inf:
        print(df.describe())
        print(df.head())
        # info() writes its report itself and returns None, so wrapping it in
        # print() would only add a stray "None" line.
        df.head().info()

    # display graphs
    if graph:
        sns.countplot(x=df["points"])
        plt.show()
        sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
        plt.show()
        sns.scatterplot(x=df['x'], y=df['y'], hue=df['points'])
        plt.show()

    # One-hot encode the target: one indicator column per distinct point value.
    y = pd.get_dummies(df['points'])
    X = features(df)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=0
    )

    models = {
        "Random Forest Classifier": RandomForestClassifier(n_estimators=700, random_state=0),
        "Decision Tree Classifier": DecisionTreeClassifier(random_state=0),
        "K-Neighbors": KNeighborsClassifier(n_neighbors=5),
    }

    for model in models.values():
        model.fit(X_train.values, y_train.values)

    for name, model in models.items():
        pred = model.predict(X_test.values)
        # Give the prediction the same column labels as y_test. With the
        # default 0..k-1 labels the per-column lookup in calc_f1_macro only
        # works when the point values happen to be exactly 0..k-1.
        pred_frame = pd.DataFrame(pred, columns=y_test.columns)
        my_f1_macro_score = calc_f1_macro(y_test, pred_frame)
        print(f'My F1 score of {name} is {my_f1_macro_score}')
        f1_sklearn = f1_score(y_test.values, pred, average='macro')
        print(f'Sklearn F1 score of {name} is {f1_sklearn}')

    sample = score()
    # A score callable may return None for invalid input; predicting on None
    # would crash, so the classification step is skipped in that case.
    if sample is not None:
        # Fit the encoder without overwriting df["points"]; the caller's frame
        # must keep its original labels for later calls. This relies on the
        # encoder numbering the classes in the same (sorted) order as the
        # one-hot columns of y.
        label_encoder = LabelEncoder()
        label_encoder.fit(df["points"])
        for name, model in models.items():
            pred = model.predict(sample)
            # Position of the strongest indicator column = encoded class id.
            points_number = pd.DataFrame(pred).idxmax(axis=1)
            points = label_encoder.inverse_transform(points_number)[0]
            print(f"{name}: {points} Punkte")

    input("\nPress any key to continue...\n")
# calc f1 macro
def calc_f1_macro(y_true, y_pred):
    """Return the macro-averaged F1 score over all columns of *y_true*.

    Every column of *y_true* is scored against the column with the same label
    in *y_pred*; the per-column F1 scores are then averaged without weighting.
    """
    per_class_scores = [
        calc_f1_score(y_true[label].values, y_pred[label])
        for label in y_true
    ]
    return np.mean(per_class_scores)
def calc_f1_score(y_true, y_pred):
    """Return the F1 score of a single binary (one-vs-rest) label column.

    Args:
        y_true: Sequence of ground-truth flags (truthy = sample is in the class).
        y_pred: Sequence of predicted flags, same length as *y_true*.

    Returns:
        The harmonic mean of precision and recall, or ``0.0`` when there are no
        true positives. That includes the case where the class occurs in
        neither *y_true* nor *y_pred*, for which precision and recall are both
        0/0 and the score would otherwise come out as NaN.
    """
    actual = np.asarray(y_true).astype(bool)
    predicted = np.asarray(y_pred)
    predicted_pos = predicted == 1
    predicted_neg = predicted == 0

    # Confusion-matrix counts; true negatives are not needed for F1.
    tp = np.count_nonzero(predicted_pos & actual)
    fp = np.count_nonzero(predicted_pos & ~actual)
    fn = np.count_nonzero(predicted_neg & actual)

    # Without true positives precision and/or recall are zero or undefined,
    # so F1 is 0 by convention and no division is attempted.
    if tp == 0:
        return 0.0

    precision = calc_precision(tp, fp)
    recall = calc_recall(tp, fn)
    return (2 * precision * recall) / (precision + recall)
def calc_precision(tp, fp):
    """Return the precision ``tp / (tp + fp)``.

    Args:
        tp: Number of true positives.
        fp: Number of false positives.

    Returns:
        The fraction of positive predictions that are correct, or ``0.0`` when
        nothing was predicted positive (instead of NaN or a ZeroDivisionError).
    """
    predicted_positive = tp + fp
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive
def calc_recall(tp, fn):
    """Return the recall ``tp / (tp + fn)``.

    Args:
        tp: Number of true positives.
        fn: Number of false negatives.

    Returns:
        The fraction of actual positives that were found, or ``0.0`` when there
        are no actual positives (instead of NaN or a ZeroDivisionError).
    """
    actual_positive = tp + fn
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive
def make_score_function(transform):
    """Build a function that reads one ``(x, y)`` sample from the command line.

    Args:
        transform: Callable ``transform(x, y)`` that turns the two entered
            floats into the feature value(s) of the sample.

    Returns:
        A zero-argument function that prompts for ``x`` and ``y`` and returns
        the transformed sample as an array of shape ``(1, n_features)``, or
        ``None`` (after printing a message) if an entry is not numeric.
    """

    def get_score_from_cli():
        """Prompt for x and y and return the transformed sample or ``None``."""
        # Only the parsing is guarded: a ValueError raised by ``transform``
        # is a programming error and must not be reported as bad user input.
        try:
            x = float(input("x: "))
            y = float(input("y: "))
        except ValueError:
            print("Invalid input. Please enter numeric values.")
            return None
        return np.array([transform(x, y)]).reshape(1, -1)

    return get_score_from_cli