# Origin: git patch a311e2b3a9b192cdddb95a5bb64d56f0c06e4182 by git-sandro
# (Thu, 20 Nov 2025 11:45:30 +0100): "add file
# classification/classification_template.py updated README".
# The same patch adds `seaborn` to the README's list of libraries to install
# (next to numpy, matplotlib, openpyxl and scikit-learn).
"""Template for a penguin species classification workflow.

The script loads ``penguins.csv``, prints a short overview and quality
report, imputes missing numeric values, label-encodes the species column,
shows a few exploratory plots and prepares the feature matrix.  Model
training, evaluation and prediction are still placeholders.

The module also provides small, dependency-light implementations of
precision, recall, F1 and macro-averaged F1.
"""
from typing import Optional

import numpy as np
import pandas as pd

# Columns read from the CSV file; 'Species' is the classification target.
FEATURES = [
    'Flipper Length (mm)',
    'Body Mass (g)',
    'Culmen Depth (mm)',
    'Culmen Length (mm)',
    'Species',
]


def load_dataframe(path: str = "penguins.csv") -> Optional[pd.DataFrame]:
    """Load the columns listed in ``FEATURES`` from a CSV file.

    Args:
        path: Location of the CSV file.  Defaults to ``penguins.csv`` in the
            current working directory.

    Returns:
        The loaded DataFrame, or ``None`` (after printing a message) when the
        file does not exist.
    """
    try:
        return pd.read_csv(path, usecols=FEATURES)
    except FileNotFoundError:
        print(f"Datei '{path}' nicht gefunden.")
        return None


def calc_precision(tp, fp) -> float:
    """Return precision ``tp / (tp + fp)``.

    Args:
        tp: Number of true positives.
        fp: Number of false positives.

    Returns:
        The precision, or ``0.0`` when nothing was predicted positive
        (``tp + fp == 0``), so callers never divide by zero.
    """
    predicted_positive = tp + fp
    if predicted_positive == 0:
        return 0.0
    return float(tp / predicted_positive)


def calc_recall(tp, fn) -> float:
    """Return recall ``tp / (tp + fn)``.

    Args:
        tp: Number of true positives.
        fn: Number of false negatives.

    Returns:
        The recall, or ``0.0`` when there are no actual positives
        (``tp + fn == 0``).
    """
    actual_positive = tp + fn
    if actual_positive == 0:
        return 0.0
    return float(tp / actual_positive)


def calc_f1_score(y_true, y_pred) -> float:
    """Return the binary F1 score of ``y_pred`` against ``y_true``.

    Both inputs are interpreted as boolean indicator vectors (truthy means
    "belongs to the positive class").

    Args:
        y_true: Ground-truth indicators (sequence, ndarray or Series).
        y_pred: Predicted indicators of the same length.

    Returns:
        The harmonic mean of precision and recall, or ``0.0`` when both are
        zero.

    Raises:
        ValueError: If the two inputs differ in length.
    """
    actual = np.asarray(y_true).astype(bool).ravel()
    predicted = np.asarray(y_pred).astype(bool).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred differ in length: "
            f"{actual.shape[0]} != {predicted.shape[0]}"
        )

    # True negatives are not needed for precision, recall or F1.
    tp = int(np.sum(predicted & actual))
    fp = int(np.sum(predicted & ~actual))
    fn = int(np.sum(~predicted & actual))

    precision = calc_precision(tp, fp)
    recall = calc_recall(tp, fn)
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)


def calc_f1_macro(y_true, y_pred) -> float:
    """Return the macro-averaged F1 score over one-hot encoded classes.

    Args:
        y_true: One-hot ground truth with one column per class (DataFrame or
            2-D array).
        y_pred: One-hot predictions with the same shape.  When both inputs
            are DataFrames the columns are matched by label; otherwise they
            are matched by position.

    Returns:
        The unweighted mean of the per-class F1 scores.

    Raises:
        ValueError: If the inputs are not 2-D, differ in shape, or contain
            no class columns.
    """
    if isinstance(y_true, pd.DataFrame) and isinstance(y_pred, pd.DataFrame):
        # Align by class label so that column order does not matter.
        y_pred = y_pred[y_true.columns]

    true_matrix = np.asarray(y_true)
    pred_matrix = np.asarray(y_pred)
    if true_matrix.ndim != 2 or true_matrix.shape != pred_matrix.shape:
        raise ValueError(
            f"expected two 2-D inputs of equal shape, got "
            f"{true_matrix.shape} and {pred_matrix.shape}"
        )

    f1_scores = [
        calc_f1_score(true_matrix[:, k], pred_matrix[:, k])
        for k in range(true_matrix.shape[1])
    ]
    if not f1_scores:
        raise ValueError("y_true has no class columns")
    return float(np.mean(f1_scores))


def get_penguin_from_cli():
    """Ask the user for culmen depth and length on standard input.

    Returns:
        A ``(1, 2)`` array ``[[culmen_depth, culmen_length]]``, or ``None``
        (after printing a message) when an entry is not numeric.
    """
    try:
        culmen_depth = float(input("Culmen Depth (mm): "))
        culmen_length = float(input("Culmen Length (mm): "))
    except ValueError:
        print("Invalid input. Please enter numeric values.")
        return None
    return np.array([culmen_depth, culmen_length]).reshape(1, -1)


def main():
    """Run the exploratory part of the classification workflow."""
    # The plotting and scikit-learn stack is imported here, where it is used,
    # so the metric helpers above can be imported with only numpy and pandas.
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.preprocessing import LabelEncoder

    # Kept from the original template for the training and evaluation steps
    # that are still to be written.
    from sklearn.ensemble import RandomForestClassifier  # noqa: F401
    from sklearn.metrics import f1_score  # noqa: F401
    from sklearn.model_selection import train_test_split  # noqa: F401
    from sklearn.neighbors import KNeighborsClassifier  # noqa: F401
    from sklearn.tree import DecisionTreeClassifier  # noqa: F401

    df = load_dataframe()
    if df is None:
        return

    print("\n=== Overview ===")
    print(df.describe())
    print(df.head())
    # info() prints its report itself and returns None, so it is not wrapped
    # in print(); it is called on the full frame to report real row counts.
    df.info()

    print("\n=== Quality Assessment ===")
    row_count = len(df)
    print("Number of rows ", row_count)
    print("Check for null-values ", df.isnull().sum())

    print("\n=== Preprocessing ===")
    # fill null-values with mean
    df = df.fillna(df.mean(numeric_only=True))

    # transform species column to numbers
    label_encoder = LabelEncoder()
    df["Species"] = label_encoder.fit_transform(df["Species"])

    print("\n=== Countplot ===")
    # Countplot check for the balancing of the data
    sns.countplot(x=df["Species"])
    plt.show()

    print("\n=== Heatmap ===")
    # Check correlation among other variables
    sns.heatmap(df.corr(), annot=True, cmap="coolwarm")
    plt.show()

    print("\n=== Feature Selection ===")
    features = ['Culmen Depth (mm)', 'Culmen Length (mm)']
    y = df["Species"]
    X = df[features]
    # One column per species, as expected by calc_f1_macro.
    y = pd.get_dummies(y)

    print("\n=== Visualize Features ===")
    sns.scatterplot(
        x=df['Culmen Length (mm)'],
        y=df['Culmen Depth (mm)'],
        hue=df['Species'],
    )
    plt.show()

    print("\n=== Model Training ===")
    print("🛠️ under construction")

    print("\n=== Model Evaluation ===")
    print("🛠️ under construction")

    print("\n=== Prediction ===")
    # Culmen Depth (mm) = 18, Culmen Length (mm) = 50
    #wild_penguin = np.array([18, 50]).reshape(1, -1)
    #wild_penguin = get_penguin_from_cli()
    print("🛠️ under construction")


if __name__ == "__main__":
    main()