add file classification/classification_template.py updated README

2025-11-20 11:45:30 +01:00 · 2025-11-20 11:45:30 +01:00 · a311e2b3a9
commit a311e2b3a9
parent add94d05c7
2 changed files with 112 additions and 1 deletions
--- a/README.md
+++ b/README.md
@ -5,4 +5,5 @@ Install this Python libraries in your virtual environment. Use (uv) pip install
 * numpy
 * matplotlib
 * openpyxl
-* scikit-learn
+* scikit-learn
+* seaborn
--- a/classification/classification_template.py
+++ b/classification/classification_template.py
@ -0,0 +1,110 @@
+import pandas as pd
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+from sklearn.metrics import f1_score
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.preprocessing import LabelEncoder
+
+# Columns read from penguins.csv by load_dataframe(): four numeric
+# measurements plus 'Species', which main() label-encodes and uses as the
+# classification target.
+FEATURES = ['Flipper Length (mm)','Body Mass (g)','Culmen Depth (mm)','Culmen Length (mm)', 'Species']
+
+def load_dataframe():
+    """Read the FEATURES columns of 'penguins.csv' into a DataFrame.
+
+    The path is relative, so the file is looked up in the current working
+    directory.
+
+    Returns:
+        The loaded DataFrame, or None (after printing a notice) when the
+        file does not exist.
+    """
+    try:
+        return pd.read_csv("penguins.csv", usecols=FEATURES)
+    except FileNotFoundError:
+        print("Datei 'penguins.csv' nicht gefunden.")
+    return None
+
+def calc_precision(tp, fp):
+    """Return the precision tp / (tp + fp).
+
+    Args:
+        tp: Number of true positives.
+        fp: Number of false positives.
+
+    Returns:
+        The fraction of positive predictions that were correct, or 0.0
+        when nothing was predicted positive (tp + fp == 0), so callers
+        never hit a division by zero.
+    """
+    predicted_positive = tp + fp
+    if predicted_positive == 0:
+        return 0.0
+    return tp / predicted_positive
+
+def calc_recall(tp, fn):
+    """Return the recall tp / (tp + fn).
+
+    Args:
+        tp: Number of true positives.
+        fn: Number of false negatives.
+
+    Returns:
+        The fraction of actual positives that were found, or 0.0 when
+        there are no actual positives (tp + fn == 0), so callers never hit
+        a division by zero.
+    """
+    actual_positive = tp + fn
+    if actual_positive == 0:
+        return 0.0
+    return tp / actual_positive
+
+def calc_f1_score(y_true, y_pred):
+    """Return the binary F1 score of y_pred against y_true.
+
+    Args:
+        y_true: Sequence of ground-truth labels, expected to be booleans
+            or 0/1 values (truthy means "positive").
+        y_pred: Sequence of predicted labels in the same encoding and of
+            the same length as y_true.
+
+    Returns:
+        The F1 score as a float, or 0.0 when there are no true positives,
+        false positives or false negatives at all (nothing to score).
+    """
+    # Approach adapted from:
+    # https://stackoverflow.com/questions/64860091/computing-macro-average-f1-score-using-numpy-pythonwithout-using-scikit-learn
+    actual = np.asarray(y_true).astype(bool)
+    predicted = np.asarray(y_pred).astype(bool)
+
+    tp = np.sum(predicted & actual)
+    fp = np.sum(predicted & ~actual)
+    fn = np.sum(~predicted & actual)
+
+    # F1 = 2 * P * R / (P + R) simplifies to 2*tp / (2*tp + fp + fn);
+    # true negatives do not enter the formula.
+    denominator = 2 * tp + fp + fn
+    if denominator == 0:
+        return 0.0
+    return float(2 * tp / denominator)
+
+
+def calc_f1_macro(y_true, y_pred):
+    """Return the macro-averaged F1 score over all columns of y_true.
+
+    Args:
+        y_true: One-hot encoded ground truth with one column per class
+            (main() builds it with pd.get_dummies).
+        y_pred: Predictions in the same layout. Either an object that can
+            be indexed by y_true's column labels, or a 2-D NumPy array
+            whose columns are in the same order as y_true's columns.
+
+    Returns:
+        The unweighted mean of the per-column F1 scores.
+    """
+    if isinstance(y_pred, np.ndarray) and y_pred.ndim == 2:
+        # Indexing a bare 2-D array with a column label would select a row,
+        # so attach y_true's column labels first.
+        y_pred = pd.DataFrame(y_pred, columns=y_true.columns)
+
+    f1_scores = [
+        calc_f1_score(y_true[column].values, y_pred[column])
+        for column in y_true
+    ]
+    return np.mean(f1_scores)
+
+def get_penguin_from_cli():
+    """Ask for culmen depth and culmen length on the command line.
+
+    Returns:
+        A NumPy array of shape (1, 2) holding [depth, length], or None
+        (after printing a notice) as soon as one entry is not numeric.
+    """
+    prompts = ("Culmen Depth (mm): ", "Culmen Length (mm): ")
+    measurements = []
+    for prompt in prompts:
+        try:
+            measurements.append(float(input(prompt)))
+        except ValueError:
+            print("Invalid input. Please enter numeric values.")
+            return None
+    # Wrapping the list once more yields a single row with two columns.
+    return np.array([measurements])
+
+def main():
+    """Run the penguin classification walkthrough from load to prediction.
+
+    Loads the data, prints an overview and the null counts, fills numeric
+    nulls with the column mean, label-encodes 'Species', shows a count
+    plot, a correlation heatmap and a feature scatter plot, and selects
+    the two culmen features. Model training, evaluation and prediction
+    are still placeholders.
+    """
+    df = load_dataframe()
+    if df is None:
+        return
+
+    print("\n=== Overview ===")
+    print(df.describe())
+    print(df.head())
+    # info() writes its report itself and returns None, so it must not be
+    # wrapped in print(); calling it on the full frame makes the non-null
+    # counts cover every row instead of only the first five.
+    df.info()
+
+    print("\n=== Quality Assessment ===")
+    row_count = len(df)
+    print("Number of rows ", row_count)
+    print("Check for null-values ", df.isnull().sum())
+
+    print("\n=== Preprocessing ===")
+    # fill null-values with mean
+    df.fillna(df.mean(numeric_only=True), inplace=True)
+
+    # transform species column to numbers
+    label_encoder = LabelEncoder()
+    df["Species"] = label_encoder.fit_transform(df["Species"])
+
+    print("\n=== Countplot ===")
+    # Countplot check for the balancing of the data
+    sns.countplot(x=df["Species"])
+    plt.show()
+
+    print("\n=== Heatmap ===")
+    # Check correlation among other variables
+    sns.heatmap(df.corr(), annot=True, cmap="coolwarm")
+    plt.show()
+
+    print("\n=== Feature Selection ===")
+    features = ['Culmen Depth (mm)','Culmen Length (mm)']
+    y = df["Species"]
+    X = df[features]
+    # One-hot encode the target: one column per species, which is the
+    # per-column layout calc_f1_macro iterates over. X and y are not used
+    # yet; they are prepared for the training step below.
+    y = pd.get_dummies(y)
+
+    print("\n=== Visualize Features ===")
+    sns.scatterplot(x=df['Culmen Length (mm)'], y=df['Culmen Depth (mm)'], hue=df['Species'])
+    plt.show()
+
+    print("\n=== Model Training ===")
+    print("🛠️ under construction")
+
+    print("\n=== Model Evaluation ===")
+    print("🛠️ under construction")
+
+    print("\n=== Prediction ===")
+    # Culmen Depth (mm) = 18, Culmen Length (mm) = 50
+    #wild_penguin = np.array([18, 50]).reshape(1, -1)
+    #wild_penguin = get_penguin_from_cli()
+    print("🛠️ under construction")
+
+# Run the walkthrough only when this file is executed as a script, not
+# when it is imported as a module.
+if __name__ == "__main__":
+    main()