Add classification/classification_template.py and update README
This commit is contained in:
parent
add94d05c7
commit
a311e2b3a9
@ -5,4 +5,5 @@ Install these Python libraries in your virtual environment. Use (uv) pip install
|
||||
* numpy
|
||||
* matplotlib
|
||||
* openpyxl
|
||||
* scikit-learn
|
||||
* scikit-learn
|
||||
* seaborn
|
||||
110
classification/classification_template.py
Normal file
110
classification/classification_template.py
Normal file
@ -0,0 +1,110 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.metrics import f1_score
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.ensemble import RandomForestClassifier
|
||||
from sklearn.neighbors import KNeighborsClassifier
|
||||
from sklearn.tree import DecisionTreeClassifier
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
|
||||
# Columns read from penguins.csv by load_dataframe(): the measurement columns
# plus 'Species', which main() uses as the classification target.
FEATURES = [
    'Flipper Length (mm)',
    'Body Mass (g)',
    'Culmen Depth (mm)',
    'Culmen Length (mm)',
    'Species',
]
|
||||
|
||||
def load_dataframe():
    """Read the columns listed in FEATURES from 'penguins.csv'.

    The file is looked up relative to the current working directory.

    Returns:
        The loaded pandas DataFrame, or None (after printing a message)
        when 'penguins.csv' does not exist.
    """
    try:
        return pd.read_csv("penguins.csv", usecols=FEATURES)
    except FileNotFoundError:
        print("Datei 'penguins.csv' nicht gefunden.")
    return None
|
||||
|
||||
def calc_precision(tp, fp):
    """Return the precision TP / (TP + FP).

    Args:
        tp: Number of true positives.
        fp: Number of false positives.

    Returns:
        The precision as a float, or 0.0 when nothing was predicted
        positive (tp + fp == 0) so that no division by zero occurs.
    """
    predicted_positive = tp + fp
    if predicted_positive == 0:
        return 0.0
    return float(tp / predicted_positive)


def calc_recall(tp, fn):
    """Return the recall TP / (TP + FN).

    Args:
        tp: Number of true positives.
        fn: Number of false negatives.

    Returns:
        The recall as a float, or 0.0 when there are no actual positives
        (tp + fn == 0) so that no division by zero occurs.
    """
    actual_positive = tp + fn
    if actual_positive == 0:
        return 0.0
    return float(tp / actual_positive)


def calc_f1_score(y_true, y_pred):
    """Return the F1 score of one binary label.

    Args:
        y_true: Sequence of ground-truth labels; expected to be binary
            (bool or 0/1), one entry per sample.
        y_pred: Sequence of predicted labels, same length and encoding
            as y_true.

    Returns:
        The harmonic mean of precision and recall as a float, or 0.0 when
        both precision and recall are 0.
    """
    # https://stackoverflow.com/questions/64860091/computing-macro-average-f1-score-using-numpy-pythonwithout-using-scikit-learn
    truth = np.asarray(y_true).astype(bool)
    predicted = np.asarray(y_pred).astype(bool)

    # True negatives are not needed: they enter neither precision nor recall.
    tp = np.sum(predicted & truth)
    fp = np.sum(predicted & ~truth)
    fn = np.sum(~predicted & truth)

    precision = calc_precision(tp, fp)
    recall = calc_recall(tp, fn)
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)
|
||||
|
||||
|
||||
def calc_f1_macro(y_true, y_pred):
    """Return the macro-averaged F1 score over all label columns.

    Args:
        y_true: One-hot ground truth with one column per class; iterating
            it must yield the column labels and ``y_true[label].values``
            must give that column's values (e.g. a pandas DataFrame).
        y_pred: Predictions addressable by the same column labels, or a
            2-D NumPy array whose columns are in the same order as
            y_true's columns.

    Returns:
        The unweighted mean of the per-column F1 scores computed by
        calc_f1_score.
    """
    columns = list(y_true)
    if isinstance(y_pred, np.ndarray) and y_pred.ndim == 2:
        # Indexing a 2-D array with a column label would select a row (or
        # fail); attach y_true's labels so the lookup below picks columns.
        y_pred = pd.DataFrame(y_pred, columns=columns)

    f1_scores = [
        calc_f1_score(y_true[column].values, y_pred[column])
        for column in columns
    ]
    return np.mean(f1_scores)
|
||||
|
||||
def get_penguin_from_cli():
    """Ask on the command line for one penguin's culmen measurements.

    The culmen depth is requested first, then the culmen length.

    Returns:
        A NumPy array of shape (1, 2) holding [depth, length], or None
        (after printing an error message) when an entry cannot be parsed
        as a float.
    """
    prompts = ("Culmen Depth (mm): ", "Culmen Length (mm): ")
    try:
        # Evaluated in order; the first unparsable entry stops the prompting.
        measurements = [float(input(prompt)) for prompt in prompts]
    except ValueError:
        print("Invalid input. Please enter numeric values.")
        return None
    return np.array(measurements).reshape(1, -1)
|
||||
|
||||
def main():
    """Run the penguin classification walkthrough.

    Loads the dataset, prints an overview and a quality report, fills
    missing numeric values with the column mean, label-encodes the species,
    shows a count plot, a correlation heatmap and a feature scatter plot,
    and selects the model features. Model training, evaluation and
    prediction are still placeholders.
    """
    df = load_dataframe()
    if df is None:
        # load_dataframe() has already reported the missing file.
        return

    print("\n=== Overview ===")
    print(df.describe())
    print(df.head())
    # info() writes its report to stdout itself and returns None, so it must
    # not be wrapped in print(); it is called on the full frame so the counts
    # describe the whole dataset rather than only the first five rows.
    df.info()

    print("\n=== Quality Assessment ===")
    row_count = len(df)
    print("Number of rows ", row_count)
    print("Check for null-values ", df.isnull().sum())

    print("\n=== Preprocessing ===")
    # fill null-values with mean
    df.fillna(df.mean(numeric_only=True), inplace=True)

    # transform species column to numbers
    label_encoder = LabelEncoder()
    df["Species"] = label_encoder.fit_transform(df["Species"])

    print("\n=== Countplot ===")
    # Countplot check for the balancing of the data
    sns.countplot(x=df["Species"])
    plt.show()

    print("\n=== Heatmap ===")
    # Check correlation among other variables
    sns.heatmap(df.corr(), annot=True, cmap="coolwarm")
    plt.show()

    print("\n=== Feature Selection ===")
    features = ['Culmen Depth (mm)','Culmen Length (mm)']
    y = df["Species"]
    # X and y are prepared for the training step that is still to be written.
    X = df[features]
    # One-hot encode the target: one indicator column per species.
    y = pd.get_dummies(y)

    print("\n=== Visualize Features ===")
    sns.scatterplot(x=df['Culmen Length (mm)'], y=df['Culmen Depth (mm)'], hue=df['Species'])
    plt.show()

    print("\n=== Model Training ===")
    print("🛠️ under construction")

    print("\n=== Model Evaluation ===")
    print("🛠️ under construction")

    print("\n=== Prediction ===")
    # Culmen Depth (mm) = 18, Culmen Length (mm) = 50
    #wild_penguin = np.array([18, 50]).reshape(1, -1)
    #wild_penguin = get_penguin_from_cli()
    print("🛠️ under construction")
|
||||
|
||||
# Run the walkthrough only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Loading…
x
Reference in New Issue
Block a user