classification.py gelöscht

2025-12-04 12:15:45 +01:00 · 2025-12-04 12:15:45 +01:00 · b19a6d86cc
commit b19a6d86cc
parent c852e6e532
1 changed files with 0 additions and 139 deletions
--- a/classification.py
+++ b/classification.py
@@ -1,139 +0,0 @@
-import pandas as pd
-import seaborn as sb
-import matplotlib.pyplot as plt
-from sklearn.preprocessing import LabelEncoder
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.metrics import f1_score
-
-CLASS = "Aktivitätskategorie"  # CSV column used as the classification target
-FEATURES = [  # CSV columns used as model inputs; every non-empty subset of them is evaluated
-    "Distanz km", 
-    "Ø Herzfrequenz",
-    "Maximale Herzfrequenz",
-    "Aerober TE", 
-    "Ø Geschwindigkeit km/h",
-    "Maximale Geschwindigkeit km/h",
-    "Kalorien",
-    "Zeit"
-]
-
-# Bundles one model with the containers that collect its F1 scores per feature.
-class Classifier:
-    def __init__(self, model):
-        self.model = model  # estimator; the script calls fit() and predict() on it
-        self.f1s = [[[] for _ in FEATURES] for _ in FEATURES]  # f1s[feature][subset_size - 1] -> list of F1 scores
-        self.f1_means = [[] for _ in FEATURES]  # f1_means[feature] -> mean F1 per subset size
-        self.f1_medians = [[] for _ in FEATURES]  # not written or read anywhere in the code shown
-
-# Read the feature columns plus the class column from the semicolon-separated CSV export.
-# Returns the DataFrame, or None (after printing the reason) when the file cannot be read.
-def load_dataframe():
-    file = "data/Sport_Daten.csv"
-    columns = [*FEATURES, CLASS]  # new list, so FEATURES itself stays unchanged
-    try:
-        return pd.read_csv(file, usecols=columns, sep=";")  # ";" is the separator of the Excel export
-    except (OSError, ValueError) as error:  # missing/unreadable file, parse error, missing column
-        print(f"❌ Could not load {file}: {error}")
-        return None
-  
-# Plot mean F1 against the number of features (one line per feature), save it as PNG, show it.
-def show_lineplot(name_of_classifier, data_frame):
-    axes = sb.lineplot(data=data_frame, x="X", y="Y", hue="Features")
-    axes.set_title(name_of_classifier + ":\n Performance mit Anzahl Features")
-    axes.set(xlabel="Anzahl Features", ylabel="F1-Mittelwert")
-    plt.savefig("plots/lineplot_" + "_".join(name_of_classifier.split()) + ".png")
-    plt.show()
-    plt.close()  # drop the figure so the next call cannot draw on top of this one
- 
-# Entry point: score three classifiers on every non-empty subset of FEATURES and plot, per
-# classifier and per feature, the mean F1 against the number of features used.
-# Returns None; the figures are saved and displayed by show_lineplot.
-def main():
-    # Nothing to do when the CSV could not be loaded.
-    data_frame = load_dataframe()
-    if data_frame is None:
-        return
-    print("✅ Load Dataframe")
-
-    # Replace the activity names in the class column with integer codes.
-    data_frame[CLASS] = LabelEncoder().fit_transform(data_frame[CLASS])
-
-    # The three models that are compared.
-    random_forest = RandomForestClassifier(random_state=0)
-    decision_tree = DecisionTreeClassifier(random_state=0)
-    k_neighbors = KNeighborsClassifier(n_neighbors=5)
-
-    # Display name -> wrapper holding the model and the scores collected for it.
-    classifiers = {
-        "Random Forest Classifier": Classifier(model=random_forest),
-        "Decision Tree Classifier": Classifier(model=decision_tree),
-        "K-Nearest-Neighbors Classifier": Classifier(model=k_neighbors),
-    }
-
-    # Sizes shared by the subset loop and the progress display.
-    feature_count = len(FEATURES)
-    subset_total = 2 ** feature_count - 1  # number of non-empty feature subsets
-
-    # The target is the same for every subset, so it is one-hot encoded once, outside the loop.
-    # NOTE(review): with a one-hot target the models are fitted and scored on an indicator
-    # matrix instead of a single label column - confirm that this is intended.
-    y = pd.get_dummies(data_frame[CLASS])
-
-    # Every integer 1..subset_total is read as a bit mask over FEATURES;
-    # the leftmost character of the zero-padded binary string belongs to FEATURES[0].
-    for mask in range(1, subset_total + 1):
-        bits = format(mask, "b").zfill(feature_count)
-        features = [feature for feature, bit in zip(FEATURES, bits) if bit == "1"]
-        X = data_frame[features]
-
-        # Split into training and test data; test_size and random_state are the same
-        # for every subset.
-        X_train, X_test, y_train, y_test = train_test_split(
-            X, y, test_size=0.2, random_state=0
-        )
-
-        # Train each model on the subset and score it with the macro-averaged F1.
-        for classifier in classifiers.values():
-            classifier.model.fit(X_train.values, y_train.values)
-            pred = classifier.model.predict(X_test.values)
-            f1 = f1_score(y_test.values, pred, average='macro')
-
-            # File the score under every feature of the subset,
-            # in the bucket that belongs to the subset size.
-            for position, bit in enumerate(bits):
-                if bit == "1":
-                    classifier.f1s[position][len(features) - 1].append(f1)
-
-        # Progress in percent of all subsets;
-        # "\r" and end="" make each update overwrite the previous one on the console.
-        progress = mask * 100 / subset_total
-        print(f"\r Progress: {progress:.2f}%", end="", flush=True)
-
-    # Aggregate and plot the collected scores, one figure per classifier.
-    for name, classifier in classifiers.items():
-        # f1_means[feature][k]: mean F1 over all subsets of size k + 1 that contain the feature.
-        # No bucket is empty: for each feature there are subsets of every size containing it.
-        for per_feature_scores, means in zip(classifier.f1s, classifier.f1_means):
-            for scores in per_feature_scores:
-                means.append(sum(scores) / len(scores))
-
-        # One row per (feature, subset size) pair; the keys are the column names
-        # that show_lineplot passes to seaborn.
-        rows = [
-            {"Features": feature, "X": size, "Y": mean}
-            for feature, means in zip(FEATURES, classifier.f1_means)
-            for size, mean in enumerate(means, start=1)
-        ]
-
-        # Build the DataFrame and hand it to the plotting function.
-        line_plot_dataframe = pd.DataFrame(rows)
-        show_lineplot(name, line_plot_dataframe)
-        
-# Run main() only when the file is executed as a script, not when it is imported.
-if __name__ == "__main__":
-    main()
-
-