classification.py gelöscht
This commit is contained in:
parent
c852e6e532
commit
b19a6d86cc
@ -1,139 +0,0 @@
|
|||||||
import pandas as pd
|
|
||||||
import seaborn as sb
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from sklearn.preprocessing import LabelEncoder
|
|
||||||
from sklearn.model_selection import train_test_split
|
|
||||||
from sklearn.ensemble import RandomForestClassifier
|
|
||||||
from sklearn.neighbors import KNeighborsClassifier
|
|
||||||
from sklearn.tree import DecisionTreeClassifier
|
|
||||||
from sklearn.metrics import f1_score
|
|
||||||
|
|
||||||
# Name of the target column (the activity category to be predicted).
CLASS = "Aktivitätskategorie"

# Names of the CSV columns used as input features.
# Kept as a list (not a tuple) because other code in this file calls
# FEATURES.copy() and indexes it by position.
FEATURES = [
    "Distanz km",
    "Ø Herzfrequenz",
    "Maximale Herzfrequenz",
    "Aerober TE",
    "Ø Geschwindigkeit km/h",
    "Maximale Geschwindigkeit km/h",
    "Kalorien",
    "Zeit",
]
|
|
||||||
|
|
||||||
# Define class for all the classifiers and set the instance variables for the values
class Classifier:
    """Bundle an estimator with the containers that collect its F1 scores.

    Attributes:
        model: The estimator object handed to the constructor (stored as-is).
        f1s: Square ``n x n`` matrix of independent, initially empty lists.
            ``main`` appends a score to ``f1s[feature_index][subset_size - 1]``
            for every feature that was part of the evaluated subset.
        f1_means: ``n`` initially empty lists; ``main`` appends one mean per
            subset size to ``f1_means[feature_index]``.
        f1_medians: ``n`` initially empty lists. Not populated by any code
            visible in this file.
    """

    def __init__(self, model, n_features=None):
        """Create the empty score containers.

        Args:
            model: Estimator to wrap.
            n_features: Size of the score containers. Defaults to
                ``len(FEATURES)`` so existing ``Classifier(model=...)`` calls
                behave exactly as before.
        """
        if n_features is None:
            n_features = len(FEATURES)

        self.model = model
        # Build every inner list separately; `[[]] * n` would alias one list.
        self.f1s = [[[] for _ in range(n_features)] for _ in range(n_features)]
        self.f1_means = [[] for _ in range(n_features)]
        self.f1_medians = [[] for _ in range(n_features)]
|
|
||||||
|
|
||||||
# Function to load the dataframe
def load_dataframe(file="data/Sport_Daten.csv", columns=None):
    """Read the sport data CSV and return only the needed columns.

    Args:
        file: Path (or file-like object) passed to ``pd.read_csv``.
            Defaults to the original hard-coded location.
        columns: Column names to load. Defaults to all ``FEATURES`` plus the
            ``CLASS`` column.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` if the file could not be
        read or parsed, or does not contain the requested columns.
    """
    if columns is None:
        # Build a new list so that FEATURES itself is never modified.
        columns = [*FEATURES, CLASS]

    try:
        # sep is required because Excel exports the CSV with ";" as separator.
        return pd.read_csv(file, usecols=columns, sep=";")
    except (OSError, ValueError) as error:
        # OSError: missing/unreadable file. ValueError: missing columns and
        # pandas' parser / empty-data errors (both derive from ValueError).
        # A bare `except:` would also hide KeyboardInterrupt and real bugs.
        print(f"Load Dataframe failed: {error}")
        return None
|
|
||||||
|
|
||||||
# Function to create a lineplot
def show_lineplot(name_of_classifier, data_frame):
    """Draw, save and show the line plot for one classifier.

    Args:
        name_of_classifier: Display name; used in the title and, with
            whitespace replaced by underscores, in the output file name.
        data_frame: DataFrame with the columns ``"X"``, ``"Y"`` and
            ``"Features"``; one line is drawn per distinct ``"Features"`` value.

    The image is written to ``plots/lineplot_<name>.png``.
    """
    from pathlib import Path

    # Use an explicit figure: drawing on the implicit current axes would stack
    # the lines of successive calls whenever plt.show() does not close the
    # figure (e.g. with a non-interactive backend).
    fig, ax = plt.subplots()
    sb.lineplot(data=data_frame, x="X", y="Y", hue="Features", ax=ax)
    ax.set_title(name_of_classifier + ":\n Performance mit Anzahl Features")
    ax.set_xlabel("Anzahl Features")
    ax.set_ylabel("F1-Mittelwert")

    target = Path("plots") / ("lineplot_" + "_".join(name_of_classifier.split()) + ".png")
    # savefig does not create missing directories.
    target.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(target)

    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
|
|
||||||
|
|
||||||
# Main function
def main():
    """Evaluate three classifiers on every non-empty feature subset and plot the results.

    Steps:
        1. Load the data; return silently if loading failed.
        2. Label-encode the ``CLASS`` column.
        3. For each of the ``2**len(FEATURES) - 1`` non-empty feature subsets,
           train every classifier on an 80/20 split and compute the macro F1
           score. The score is stored once per feature contained in the
           subset, bucketed by subset size.
        4. Per classifier, average the scores per (feature, subset size) and
           hand the result to ``show_lineplot``.
    """
    # Load Data Frame and check if it was successful
    data_frame = load_dataframe()
    if data_frame is None:
        return
    print("✅ Load Dataframe")

    # Transform the activity column to integer codes
    data_frame[CLASS] = LabelEncoder().fit_transform(data_frame[CLASS])

    # Classifiers under test, keyed by their display name
    classifiers = {
        "Random Forest Classifier": Classifier(model=RandomForestClassifier(random_state=0)),
        "Decision Tree Classifier": Classifier(model=DecisionTreeClassifier(random_state=0)),
        "K-Nearest-Neighbors Classifier": Classifier(model=KNeighborsClassifier(n_neighbors=5)),
    }

    n_features = len(FEATURES)
    n_subsets = 2 ** n_features - 1

    # The target does not depend on the chosen feature subset, so build it
    # once instead of once per subset.
    # NOTE(review): one-hot encoding the label-encoded target presumably makes
    # scikit-learn treat this as a multilabel problem - confirm this is intended.
    y = pd.get_dummies(data_frame[CLASS])

    # Iterate through all combinations: the binary representation of the
    # counter decides which features are used (left-most digit = FEATURES[0]).
    for subset_id in range(1, n_subsets + 1):
        bits = bin(subset_id)[2:].zfill(n_features)
        used = [position for position, bit in enumerate(bits) if bit == "1"]
        features = [FEATURES[position] for position in used]
        size_bucket = len(features) - 1

        X = data_frame[features]

        # Split data in test and training data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=0
        )

        # For all classifiers train the model, predict values and calculate f1-score
        for classifier in classifiers.values():
            classifier.model.fit(X_train.values, y_train.values)
            pred = classifier.model.predict(X_test.values)
            f1 = f1_score(y_test.values, pred, average='macro')

            # Record the score for each feature that was used for training
            for position in used:
                classifier.f1s[position][size_bucket].append(f1)

        # Print progress on console, overwrite the current value
        progress = subset_id * 100 / n_subsets
        print(f"\r Progress: {progress:.2f}%", end="", flush=True)

    # Terminate the progress line so later output starts on a fresh line.
    print()

    # Make calculations for all classifiers
    for name, classifier in classifiers.items():
        # Average F1 score per feature and per subset size. Every bucket is
        # non-empty: each feature occurs in at least one subset of every size.
        for feature_index in range(n_features):
            for scores in classifier.f1s[feature_index]:
                classifier.f1_means[feature_index].append(sum(scores) / len(scores))

        # Long-format rows for the line plot: one row per (feature, subset size)
        rows = [
            {"Features": feature, "X": size, "Y": mean}
            for feature_index, feature in enumerate(FEATURES)
            for size, mean in enumerate(classifier.f1_means[feature_index], start=1)
        ]

        # Create Dataframe and hand it to the lineplot function
        show_lineplot(name, pd.DataFrame(rows))
|
|
||||||
|
|
||||||
# Start script: run main() only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
|
||||||
|
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user