svm_modell.py aktualisiert
This commit is contained in:
parent
3d2b6f208e
commit
b15301a2b2
@ -16,8 +16,6 @@ warnings.filterwarnings('ignore')
|
|||||||
|
|
||||||
def load_and_preprocess_data():
|
def load_and_preprocess_data():
|
||||||
|
|
||||||
# Lädt und bereitet die Daten aus verschiedenen CSV-Dateien vor
|
|
||||||
|
|
||||||
# CSV-Dateien laden
|
# CSV-Dateien laden
|
||||||
activities = pd.read_csv("Activities_rohdaten.csv", sep=None, engine="python")
|
activities = pd.read_csv("Activities_rohdaten.csv", sep=None, engine="python")
|
||||||
|
|
||||||
@ -28,7 +26,6 @@ def load_and_preprocess_data():
|
|||||||
df.columns = df.columns.str.strip()
|
df.columns = df.columns.str.strip()
|
||||||
|
|
||||||
# Sportarten für binäre Klassifikation definieren
|
# Sportarten für binäre Klassifikation definieren
|
||||||
|
|
||||||
# Wir klassifizieren zwischen 'Ausdauersport' und 'Kraftsport'
|
# Wir klassifizieren zwischen 'Ausdauersport' und 'Kraftsport'
|
||||||
endurance_sports = ['Laufen', 'Rennradfahren', 'Schwimmen', 'Radfahren']
|
endurance_sports = ['Laufen', 'Rennradfahren', 'Schwimmen', 'Radfahren']
|
||||||
strength_sports = ['Krafttraining']
|
strength_sports = ['Krafttraining']
|
||||||
@ -41,7 +38,7 @@ def load_and_preprocess_data():
|
|||||||
# Nur gültige Kategorien behalten
|
# Nur gültige Kategorien behalten
|
||||||
df = df[df['sport_category'] != -1]
|
df = df[df['sport_category'] != -1]
|
||||||
|
|
||||||
# Features auswählen, die für die Klassifikation relevant sind
|
# Features auswählen:
|
||||||
numeric_features = ['Distanz', 'Kalorien', 'Ø Herzfrequenz', 'Maximale Herzfrequenz', 'Aerober TE', 'Training Stress Score®']
|
numeric_features = ['Distanz', 'Kalorien', 'Ø Herzfrequenz', 'Maximale Herzfrequenz', 'Aerober TE', 'Training Stress Score®']
|
||||||
|
|
||||||
# Nur verfügbare numerische Features auswählen
|
# Nur verfügbare numerische Features auswählen
|
||||||
@ -173,22 +170,22 @@ def main():
|
|||||||
print("Starte SVM-Modell für Sportarten-Klassifikation...")
|
print("Starte SVM-Modell für Sportarten-Klassifikation...")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 1. Daten laden und vorverarbeiten
|
# Daten laden und vorverarbeiten
|
||||||
df, features = load_and_preprocess_data()
|
df, features = load_and_preprocess_data()
|
||||||
print(f"Daten geladen: {df.shape[0]} Datensätze, {len(features)} Features")
|
print(f"Daten geladen: {df.shape[0]} Datensätze, {len(features)} Features")
|
||||||
print(f"Verteilung der Sportarten: {df['sport_category'].value_counts().to_dict()}")
|
print(f"Verteilung der Sportarten: {df['sport_category'].value_counts().to_dict()}")
|
||||||
|
|
||||||
# 2. Feature Engineering
|
# Feature Engineering
|
||||||
X, y, feature_names, imputer = feature_engineering(df, features)
|
X, y, feature_names, imputer = feature_engineering(df, features)
|
||||||
print(f"Feature Engineering abgeschlossen: {X.shape[1]} Features")
|
print(f"Feature Engineering abgeschlossen: {X.shape[1]} Features")
|
||||||
|
|
||||||
# 3. SVM-Modell trainieren
|
# SVM-Modell trainieren
|
||||||
model, X_test, y_test, y_pred, best_params, cv_scores, scaler = train_svm_model(X, y)
|
model, X_test, y_test, y_pred, best_params, cv_scores, scaler = train_svm_model(X, y)
|
||||||
|
|
||||||
# 4. Modell evaluieren
|
# Modell evaluieren
|
||||||
accuracy = evaluate_model(y_test, y_pred, cv_scores, best_params)
|
accuracy = evaluate_model(y_test, y_pred, cv_scores, best_params)
|
||||||
|
|
||||||
# 5. Feature-Analyse
|
# Feature-Analyse
|
||||||
feature_importance_analysis(model, X_test, feature_names)
|
feature_importance_analysis(model, X_test, feature_names)
|
||||||
|
|
||||||
print(f"\nModell erfolgreich trainiert mit Genauigkeit: {accuracy:.4f}")
|
print(f"\nModell erfolgreich trainiert mit Genauigkeit: {accuracy:.4f}")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user