diff --git a/svm_modell.py b/svm_modell.py index 3e8291d..ec59336 100644 --- a/svm_modell.py +++ b/svm_modell.py @@ -19,6 +19,7 @@ def load_and_preprocess_data(): df = activities.copy() df.columns = df.columns.str.strip() +# Kategorisieren der Sportklassen def classify_activity(x): x = str(x).lower() if 'cardio' in x or 'gehen' in x or 'multisport' in x: @@ -33,6 +34,7 @@ def load_and_preprocess_data(): df = df[df['sport_category'] != -1] +# Numerische Daten welche miteinbezogen werden numeric_features = ['Distanz', 'Kalorien', 'Ø Herzfrequenz', 'Maximale Herzfrequenz', 'Aerober TE', 'Training Stress Score®'] available_features = [col for col in numeric_features if col in df.columns] @@ -141,14 +143,12 @@ def evaluate_model(y_test, y_pred, cv_scores, best_params): # Evaluiert def feature_importance_analysis(model, X_test, feature_names): # Analysiert die Wichtigkeit der Features (für lineare SVM) if model.kernel == 'linear': - # Feature-Wichtigkeit für lineare SVM - importance = np.abs(model.coef_[0]) + importance = np.abs(model.coef_[0]) # Feature-Wichtigkeit für lineare SVM feature_importance = pd.DataFrame({'feature': feature_names, 'importance': importance}).sort_values('importance', ascending=False) print("\nFeature-Wichtigkeit (lineare SVM):") print(feature_importance) - - # Visualisierung + plt.figure(figsize=(10, 6)) sns.barplot(data=feature_importance.head(10), x='importance', y='feature') plt.title('Top 10 Feature-Wichtigkeiten') @@ -159,6 +159,7 @@ def feature_importance_analysis(model, X_test, feature_names): # Analysiert d else: print("\nFeature-Wichtigkeit nur für lineare SVM verfügbar") + def main(): print("Starte SVM-Modell für Sportarten-Klassifikation...")