From a545950451cb7919a3504f49167faa5eb7ae4d64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adu=20Alex=20G=C3=B6llnitz?=
Date: Thu, 4 Dec 2025 08:15:27 +0100
Subject: [PATCH] =?UTF-8?q?svm=5Fmodell=5F2.py=20gel=C3=B6scht?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 svm_modell_2.py | 255 ------------------------------------------------
 1 file changed, 255 deletions(-)
 delete mode 100644 svm_modell_2.py

diff --git a/svm_modell_2.py b/svm_modell_2.py
deleted file mode 100644
index 730cbdb..0000000
--- a/svm_modell_2.py
+++ /dev/null
@@ -1,255 +0,0 @@
-# Support Vector Machine Version 2.0
-
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sns
-from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
-from sklearn.preprocessing import StandardScaler, LabelEncoder
-from sklearn.svm import SVC
-from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
-from sklearn.impute import SimpleImputer
-import warnings
-warnings.filterwarnings('ignore')
-
-
-FEATURES = ['Distanz', 'Kalorien', 'Ø Herzfrequenz', 'Maximale Herzfrequenz', 'Aerober TE', 'Training Stress Score®', 'Aktivitätstyp']
-
-NUMERIC_FEATURES = ['Distanz', 'Kalorien', 'Ø Herzfrequenz', 'Maximale Herzfrequenz', 'Aerober TE', 'Training Stress Score®']
-
-
-def load_dataframe():
-    try:
-        df = pd.read_csv("Activities_rohdaten.csv", usecols=FEATURES)
-        return df
-    except FileNotFoundError:
-        print("File 'Activities_rohdaten.csv' not found.")
-        return None
-
-
-# --- Sport-type classification & labels ---
-def preprocess_dataframe(df):
-
-    def classify_activity(x):
-        x = str(x).lower()
-        if 'kraft' in x:
-            return 0  # Kraftsport (strength training)
-        if ('rad' in x or 'bike' in x or 'cycling' in x or 'velo' in x or 'schwimm' in x or 'laufen' in x or 'run' in x):
-            return 1  # Ausdauersport (endurance sport)
-        return -1
-
-    df['sport_category'] = df['Aktivitätstyp'].apply(classify_activity)
-
-    # Keep only valid classes
-    df = df[df['sport_category'] != -1].copy()
-
-    # Readable labels for plots and the LabelEncoder
-    df['sport_label'] = df['sport_category'].map({
-        0: 'Kraftsport',
-        1: 'Ausdauersport'
-    })
-
-    return df
-
-
-# --- Exploratory plots: countplot, scatterplot, correlation heatmap ---
-def exploratory_plots(df):
-    plt.figure(figsize=(6, 4))
-    sns.countplot(x='sport_label', data=df)
-    plt.title('Distribution of sport types')
-    plt.xlabel('Sport type')
-    plt.ylabel('Count')
-    plt.tight_layout()
-    plt.savefig('class_countplot.png', dpi=300, bbox_inches='tight')
-    plt.show()
-
-    available_numeric = [f for f in NUMERIC_FEATURES if f in df.columns]
-    if len(available_numeric) >= 2:
-        feat_x = available_numeric[0]
-        feat_y = available_numeric[1]
-
-        # Convert to numeric for plotting
-        df_plot = df.copy()
-        for col in [feat_x, feat_y]:
-            df_plot[col] = pd.to_numeric(df_plot[col], errors='coerce')
-
-        plt.figure(figsize=(6, 5))
-        sns.scatterplot(data=df_plot, x=feat_x, y=feat_y, hue='sport_label')
-        plt.title(f'Scatterplot: {feat_x} vs. {feat_y}')
-        plt.tight_layout()
-        plt.savefig('scatterplot_features.png', dpi=300, bbox_inches='tight')
-        plt.show()
-
-    if len(available_numeric) > 1:
-        df_corr = df.copy()
-        for col in available_numeric:
-            df_corr[col] = pd.to_numeric(df_corr[col], errors='coerce')
-
-        corr = df_corr[available_numeric].corr()
-        plt.figure(figsize=(8, 6))
-        sns.heatmap(corr, annot=True, fmt=".2f", cmap='coolwarm')
-        plt.title('Correlation heatmap of the numeric features')
-        plt.tight_layout()
-        plt.savefig('features_correlation_heatmap.png', dpi=300, bbox_inches='tight')
-        plt.show()
-
-
-# --- Feature engineering (incl. LabelEncoder & imputer) ---
-def feature_engineering(df):
-    df_processed = df.copy()
-
-    # Clean and convert the numeric features
-    for feature in NUMERIC_FEATURES:
-        if feature in df_processed.columns:
-            if df_processed[feature].dtype == 'object':
-                df_processed[feature] = df_processed[feature].astype(str).str.replace(',', '')
-            df_processed[feature] = pd.to_numeric(df_processed[feature], errors='coerce')
-
-    # Time-based features from the date column
-    # (only if 'Datum' was loaded; it is not in FEATURES, so this branch is normally skipped)
-    if 'Datum' in df_processed.columns:
-        df_processed['Datum'] = pd.to_datetime(df_processed['Datum'], errors='coerce')
-        df_processed['hour'] = df_processed['Datum'].dt.hour
-        df_processed['day_of_week'] = df_processed['Datum'].dt.dayofweek
-        df_processed['month'] = df_processed['Datum'].dt.month
-        time_features = ['hour', 'day_of_week', 'month']
-    else:
-        time_features = []
-
-    # All features that go into the model
-    feature_names = [f for f in NUMERIC_FEATURES if f in df_processed.columns] + time_features
-
-    X = df_processed[feature_names]
-
-    # LabelEncoder for sport_label ('Kraftsport'/'Ausdauersport')
-    label_encoder = LabelEncoder()
-    y = label_encoder.fit_transform(df_processed['sport_label'])
-
-    # Impute missing values
-    imputer = SimpleImputer(strategy='median')
-    X_imputed = imputer.fit_transform(X)
-
-    return X_imputed, y, feature_names, imputer, label_encoder
-
-
-# --- SVM training with hyperparameter tuning ---
-def train_svm_model(X, y):
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
-
-    scaler = StandardScaler()
-    X_train_scaled = scaler.fit_transform(X_train)
-    X_test_scaled = scaler.transform(X_test)
-
-    param_grid = {'C': [0.1, 1, 10, 100], 'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1], 'kernel': ['rbf', 'linear']}
-
-    svm = SVC(random_state=42)
-    grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
-    grid_search.fit(X_train_scaled, y_train)
-
-    best_svm = grid_search.best_estimator_
-
-    y_pred = best_svm.predict(X_test_scaled)
-
-    cv_scores = cross_val_score(best_svm, X_train_scaled, y_train, cv=5)
-
-    return best_svm, X_test_scaled, y_test, y_pred, grid_search.best_params_, cv_scores, scaler
-
-
-# --- Evaluation ---
-def evaluate_model(y_test, y_pred, cv_scores, best_params, label_encoder):
-    print("=== SVM model for sport-type classification ===")
-    print(f"Number of test samples: {len(y_test)}")
-    print(f"\nBest hyperparameters: {best_params}")
-
-    print(f"\nCross-validation (CV scores): {cv_scores}")
-    print(f"Mean CV accuracy: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")
-
-    test_acc = accuracy_score(y_test, y_pred)
-    print(f"\nTest accuracy: {test_acc:.4f}")
-
-    class_names = list(label_encoder.classes_)  # e.g. ['Ausdauersport', 'Kraftsport']
-
-    print("\nClassification report:")
-    print(classification_report(y_test, y_pred, target_names=class_names))
-
-    print("\nConfusion matrix:")
-    cm = confusion_matrix(y_test, y_pred)
-    print(cm)
-
-    # Heatmap of the confusion matrix
-    plt.figure(figsize=(6, 5))
-    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
-    plt.title('Confusion matrix - SVM sport-type classification')
-    plt.ylabel('True class')
-    plt.xlabel('Predicted class')
-    plt.tight_layout()
-    plt.savefig('confusion_matrix.png', dpi=300, bbox_inches='tight')
-    plt.show()
-
-    return test_acc
-
-
-# --- Feature importance (linear SVM only) ---
-def feature_importance_analysis(model, X_test, feature_names):
-    if hasattr(model, 'kernel') and model.kernel == 'linear':
-        importance = np.abs(model.coef_[0])
-        feature_importance = (pd.DataFrame({'feature': feature_names, 'importance': importance}).sort_values('importance', ascending=False))
-
-        print("\nFeature importance (linear SVM):")
-        print(feature_importance)
-
-        plt.figure(figsize=(10, 6))
-        sns.barplot(data=feature_importance.head(10), x='importance', y='feature')
-        plt.title('Top 10 feature importances')
-        plt.xlabel('Importance')
-        plt.tight_layout()
-        plt.savefig('feature_importance.png', dpi=300, bbox_inches='tight')
-        plt.show()
-    else:
-        print("\nFeature importance is only available for a linear SVM (the current kernel is not 'linear').")
-
-
-def main():
-    print("Starting SVM model for sport-type classification...")
-
-    try:
-        df = load_dataframe()
-        if df is None:
-            return None, None, None, None, None
-
-        df = preprocess_dataframe(df)
-        print(f"Data loaded and preprocessed: {df.shape[0]} records.")
-        print("Distribution of sport types:")
-        print(df['sport_label'].value_counts())
-
-        # Exploratory plots: countplot, scatterplot, correlation heatmap
-        exploratory_plots(df)
-
-        # Feature engineering
-        X, y, feature_names, imputer, label_encoder = feature_engineering(df)
-        print(f"Feature engineering finished: {X.shape[1]} features.")
-
-        # Train the SVM model
-        model, X_test, y_test, y_pred, best_params, cv_scores, scaler = train_svm_model(X, y)
-
-        # Evaluation
-        accuracy = evaluate_model(y_test, y_pred, cv_scores, best_params, label_encoder)
-
-        # Feature importances (only for a linear kernel)
-        feature_importance_analysis(model, X_test, feature_names)
-
-        print(f"\nModel trained successfully with accuracy: {accuracy:.4f}")
-        print("\nSaved files:")
-        print("- class_countplot.png")
-        print("- scatterplot_features.png")
-        print("- features_correlation_heatmap.png")
-        print("- confusion_matrix.png")
-        print("- feature_importance.png (only for a linear SVM)")
-
-        return model, scaler, imputer, feature_names, label_encoder
-
-    except Exception as e:
-        print(f"Error during execution: {str(e)}")
-        return None, None, None, None, None
-
-
-if __name__ == "__main__":
-    model, scaler, imputer, feature_names, label_encoder = main()