From 812792b1fba126cf9b31357643757c05c4df0c4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adu=20Alex=20G=C3=B6llnitz?= Date: Wed, 19 Nov 2025 16:46:01 +0100 Subject: [PATCH] svm_modell.py aktualisiert --- svm_modell.py | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/svm_modell.py b/svm_modell.py index 53ab0f7..3e8291d 100644 --- a/svm_modell.py +++ b/svm_modell.py @@ -15,38 +15,32 @@ import warnings warnings.filterwarnings('ignore') def load_and_preprocess_data(): - - # CSV-Dateien laden activities = pd.read_csv("Activities_rohdaten.csv", sep=None, engine="python") - - # Daten bereinigen und konsolidieren df = activities.copy() - - # Spaltennamen bereinigen df.columns = df.columns.str.strip() - - # Sportarten für binäre Klassifikation definieren - # Wir klassifizieren zwischen 'Ausdauersport' und 'Kraftsport' - endurance_sports = ['Laufen', 'Rennradfahren', 'Schwimmen', 'Radfahren'] - strength_sports = ['Krafttraining'] - - # Binäre Zielvariable erstellen - df['sport_category'] = df['Aktivitätstyp'].apply(lambda x: 1 - if x in endurance_sports - else (0 if x in strength_sports else -1)) - - # Nur gültige Kategorien behalten + + def classify_activity(x): + x = str(x).lower() + if 'cardio' in x or 'gehen' in x or 'multisport' in x: + return -1 + if 'kraft' in x: + return 0 + if ('rad' in x or 'bike' in x or 'cycling' in x or 'velo' in x or 'schwimm' in x or 'laufen' in x or 'run' in x): + return 1 + return -1 + + df['sport_category'] = df['Aktivitätstyp'].apply(classify_activity) + df = df[df['sport_category'] != -1] - - # Features auswählen: + numeric_features = ['Distanz', 'Kalorien', 'Ø Herzfrequenz', 'Maximale Herzfrequenz', 'Aerober TE', 'Training Stress Score®'] - - # Nur verfügbare numerische Features auswählen + available_features = [col for col in numeric_features if col in df.columns] - + return df, available_features -def feature_engineering(df, features): # Führt Feature Engineering durch + +def feature_engineering(df, features): # Kopie erstellen df_processed = df.copy()