svm_modell.py aktualisiert

This commit is contained in:
Adu Alex Göllnitz 2025-11-19 16:46:01 +01:00
parent b15301a2b2
commit 812792b1fb

View File

@ -15,38 +15,32 @@ import warnings
warnings.filterwarnings('ignore') warnings.filterwarnings('ignore')
def load_and_preprocess_data(csv_path="Activities_rohdaten.csv"):
    """Load the activity export and label each row for binary classification.

    The CSV is read with automatic delimiter detection, column names are
    stripped of surrounding whitespace, and every row gets a
    ``sport_category`` label derived from its ``Aktivitätstyp`` value:

    * ``1``  -- endurance sport (cycling, swimming, running keywords)
    * ``0``  -- strength training (``kraft`` keyword)
    * ``-1`` -- excluded or unrecognised; these rows are dropped

    Args:
        csv_path: Path of the CSV file to read. Defaults to the file name
            the script has always used.

    Returns:
        A ``(df, available_features)`` tuple: the filtered DataFrame
        (including the new ``sport_category`` column) and the subset of the
        candidate numeric feature names that exist as columns in it.

    Raises:
        KeyError: If the CSV has no ``Aktivitätstyp`` column.
    """
    # Keywords are matched as lowercase substrings of the activity type.
    excluded_keywords = ('cardio', 'gehen', 'multisport')
    endurance_keywords = (
        'rad', 'bike', 'cycling', 'velo', 'schwimm', 'laufen', 'run',
    )

    def classify_activity(x):
        """Map one activity-type value to 1, 0 or -1 (see outer docstring)."""
        # str() also turns missing values (NaN) into text, which then simply
        # fails every keyword test and ends up as -1.
        name = str(x).lower()
        # Exclusions are checked first so they win over any later keyword.
        if any(keyword in name for keyword in excluded_keywords):
            return -1
        if 'kraft' in name:
            return 0
        if any(keyword in name for keyword in endurance_keywords):
            return 1
        return -1

    # sep=None lets the python engine sniff the delimiter from the file.
    activities = pd.read_csv(csv_path, sep=None, engine="python")

    df = activities.copy()
    df.columns = df.columns.str.strip()

    if 'Aktivitätstyp' not in df.columns:
        raise KeyError(
            "Spalte 'Aktivitätstyp' fehlt in " + str(csv_path)
        )

    df['sport_category'] = df['Aktivitätstyp'].apply(classify_activity)
    # Keep only the rows that belong to one of the two target classes.
    df = df[df['sport_category'] != -1]

    numeric_features = [
        'Distanz',
        'Kalorien',
        'Ø Herzfrequenz',
        'Maximale Herzfrequenz',
        'Aerober TE',
        'Training Stress Score®',
    ]
    # Exports differ in which columns they contain; use only those present.
    available_features = [col for col in numeric_features if col in df.columns]

    return df, available_features
def feature_engineering(df, features): # Führt Feature Engineering durch
def feature_engineering(df, features):
# Kopie erstellen # Kopie erstellen
df_processed = df.copy() df_processed = df.copy()