svm_modell.py aktualisiert
This commit is contained in:
parent
b15301a2b2
commit
812792b1fb
@ -15,38 +15,32 @@ import warnings
|
|||||||
warnings.filterwarnings('ignore')
|
warnings.filterwarnings('ignore')
|
||||||
|
|
||||||
def load_and_preprocess_data():
|
def load_and_preprocess_data():
|
||||||
|
|
||||||
# CSV-Dateien laden
|
|
||||||
activities = pd.read_csv("Activities_rohdaten.csv", sep=None, engine="python")
|
activities = pd.read_csv("Activities_rohdaten.csv", sep=None, engine="python")
|
||||||
|
|
||||||
# Daten bereinigen und konsolidieren
|
|
||||||
df = activities.copy()
|
df = activities.copy()
|
||||||
|
|
||||||
# Spaltennamen bereinigen
|
|
||||||
df.columns = df.columns.str.strip()
|
df.columns = df.columns.str.strip()
|
||||||
|
|
||||||
# Sportarten für binäre Klassifikation definieren
|
def classify_activity(x):
|
||||||
# Wir klassifizieren zwischen 'Ausdauersport' und 'Kraftsport'
|
x = str(x).lower()
|
||||||
endurance_sports = ['Laufen', 'Rennradfahren', 'Schwimmen', 'Radfahren']
|
if 'cardio' in x or 'gehen' in x or 'multisport' in x:
|
||||||
strength_sports = ['Krafttraining']
|
return -1
|
||||||
|
if 'kraft' in x:
|
||||||
|
return 0
|
||||||
|
if ('rad' in x or 'bike' in x or 'cycling' in x or 'velo' in x or 'schwimm' in x or 'laufen' in x or 'run' in x):
|
||||||
|
return 1
|
||||||
|
return -1
|
||||||
|
|
||||||
# Binäre Zielvariable erstellen
|
df['sport_category'] = df['Aktivitätstyp'].apply(classify_activity)
|
||||||
df['sport_category'] = df['Aktivitätstyp'].apply(lambda x: 1
|
|
||||||
if x in endurance_sports
|
|
||||||
else (0 if x in strength_sports else -1))
|
|
||||||
|
|
||||||
# Nur gültige Kategorien behalten
|
|
||||||
df = df[df['sport_category'] != -1]
|
df = df[df['sport_category'] != -1]
|
||||||
|
|
||||||
# Features auswählen:
|
|
||||||
numeric_features = ['Distanz', 'Kalorien', 'Ø Herzfrequenz', 'Maximale Herzfrequenz', 'Aerober TE', 'Training Stress Score®']
|
numeric_features = ['Distanz', 'Kalorien', 'Ø Herzfrequenz', 'Maximale Herzfrequenz', 'Aerober TE', 'Training Stress Score®']
|
||||||
|
|
||||||
# Nur verfügbare numerische Features auswählen
|
|
||||||
available_features = [col for col in numeric_features if col in df.columns]
|
available_features = [col for col in numeric_features if col in df.columns]
|
||||||
|
|
||||||
return df, available_features
|
return df, available_features
|
||||||
|
|
||||||
def feature_engineering(df, features): # Führt Feature Engineering durch
|
|
||||||
|
def feature_engineering(df, features):
|
||||||
|
|
||||||
# Kopie erstellen
|
# Kopie erstellen
|
||||||
df_processed = df.copy()
|
df_processed = df.copy()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user