Dateien nach "/" hochladen
This commit is contained in:
commit
3d2b6f208e
21
Activities_rohdaten.csv
Normal file
21
Activities_rohdaten.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
Aktivitätstyp,Datum,Favorit,Titel,Distanz,Kalorien,Zeit,Ø Herzfrequenz,Maximale Herzfrequenz,Aerober TE,Ø Trittfrequenz,Max. Trittfrequenz,Ø Geschwindigkeit,Maximale Geschwindigkeit,Anstieg gesamt,Abstieg gesamt,Ø Schrittlänge,Durchschnittliches vertikales Verhältnis,Ø vertikale Bewegung,Ø Bodenkontaktzeit,Durchschnittliche Balance der Bodenkontaktzeit,Durchschnittliche SAP,Ø Trittfrequenz,Max. Trittfrequenz,Normalized Power® (NP®),Training Stress Score®,Ø Leistung,Max. Leistung,Schläge insgesamt,Ø Swolf,Ø Schlagrate,Schritte,Wiederholungen insgesamt,Sätze insgesamt,Minimale Temperatur,Dekompression,Beste Rundenzeit,Anzahl der Runden,Maximale Temperatur,Ø Atemfrequenz,Minimale Atemfrequenz,Maximale Atemfrequenz,Zeit in Bewegung,Verstrichene Zeit,Minimale Höhe,Maximale Höhe
|
||||||
|
Krafttraining,2025-09-23 18:03:52,false,"Krafttraining","0.00","136","00:24:35","102","134","0.5","--","--","--","--","--","--","--","--","--","--","--","--","--","--","--","0.0","--","--","--","--","--","8","4","1","26.0","Nein","00:24:35","1","29.0","--","--","--","00:24:32","00:24:35","--","--"
|
||||||
|
Krafttraining,2025-09-22 19:03:03,false,"Krafttraining","0.00","224","00:42:47","103","133","2.0","--","--","--","--","--","--","--","--","--","--","--","--","--","--","--","0.0","--","--","--","--","--","226","113","12","22.0","Nein","00:42:47","1","26.0","--","--","--","00:04:27.2","00:42:47","--","--"
|
||||||
|
Rennradfahren,2025-09-21 12:10:15,false,"Walenstadt Rennradfahren","87.59","1,724","02:48:41","158","179","4.5","--","--","31.2","61.2","441","439","--","--","--","--","--","--","86","111","--","0.0","--","--","14247","--","--","--","--","--","25.0","Nein","00:05:29.0","18","35.0","35","22","45","02:48:31","02:57:35","426","596"
|
||||||
|
Krafttraining,2025-09-19 19:08:18,false,"Krafttraining","0.00","403","01:12:08","110","155","0.6","--","--","--","--","--","--","--","--","--","--","--","--","--","--","--","0.0","--","--","--","--","--","652","326","26","25.0","Nein","01:12:08","1","28.0","--","--","--","00:13:10","01:12:08","--","--"
|
||||||
|
Rennradfahren,2025-09-18 18:30:39,false,"Walenstadt Rennradfahren","30.01","671","00:56:08","152","185","3.2","--","--","32.1","52.2","128","120","--","--","--","--","--","--","86","120","--","0.0","--","--","4790","--","--","--","--","--","17.0","Nein","00:00:01.7","7","24.0","33","23","41","00:56:05","00:56:08","426","497"
|
||||||
|
Rennradfahren,2025-09-17 15:30:28,false,"Walenstadt Rennradfahren","62.83","1,379","02:06:52","157","196","4.1","--","--","29.7","79.2","518","514","--","--","--","--","--","--","85","120","--","0.0","--","--","10431","--","--","--","--","--","18.0","Nein","00:05:20.0","13","23.0","34","24","45","02:06:47","02:10:28","426","715"
|
||||||
|
Cardio,2025-09-16 13:59:18,false,"Cardio","0.00","471","02:12:50","83","146","1.1","--","--","--","--","--","--","--","--","--","--","--","--","--","--","--","0.0","--","--","--","--","--","1,032","--","--","--","Nein","02:12:50","1","--","--","--","--","00:00:00","02:12:50","--","--"
|
||||||
|
Laufen,2025-09-15 18:47:23,false,"Walenstadt Laufen","6.06","508","00:43:15","147","188","2.8","149","186","7:08","3:03","15","12","0.94","10.4","9.8","315","49.5 % Links / 50.5 % Rechts","7:11","--","--","337","0.0","330","695","--","--","--","6,390","--","--","--","Nein","00:00:12.5","7","--","32","23","41","00:42:52","00:43:15","417","426"
|
||||||
|
Schwimmbad,2025-09-15 13:33:41,false,"Schwimmbad","2,325","494","00:51:44","135","151","2.4","--","--","2:14","1:46","--","--","--","--","--","--","--","--","--","--","--","0.0","--","--","1241","47","24","--","--","--","--","Nein","00:51:44","1","--","--","--","--","00:51:44","00:52:40","--","--"
|
||||||
|
Rennradfahren,2025-09-14 12:24:30,false,"Walenstadt Rennradfahren","30.11","877","01:08:03","164","189","4.4","--","--","26.5","76.4","399","400","--","--","--","--","--","--","85","126","--","0.0","--","--","5456","--","--","--","--","--","18.0","Nein","00:00:12.4","7","25.0","35","24","47","01:07:58","01:11:44","426","729"
|
||||||
|
Rennradfahren,2025-09-12 17:02:31,false,"Walenstadt Rennradfahren","30.05","781","00:54:38","168","199","4.1","--","--","33.0","50.5","130","127","--","--","--","--","--","--","89","109","--","0.0","--","--","4737","--","--","--","--","--","16.0","Nein","00:00:04.0","7","19.0","37","26","46","00:54:32","00:59:57","427","499"
|
||||||
|
Krafttraining,2025-09-11 18:52:04,false,"Krafttraining","0.00","220","00:39:14","105","166","0.8","--","--","--","--","--","--","--","--","--","--","--","--","--","--","--","0.0","--","--","--","--","--","220","110","12","24.0","Nein","00:39:14","1","26.0","--","--","--","00:06:50.4","00:39:14","--","--"
|
||||||
|
Rennradfahren,2025-09-10 19:00:07,false,"Walenstadt Rennradfahren","29.85","737","00:55:28","159","192","3.9","--","--","32.3","56.9","128","125","--","--","--","--","--","--","87","115","--","0.0","--","--","4628","--","--","--","--","--","16.0","Nein","00:07:12.5","6","22.0","34","19","43","00:55:25","00:55:28","427","498"
|
||||||
|
Radfahren,2025-09-10 17:47:49,false,"Radfahren","0.41","6","00:02:00","--","--","--","--","--","12.4","48.1","--","--","--","--","--","--","--","--","--","--","--","0.0","--","--","197","--","--","--","--","--","--","Nein","00:02:00","1","--","--","--","--","00:01:45","00:02:30","--","--"
|
||||||
|
Radfahren,2025-09-10 17:12:48,false,"Radfahren","0.21","6","00:02:05","--","--","--","--","--","6.2","41.9","--","--","--","--","--","--","--","--","--","--","--","0.0","--","--","102","--","--","--","--","--","--","Nein","00:02:05","1","--","--","--","--","00:00:55","00:09:00","--","--"
|
||||||
|
Krafttraining,2025-09-09 19:04:42,false,"Krafttraining","0.00","278","00:53:18","100","139","0.6","--","--","--","--","--","--","--","--","--","--","--","--","--","--","--","0.0","--","--","--","--","--","420","210","17","24.0","Nein","00:53:18","1","27.0","--","--","--","00:07:01.5","00:53:18","--","--"
|
||||||
|
Krafttraining,2025-09-07 11:32:58,false,"Krafttraining","0.00","272","00:34:46","119","151","1.3","--","--","--","--","--","--","--","--","--","--","--","--","--","--","--","0.0","--","--","--","--","--","202","101","11","26.0","Nein","00:34:46","1","27.0","--","--","--","00:04:58.5","00:34:46","--","--"
|
||||||
|
Radfahren,2025-09-06 11:23:07,false,"Radfahren","4.10","151","00:10:45","--","--","--","--","--","22.9","40.4","--","--","--","--","--","--","--","--","--","--","--","0.0","--","--","636","--","--","--","--","--","--","Nein","00:10:45","1","--","--","--","--","00:10:35","00:11:00","--","--"
|
||||||
|
Rennradfahren,2025-09-06 10:58:49,false,"Walenstadt Rennradfahren","126.36","2,401","04:28:11","145","175","3.9","--","--","28.3","54.7","597","573","--","--","--","--","--","--","80","163","--","0.0","--","--","19676","--","--","--","--","--","15.0","Nein","00:03:58.3","26","28.0","33","20","47","04:27:27","05:13:45","409","466"
|
||||||
|
Krafttraining,2025-09-05 13:32:08,false,"Krafttrainin","0.00","203","00:48:40","91","147","0.2","--","--","--","--","--","--","--","--","--","--","--","--","--","--","--","0.0","--","--","--","--","--","880","440","31","26.0","Nein","00:48:40","1","30.0","--","--","--","00:17:29","00:49:11","--","--"
|
||||||
|
204
svm_modell.py
Normal file
204
svm_modell.py
Normal file
@ -0,0 +1,204 @@
|
|||||||
|
# Machine Learning Modell 1 / Leistungsnachweis II Data Science / 1. Semester
|
||||||
|
# Support Vector Machine zur binären Klassifikation
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
|
||||||
|
from sklearn.preprocessing import StandardScaler, LabelEncoder
|
||||||
|
from sklearn.svm import SVC
|
||||||
|
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
|
||||||
|
from sklearn.impute import SimpleImputer
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
from datetime import datetime
|
||||||
|
import warnings
|
||||||
|
warnings.filterwarnings('ignore')
|
||||||
|
|
||||||
|
def load_and_preprocess_data():
    """Load the activity export and derive the binary classification target.

    Reads ``Activities_rohdaten.csv`` from the current working directory
    (``sep=None`` lets the pandas python engine detect the delimiter),
    strips whitespace from the column names and adds a ``sport_category``
    column: ``1`` for endurance activities, ``0`` for strength training.
    Rows whose activity type belongs to neither group (e.g. ``Cardio``)
    are removed.

    Returns:
        tuple: ``(df, available_features)`` where ``df`` is the filtered
        DataFrame including ``sport_category`` and ``available_features``
        is the list of candidate feature columns present in the file.
    """
    activities = pd.read_csv("Activities_rohdaten.csv", sep=None, engine="python")

    # Work on a copy so the raw frame stays untouched.
    df = activities.copy()
    df.columns = df.columns.str.strip()

    # Binary target: endurance sport (1) versus strength sport (0).
    # 'Schwimmbad' is the label the export uses for pool swims; with only
    # 'Schwimmen' in the list those rows were silently discarded.
    # NOTE(review): swim rows report 'Distanz' as e.g. "2,325" while rides
    # use e.g. "87.59" - presumably metres vs. kilometres; confirm units.
    endurance_sports = ['Laufen', 'Rennradfahren', 'Schwimmen', 'Schwimmbad', 'Radfahren']
    strength_sports = ['Krafttraining']

    category_by_sport = {sport: 1 for sport in endurance_sports}
    category_by_sport.update({sport: 0 for sport in strength_sports})

    # Unknown (or missing) activity types map to -1 and are filtered out.
    df['sport_category'] = (
        df['Aktivitätstyp'].map(category_by_sport).fillna(-1).astype(int)
    )
    # .copy() detaches the result from the unfiltered frame so later column
    # assignments do not trigger chained-assignment warnings.
    df = df[df['sport_category'] != -1].copy()

    # Candidate features considered relevant for the classification.
    numeric_features = ['Distanz', 'Kalorien', 'Ø Herzfrequenz', 'Maximale Herzfrequenz', 'Aerober TE', 'Training Stress Score®']

    # Keep only the candidates that exist in this export.
    available_features = [col for col in numeric_features if col in df.columns]

    return df, available_features
|
||||||
|
|
||||||
|
def feature_engineering(df, features):
    """Convert the selected columns to numbers, add time features and impute.

    Args:
        df: DataFrame containing the ``features`` columns and a
            ``sport_category`` column; a ``Datum`` column is optional.
        features: Names of the columns to use as model inputs. The list is
            not modified.

    Returns:
        tuple: ``(X_imputed, y, feature_names, imputer)`` - the imputed
        feature matrix, the ``sport_category`` series, the column names
        matching the matrix columns in order, and the fitted
        ``SimpleImputer``.
    """
    df_processed = df.copy()

    # Private copy: the original extended the caller's list in place, so a
    # second call with the same list would have duplicated the time columns.
    feature_names = list(features)

    for feature in feature_names:
        column = df_processed[feature]
        if not pd.api.types.is_numeric_dtype(column):
            # Text columns carry thousands separators such as "1,724".
            column = column.astype(str).str.replace(',', '', regex=False)
        # Placeholders like "--" cannot be parsed and become NaN.
        df_processed[feature] = pd.to_numeric(column, errors='coerce')

    # Time-based features are only available when a date column exists.
    if 'Datum' in df_processed.columns:
        timestamps = pd.to_datetime(df_processed['Datum'], errors='coerce')
        df_processed['Datum'] = timestamps
        df_processed['hour'] = timestamps.dt.hour
        df_processed['day_of_week'] = timestamps.dt.dayofweek
        df_processed['month'] = timestamps.dt.month
        for time_feature in ('hour', 'day_of_week', 'month'):
            if time_feature not in feature_names:
                feature_names.append(time_feature)

    X = df_processed[feature_names]
    y = df_processed['sport_category']

    # SimpleImputer discards columns that contain no observed value at all,
    # which would leave the returned names out of step with the matrix.
    # Dropping them up front yields the same matrix with aligned names.
    all_missing = set(X.columns[X.isna().all()])
    if all_missing:
        feature_names = [name for name in feature_names if name not in all_missing]
        X = X[feature_names]

    # Fill the remaining gaps with the per-column median.
    imputer = SimpleImputer(strategy='median')
    X_imputed = imputer.fit_transform(X)

    return X_imputed, y, feature_names, imputer
|
||||||
|
|
||||||
|
def train_svm_model(X, y):
    """Fit an SVM classifier, tuning its hyperparameters by grid search.

    The data is split 70/30 (stratified on ``y``, fixed seed), the scaler
    is fitted on the training part only, and a 5-fold grid search over
    ``C``, ``gamma`` and ``kernel`` selects the estimator by accuracy.

    Args:
        X: Feature matrix.
        y: Class labels aligned with the rows of ``X``.

    Returns:
        tuple: ``(best_svm, X_test_scaled, y_test, y_pred, best_params,
        cv_scores, scaler)``.
    """
    # Hold out 30 % of the rows for the final evaluation.
    split = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
    X_train, X_test, y_train, y_test = split

    # Scaling statistics come from the training rows only.
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(X_train)
    test_scaled = scaler.transform(X_test)

    # Candidate hyperparameters for the exhaustive search.
    search_space = {
        'C': [0.1, 1, 10, 100],
        'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
        'kernel': ['rbf', 'linear'],
    }
    search = GridSearchCV(
        SVC(random_state=42),
        search_space,
        cv=5,
        scoring='accuracy',
        n_jobs=-1,
    )
    search.fit(train_scaled, y_train)

    # Winning estimator, its test-set predictions and its CV scores on the
    # (already scaled) training data.
    winner = search.best_estimator_
    predictions = winner.predict(test_scaled)
    fold_scores = cross_val_score(winner, train_scaled, y_train, cv=5)

    return (
        winner,
        test_scaled,
        y_test,
        predictions,
        search.best_params_,
        fold_scores,
        scaler,
    )
|
||||||
|
|
||||||
|
def evaluate_model(y_test, y_pred, cv_scores, best_params):
    """Print the evaluation summary and plot the confusion matrix.

    Args:
        y_test: True labels of the test set (0 = strength, 1 = endurance).
        y_pred: Predicted labels for the test set.
        cv_scores: Array of cross-validation scores.
        best_params: Hyperparameters chosen by the grid search.

    Returns:
        float: Accuracy on the test set.

    Side effects:
        Prints to stdout, writes ``confusion_matrix.png`` and shows the
        figure.
    """
    # Fixed label order so the report and the matrix always cover both
    # classes, even when the small test split happens to contain only one.
    class_labels = [0, 1]
    class_names = ['Kraftsport', 'Ausdauersport']

    accuracy = accuracy_score(y_test, y_pred)

    print("=== SVM-Modell für Sportarten-Klassifikation ===")
    # The original added len(y_test) and len(y_pred), i.e. reported twice
    # the size of the test set as the number of records.
    print(f"Anzahl der Test-Datensätze: {len(y_test)}")
    print(f"\nBeste Hyperparameter: {best_params}")

    print(f"\nKreuzvalidierung (CV-Scores): {cv_scores}")
    print(f"Mittlere CV-Genauigkeit: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

    print(f"\nTest-Genauigkeit: {accuracy:.4f}")

    print("\nKlassifikationsbericht:")
    print(classification_report(
        y_test,
        y_pred,
        labels=class_labels,
        target_names=class_names,
        zero_division=0,
    ))

    print("\nKonfusionsmatrix:")
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)
    print(cm)

    # Heatmap of the confusion matrix; rows are true classes, columns are
    # predicted classes.
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title('Konfusionsmatrix - SVM Sportarten-Klassifikation')
    plt.ylabel('Wahre Klasse')
    plt.xlabel('Vorhergesagte Klasse')
    plt.tight_layout()
    plt.savefig('confusion_matrix.png', dpi=300, bbox_inches='tight')
    plt.show()

    return accuracy
|
||||||
|
|
||||||
|
def feature_importance_analysis(model, X_test, feature_names):
    """Report and plot feature weights for a linear-kernel SVM.

    For any other kernel only a notice is printed. ``X_test`` is accepted
    but not used.

    Args:
        model: Fitted estimator exposing ``kernel`` and, for the linear
            kernel, ``coef_``.
        X_test: Unused.
        feature_names: Names matching the entries of ``model.coef_[0]``.

    Side effects:
        Prints to stdout; for a linear kernel also writes
        ``feature_importance.png`` and shows the figure.
    """
    # Coefficients are only read for the linear kernel.
    if model.kernel != 'linear':
        print("\nFeature-Wichtigkeit nur für lineare SVM verfügbar")
        return

    # The absolute coefficient value serves as the importance score.
    weights = np.abs(model.coef_[0])
    ranking = pd.DataFrame({'feature': feature_names, 'importance': weights})
    ranking = ranking.sort_values('importance', ascending=False)

    print("\nFeature-Wichtigkeit (lineare SVM):")
    print(ranking)

    # Horizontal bar chart of the ten strongest features.
    top_ten = ranking.head(10)
    plt.figure(figsize=(10, 6))
    sns.barplot(data=top_ten, x='importance', y='feature')
    plt.title('Top 10 Feature-Wichtigkeiten')
    plt.xlabel('Wichtigkeit')
    plt.tight_layout()
    plt.savefig('feature_importance.png', dpi=300, bbox_inches='tight')
    plt.show()
|
||||||
|
|
||||||
|
def main():
    """Run the complete pipeline: load, engineer, train, evaluate, analyse.

    Returns:
        tuple: ``(model, scaler, imputer, feature_names)`` on success, or
        ``(None, None, None, None)`` when any step raises.
    """
    print("Starte SVM-Modell für Sportarten-Klassifikation...")

    try:
        # 1. Load and pre-process the data
        df, features = load_and_preprocess_data()
        print(f"Daten geladen: {df.shape[0]} Datensätze, {len(features)} Features")
        print(f"Verteilung der Sportarten: {df['sport_category'].value_counts().to_dict()}")

        # 2. Feature engineering
        X, y, feature_names, imputer = feature_engineering(df, features)
        print(f"Feature Engineering abgeschlossen: {X.shape[1]} Features")

        # 3. Train the SVM model
        model, X_test, y_test, y_pred, best_params, cv_scores, scaler = train_svm_model(X, y)

        # 4. Evaluate the model
        accuracy = evaluate_model(y_test, y_pred, cv_scores, best_params)

        # 5. Feature analysis
        feature_importance_analysis(model, X_test, feature_names)

        print(f"\nModell erfolgreich trainiert mit Genauigkeit: {accuracy:.4f}")

        # The importance plot is only written for a linear kernel, so the
        # summary lists it only in that case instead of unconditionally.
        saved_files = ['confusion_matrix.png']
        if model.kernel == 'linear':
            saved_files.append('feature_importance.png')
        print(f"\nGespeicherte Dateien: {', '.join(saved_files)}")

        return model, scaler, imputer, feature_names

    except Exception as e:
        # Top-level boundary: report the failure and hand back placeholders
        # so the unpacking at the call site still works.
        print(f"Fehler bei der Ausführung: {str(e)}")
        return None, None, None, None
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: run the pipeline and keep its artefacts available
    # as module-level names (handy in an interactive session).
    pipeline_result = main()
    model, scaler, imputer, feature_names = pipeline_result
|
||||||
Loading…
x
Reference in New Issue
Block a user