diff --git a/ml_calories.ipynb b/ml_calories.ipynb index bf4c44f..e520539 100644 --- a/ml_calories.ipynb +++ b/ml_calories.ipynb @@ -41,6 +41,7 @@ "cols = {\n", " \"Aktivitätstyp\": \"activity_type\",\n", " \"Distanz\": \"distance_km\",\n", + " \"Anstieg gesamt\": \"elevation_meters\",\n", " \"Kalorien\": \"calories_burned\",\n", " \"Zeit\": \"duration_str\",\n", " \"Ø Herzfrequenz\": \"heart_rate\"\n", @@ -109,6 +110,8 @@ "# get rid of commas in Distance and Calories Burned columns and convert to numeric\n", "data['distance_km'] = pd.to_numeric(\n", " data['distance_km'].str.replace(',', ''), errors='coerce')\n", + "data['elevation_meters'] = pd.to_numeric(\n", + " data['elevation_meters'].str.replace(',', ''), errors='coerce')\n", "data['calories_burned'] = pd.to_numeric(\n", " data['calories_burned'].str.replace(',', ''), errors='coerce')\n", "data['heart_rate'] = pd.to_numeric(\n", @@ -306,7 +309,8 @@ "outputs": [], "source": [ "# define features and target variable\n", - "features = ['activity_type', 'distance_km', 'duration_seconds', 'heart_rate']\n", + "features = ['activity_type', 'distance_km',\n", + " 'elevation_meters', 'duration_seconds', 'heart_rate']\n", "target = 'calories_burned'\n", "\n", "x = data[features]\n", @@ -319,7 +323,8 @@ " f\"Data split into {x_train.shape[0]} training rows and {x_test.shape[0]} testing rows.\")\n", "\n", "# Preprocessing: Scaling numeric features and encoding categorical features\n", - "numeric_features = ['distance_km', 'duration_seconds', 'heart_rate']\n", + "numeric_features = ['distance_km', 'elevation_meters',\n", + " 'duration_seconds', 'heart_rate']\n", "categorical_features = ['activity_type']\n", "preprocessor = ColumnTransformer(\n", " transformers=[\n",