{
"cells": [
{
"cell_type": "markdown",
"id": "f6f0a842",
"metadata": {},
"source": [
"# Exploratory Data Analysis"
]
},
{
"cell_type": "markdown",
"id": "386dcd6d",
"metadata": {},
"source": [
"## Import the required libraries"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f9b0ae4c",
"metadata": {},
"outputs": [],
"source": [
"#importing the required libraries\n",
"import numpy as np \n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import os \n"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "1c92d2c2",
"metadata": {},
"outputs": [],
"source": [
"# Read the raw activity and sleep exports from the notebook's working directory.\n",
"df_activities = pd.read_csv(filepath_or_buffer=\"all_activities.csv\")\n",
"df_sleep = pd.read_csv(filepath_or_buffer=\"sleep.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "4f18cf6a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Aktivitätstyp | \n",
" Datum | \n",
" Favorit | \n",
" Titel | \n",
" Distanz | \n",
" Kalorien | \n",
" Zeit | \n",
" Ø Herzfrequenz | \n",
" Maximale Herzfrequenz | \n",
" Aerober TE | \n",
" ... | \n",
" Maximale Atemfrequenz | \n",
" Stressänderung | \n",
" Stress: Start | \n",
" Stress: Ende | \n",
" Ø Stress | \n",
" Maximaler Stress | \n",
" Zeit in Bewegung | \n",
" Verstrichene Zeit | \n",
" Minimale Höhe | \n",
" Maximale Höhe | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Laufen | \n",
" 2025-09-21 09:53:57 | \n",
" False | \n",
" Berlin - BMW Berlin Marathon (42.195 km) | \n",
" 42.65 | \n",
" 2,817 | \n",
" 04:35:15 | \n",
" 148 | \n",
" 165 | \n",
" 5.0 | \n",
" ... | \n",
" 41 | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" 04:34:55 | \n",
" 04:35:15 | \n",
" 33 | \n",
" 56 | \n",
"
\n",
" \n",
" | 1 | \n",
" Laufen | \n",
" 2025-09-20 15:18:50 | \n",
" False | \n",
" Berlin Laufen | \n",
" 3.72 | \n",
" 246 | \n",
" 00:20:13 | \n",
" 144 | \n",
" 161 | \n",
" 2.8 | \n",
" ... | \n",
" 38 | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" 00:20:12 | \n",
" 00:20:13 | \n",
" 40 | \n",
" 47 | \n",
"
\n",
" \n",
" | 2 | \n",
" Virtuelles Radfahren | \n",
" 2025-09-19 12:31:00 | \n",
" False | \n",
" Zwift - Renewal on Bridges and Boardwalks in M... | \n",
" 12.81 | \n",
" 210 | \n",
" 00:30:18 | \n",
" 116 | \n",
" 134 | \n",
" 1.5 | \n",
" ... | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" 00:30:17 | \n",
" 00:30:17 | \n",
" 4 | \n",
" 59 | \n",
"
\n",
" \n",
" | 3 | \n",
" Laufen | \n",
" 2025-09-15 11:16:13 | \n",
" False | \n",
" Rüthi Laufen | \n",
" 5.60 | \n",
" 350 | \n",
" 00:30:39 | \n",
" 133 | \n",
" 151 | \n",
" 2.8 | \n",
" ... | \n",
" 39 | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" 00:30:37 | \n",
" 00:30:39 | \n",
" 421 | \n",
" 437 | \n",
"
\n",
" \n",
" | 4 | \n",
" Laufen | \n",
" 2025-09-13 10:51:36 | \n",
" False | \n",
" Rüthi Laufen | \n",
" 9.01 | \n",
" 549 | \n",
" 00:44:56 | \n",
" 144 | \n",
" 171 | \n",
" 3.5 | \n",
" ... | \n",
" 41 | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" 00:44:45 | \n",
" 00:44:56 | \n",
" 421 | \n",
" 435 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 52 columns
\n",
"
"
],
"text/plain": [
" Aktivitätstyp Datum Favorit \\\n",
"0 Laufen 2025-09-21 09:53:57 False \n",
"1 Laufen 2025-09-20 15:18:50 False \n",
"2 Virtuelles Radfahren 2025-09-19 12:31:00 False \n",
"3 Laufen 2025-09-15 11:16:13 False \n",
"4 Laufen 2025-09-13 10:51:36 False \n",
"\n",
" Titel Distanz Kalorien \\\n",
"0 Berlin - BMW Berlin Marathon (42.195 km) 42.65 2,817 \n",
"1 Berlin Laufen 3.72 246 \n",
"2 Zwift - Renewal on Bridges and Boardwalks in M... 12.81 210 \n",
"3 Rüthi Laufen 5.60 350 \n",
"4 Rüthi Laufen 9.01 549 \n",
"\n",
" Zeit Ø Herzfrequenz Maximale Herzfrequenz Aerober TE ... \\\n",
"0 04:35:15 148 165 5.0 ... \n",
"1 00:20:13 144 161 2.8 ... \n",
"2 00:30:18 116 134 1.5 ... \n",
"3 00:30:39 133 151 2.8 ... \n",
"4 00:44:56 144 171 3.5 ... \n",
"\n",
" Maximale Atemfrequenz Stressänderung Stress: Start Stress: Ende Ø Stress \\\n",
"0 41 -- -- -- -- \n",
"1 38 -- -- -- -- \n",
"2 -- -- -- -- -- \n",
"3 39 -- -- -- -- \n",
"4 41 -- -- -- -- \n",
"\n",
" Maximaler Stress Zeit in Bewegung Verstrichene Zeit Minimale Höhe \\\n",
"0 -- 04:34:55 04:35:15 33 \n",
"1 -- 00:20:12 00:20:13 40 \n",
"2 -- 00:30:17 00:30:17 4 \n",
"3 -- 00:30:37 00:30:39 421 \n",
"4 -- 00:44:45 00:44:56 421 \n",
"\n",
" Maximale Höhe \n",
"0 56 \n",
"1 47 \n",
"2 59 \n",
"3 437 \n",
"4 435 \n",
"\n",
"[5 rows x 52 columns]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five activity rows to see which columns the export contains.\n",
"df_activities.head(n=5)\n"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "e65aa687",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Sleep Score 7 Tage | \n",
" Score | \n",
" Ruheherzfrequenz | \n",
" Body Battery | \n",
" Pulsoximeter | \n",
" Atmung | \n",
" HFV-Status | \n",
" Qualität | \n",
" Dauer | \n",
" Schlafbedürfnis | \n",
" Schlafenszeit | \n",
" Aufstehzeit;;;;;;;;;;; | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2025-09-30 | \n",
" 77 | \n",
" 44 | \n",
" 55 | \n",
" -- | \n",
" 11.38 | \n",
" 83 | \n",
" Ausreichend | \n",
" 6h 47min | \n",
" 7h 0min | \n",
" 11:22 PM | \n",
" 6:09 AM;;;;;;;;;;; | \n",
"
\n",
" \n",
" | 1 | \n",
" 2025-09-29 | \n",
" 73 | \n",
" 46 | \n",
" 62 | \n",
" -- | \n",
" 12 | \n",
" 84 | \n",
" Ausreichend | \n",
" 9h 6min | \n",
" 8h 40min | \n",
" 10:52 PM | \n",
" 8:02 AM;;;;;;;;;;; | \n",
"
\n",
" \n",
" | 2 | \n",
" 2025-09-28 | \n",
" 34 | \n",
" 47 | \n",
" 21 | \n",
" -- | \n",
" 14.96 | \n",
" 84 | \n",
" Schlecht | \n",
" 6h 34min | \n",
" 7h 40min | \n",
" 12:34 AM | \n",
" 8:09 AM;;;;;;;;;;; | \n",
"
\n",
" \n",
" | 3 | \n",
" 2025-09-27 | \n",
" 93 | \n",
" 41 | \n",
" 67 | \n",
" -- | \n",
" 11.13 | \n",
" 92 | \n",
" Ausgezeichnet | \n",
" 8h 32min | \n",
" 7h 40min | \n",
" 10:39 PM | \n",
" 7:20 AM;;;;;;;;;;; | \n",
"
\n",
" \n",
" | 4 | \n",
" 2025-09-26 | \n",
" 97 | \n",
" 42 | \n",
" 71 | \n",
" -- | \n",
" 11.15 | \n",
" 90 | \n",
" Ausgezeichnet | \n",
" 7h 50min | \n",
" 7h 40min | \n",
" 10:07 PM | \n",
" 5:57 AM;;;;;;;;;;; | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Sleep Score 7 Tage Score Ruheherzfrequenz Body Battery Pulsoximeter Atmung \\\n",
"0 2025-09-30 77 44 55 -- 11.38 \n",
"1 2025-09-29 73 46 62 -- 12 \n",
"2 2025-09-28 34 47 21 -- 14.96 \n",
"3 2025-09-27 93 41 67 -- 11.13 \n",
"4 2025-09-26 97 42 71 -- 11.15 \n",
"\n",
" HFV-Status Qualität Dauer Schlafbedürfnis Schlafenszeit \\\n",
"0 83 Ausreichend 6h 47min 7h 0min 11:22 PM \n",
"1 84 Ausreichend 9h 6min 8h 40min 10:52 PM \n",
"2 84 Schlecht 6h 34min 7h 40min 12:34 AM \n",
"3 92 Ausgezeichnet 8h 32min 7h 40min 10:39 PM \n",
"4 90 Ausgezeichnet 7h 50min 7h 40min 10:07 PM \n",
"\n",
" Aufstehzeit;;;;;;;;;;; \n",
"0 6:09 AM;;;;;;;;;;; \n",
"1 8:02 AM;;;;;;;;;;; \n",
"2 8:09 AM;;;;;;;;;;; \n",
"3 7:20 AM;;;;;;;;;;; \n",
"4 5:57 AM;;;;;;;;;;; "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the sleep export.\n",
"df_sleep.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "2b832a91",
"metadata": {},
"outputs": [],
"source": [
"# Give the first column (it holds the date values) the name 'Datum'.\n",
"# rename() returns a new frame, which is bound back to df_sleep.\n",
"df_sleep = df_sleep.rename(columns={df_sleep.columns[0]: 'Datum'})"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "70fe281d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Datum | \n",
" Score | \n",
" Ruheherzfrequenz | \n",
" Body Battery | \n",
" Pulsoximeter | \n",
" Atmung | \n",
" HFV-Status | \n",
" Qualität | \n",
" Dauer | \n",
" Schlafbedürfnis | \n",
" Schlafenszeit | \n",
" Aufstehzeit;;;;;;;;;;; | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2025-09-30 | \n",
" 77 | \n",
" 44 | \n",
" 55 | \n",
" -- | \n",
" 11.38 | \n",
" 83 | \n",
" Ausreichend | \n",
" 6h 47min | \n",
" 7h 0min | \n",
" 11:22 PM | \n",
" 6:09 AM;;;;;;;;;;; | \n",
"
\n",
" \n",
" | 1 | \n",
" 2025-09-29 | \n",
" 73 | \n",
" 46 | \n",
" 62 | \n",
" -- | \n",
" 12 | \n",
" 84 | \n",
" Ausreichend | \n",
" 9h 6min | \n",
" 8h 40min | \n",
" 10:52 PM | \n",
" 8:02 AM;;;;;;;;;;; | \n",
"
\n",
" \n",
" | 2 | \n",
" 2025-09-28 | \n",
" 34 | \n",
" 47 | \n",
" 21 | \n",
" -- | \n",
" 14.96 | \n",
" 84 | \n",
" Schlecht | \n",
" 6h 34min | \n",
" 7h 40min | \n",
" 12:34 AM | \n",
" 8:09 AM;;;;;;;;;;; | \n",
"
\n",
" \n",
" | 3 | \n",
" 2025-09-27 | \n",
" 93 | \n",
" 41 | \n",
" 67 | \n",
" -- | \n",
" 11.13 | \n",
" 92 | \n",
" Ausgezeichnet | \n",
" 8h 32min | \n",
" 7h 40min | \n",
" 10:39 PM | \n",
" 7:20 AM;;;;;;;;;;; | \n",
"
\n",
" \n",
" | 4 | \n",
" 2025-09-26 | \n",
" 97 | \n",
" 42 | \n",
" 71 | \n",
" -- | \n",
" 11.15 | \n",
" 90 | \n",
" Ausgezeichnet | \n",
" 7h 50min | \n",
" 7h 40min | \n",
" 10:07 PM | \n",
" 5:57 AM;;;;;;;;;;; | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Datum Score Ruheherzfrequenz Body Battery Pulsoximeter Atmung \\\n",
"0 2025-09-30 77 44 55 -- 11.38 \n",
"1 2025-09-29 73 46 62 -- 12 \n",
"2 2025-09-28 34 47 21 -- 14.96 \n",
"3 2025-09-27 93 41 67 -- 11.13 \n",
"4 2025-09-26 97 42 71 -- 11.15 \n",
"\n",
" HFV-Status Qualität Dauer Schlafbedürfnis Schlafenszeit \\\n",
"0 83 Ausreichend 6h 47min 7h 0min 11:22 PM \n",
"1 84 Ausreichend 9h 6min 8h 40min 10:52 PM \n",
"2 84 Schlecht 6h 34min 7h 40min 12:34 AM \n",
"3 92 Ausgezeichnet 8h 32min 7h 40min 10:39 PM \n",
"4 90 Ausgezeichnet 7h 50min 7h 40min 10:07 PM \n",
"\n",
" Aufstehzeit;;;;;;;;;;; \n",
"0 6:09 AM;;;;;;;;;;; \n",
"1 8:02 AM;;;;;;;;;;; \n",
"2 8:09 AM;;;;;;;;;;; \n",
"3 7:20 AM;;;;;;;;;;; \n",
"4 5:57 AM;;;;;;;;;;; "
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Confirm that the first column is now called 'Datum'.\n",
"df_sleep.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "daebf9ac",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Aktivitätstyp | \n",
" Datum | \n",
" Favorit | \n",
" Titel | \n",
" Distanz | \n",
" Kalorien | \n",
" Zeit | \n",
" Ø Herzfrequenz | \n",
" Maximale Herzfrequenz | \n",
" Aerober TE | \n",
" ... | \n",
" Maximale Atemfrequenz | \n",
" Stressänderung | \n",
" Stress: Start | \n",
" Stress: Ende | \n",
" Ø Stress | \n",
" Maximaler Stress | \n",
" Zeit in Bewegung | \n",
" Verstrichene Zeit | \n",
" Minimale Höhe | \n",
" Maximale Höhe | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Laufen | \n",
" 2025-09-21 09:53:57 | \n",
" False | \n",
" Berlin - BMW Berlin Marathon (42.195 km) | \n",
" 42.65 | \n",
" 2,817 | \n",
" 04:35:15 | \n",
" 148 | \n",
" 165 | \n",
" 5.0 | \n",
" ... | \n",
" 41 | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" 04:34:55 | \n",
" 04:35:15 | \n",
" 33 | \n",
" 56 | \n",
"
\n",
" \n",
" | 1 | \n",
" Laufen | \n",
" 2025-09-20 15:18:50 | \n",
" False | \n",
" Berlin Laufen | \n",
" 3.72 | \n",
" 246 | \n",
" 00:20:13 | \n",
" 144 | \n",
" 161 | \n",
" 2.8 | \n",
" ... | \n",
" 38 | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" 00:20:12 | \n",
" 00:20:13 | \n",
" 40 | \n",
" 47 | \n",
"
\n",
" \n",
" | 2 | \n",
" Virtuelles Radfahren | \n",
" 2025-09-19 12:31:00 | \n",
" False | \n",
" Zwift - Renewal on Bridges and Boardwalks in M... | \n",
" 12.81 | \n",
" 210 | \n",
" 00:30:18 | \n",
" 116 | \n",
" 134 | \n",
" 1.5 | \n",
" ... | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" 00:30:17 | \n",
" 00:30:17 | \n",
" 4 | \n",
" 59 | \n",
"
\n",
" \n",
" | 3 | \n",
" Laufen | \n",
" 2025-09-15 11:16:13 | \n",
" False | \n",
" Rüthi Laufen | \n",
" 5.60 | \n",
" 350 | \n",
" 00:30:39 | \n",
" 133 | \n",
" 151 | \n",
" 2.8 | \n",
" ... | \n",
" 39 | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" 00:30:37 | \n",
" 00:30:39 | \n",
" 421 | \n",
" 437 | \n",
"
\n",
" \n",
" | 4 | \n",
" Laufen | \n",
" 2025-09-13 10:51:36 | \n",
" False | \n",
" Rüthi Laufen | \n",
" 9.01 | \n",
" 549 | \n",
" 00:44:56 | \n",
" 144 | \n",
" 171 | \n",
" 3.5 | \n",
" ... | \n",
" 41 | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" -- | \n",
" 00:44:45 | \n",
" 00:44:56 | \n",
" 421 | \n",
" 435 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 52 columns
\n",
"
"
],
"text/plain": [
" Aktivitätstyp Datum Favorit \\\n",
"0 Laufen 2025-09-21 09:53:57 False \n",
"1 Laufen 2025-09-20 15:18:50 False \n",
"2 Virtuelles Radfahren 2025-09-19 12:31:00 False \n",
"3 Laufen 2025-09-15 11:16:13 False \n",
"4 Laufen 2025-09-13 10:51:36 False \n",
"\n",
" Titel Distanz Kalorien \\\n",
"0 Berlin - BMW Berlin Marathon (42.195 km) 42.65 2,817 \n",
"1 Berlin Laufen 3.72 246 \n",
"2 Zwift - Renewal on Bridges and Boardwalks in M... 12.81 210 \n",
"3 Rüthi Laufen 5.60 350 \n",
"4 Rüthi Laufen 9.01 549 \n",
"\n",
" Zeit Ø Herzfrequenz Maximale Herzfrequenz Aerober TE ... \\\n",
"0 04:35:15 148 165 5.0 ... \n",
"1 00:20:13 144 161 2.8 ... \n",
"2 00:30:18 116 134 1.5 ... \n",
"3 00:30:39 133 151 2.8 ... \n",
"4 00:44:56 144 171 3.5 ... \n",
"\n",
" Maximale Atemfrequenz Stressänderung Stress: Start Stress: Ende Ø Stress \\\n",
"0 41 -- -- -- -- \n",
"1 38 -- -- -- -- \n",
"2 -- -- -- -- -- \n",
"3 39 -- -- -- -- \n",
"4 41 -- -- -- -- \n",
"\n",
" Maximaler Stress Zeit in Bewegung Verstrichene Zeit Minimale Höhe \\\n",
"0 -- 04:34:55 04:35:15 33 \n",
"1 -- 00:20:12 00:20:13 40 \n",
"2 -- 00:30:17 00:30:17 4 \n",
"3 -- 00:30:37 00:30:39 421 \n",
"4 -- 00:44:45 00:44:56 421 \n",
"\n",
" Maximale Höhe \n",
"0 56 \n",
"1 47 \n",
"2 59 \n",
"3 437 \n",
"4 435 \n",
"\n",
"[5 rows x 52 columns]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the first five activity rows once more for reference.\n",
"df_activities.head(n=5)"
]
},
{
"cell_type": "markdown",
"id": "119e7f65",
"metadata": {},
"source": [
"Um die Frage \"Wie wirkt sich Sport auf die Ruheherzfrequenz (RHF) aus?\" zu beantworten, erstellen wir einen neuen, kombinierten Datensatz.\n",
"\n",
"Dafür nutzen wir:\n",
"\n",
"Aus sleep.csv: die Spalten Datum und Ruheherzfrequenz (RHF). Die Ruheherzfrequenz ist der Wert, den wir untersuchen möchten.\n",
"\n",
"Aus all_activities.csv: die Spalten Datum (Zeitstempel der Aktivität) und Kalorien (Kalorienverbrauch).\n",
"\n",
"Zuerst summieren wir die Kalorien-Werte pro Datum aus der all_activities.csv, um die tägliche Gesamtaktivität zu erhalten. Anschließend verbinden wir diese täglichen Aktivitätsdaten mit den RHF-Werten aus der sleep.csv über das gemeinsame Datum."
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "411dec6a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Datum | \n",
" Ruheherzfrequenz | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2025-09-30 | \n",
" 44 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2025-09-29 | \n",
" 46 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2025-09-28 | \n",
" 47 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2025-09-27 | \n",
" 41 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2025-09-26 | \n",
" 42 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Datum Ruheherzfrequenz\n",
"0 2025-09-30 44\n",
"1 2025-09-29 46\n",
"2 2025-09-28 47\n",
"3 2025-09-27 41\n",
"4 2025-09-26 42"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the date and the resting heart rate (Ruheherzfrequenz) from the sleep data.\n",
"df_sleep_filtered = df_sleep.loc[:, ['Datum', 'Ruheherzfrequenz']]\n",
"df_sleep_filtered.head(n=5)\n"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "8876cc1f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Datum | \n",
" Kalorien | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2025-09-21 09:53:57 | \n",
" 2,817 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2025-09-20 15:18:50 | \n",
" 246 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2025-09-19 12:31:00 | \n",
" 210 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2025-09-15 11:16:13 | \n",
" 350 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2025-09-13 10:51:36 | \n",
" 549 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Datum Kalorien\n",
"0 2025-09-21 09:53:57 2,817\n",
"1 2025-09-20 15:18:50 246\n",
"2 2025-09-19 12:31:00 210\n",
"3 2025-09-15 11:16:13 350\n",
"4 2025-09-13 10:51:36 549"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the timestamp and the calories of each activity.\n",
"# .copy() makes this an independent frame: the following cells assign new columns to it,\n",
"# and doing that on a plain slice of df_activities raises SettingWithCopyWarning.\n",
"df_activities_filtered = df_activities[['Datum', 'Kalorien']].copy()\n",
"df_activities_filtered.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "54d3116d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\hizlanarif\\AppData\\Local\\Temp\\ipykernel_3384\\2850544847.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df_activities_filtered['Datum'] = pd.to_datetime(df_activities_filtered['Datum']).dt.date\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Datum | \n",
" Kalorien | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2025-09-21 | \n",
" 2,817 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2025-09-20 | \n",
" 246 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2025-09-19 | \n",
" 210 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2025-09-15 | \n",
" 350 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2025-09-13 | \n",
" 549 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Datum Kalorien\n",
"0 2025-09-21 2,817\n",
"1 2025-09-20 246\n",
"2 2025-09-19 210\n",
"3 2025-09-15 350\n",
"4 2025-09-13 549"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Drop the time of day from Datum and keep only the calendar date.\n",
"# assign() builds a new frame instead of writing into a slice of df_activities,\n",
"# which avoids the SettingWithCopyWarning this cell used to emit.\n",
"df_activities_filtered = df_activities_filtered.assign(\n",
"    Datum=lambda frame: pd.to_datetime(frame['Datum']).dt.date\n",
")\n",
"df_activities_filtered.head()"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "f26dc1cf",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Datum | \n",
" Kalorien | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2025-09-21 | \n",
" 2817 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2025-09-20 | \n",
" 246 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2025-09-19 | \n",
" 210 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2025-09-15 | \n",
" 350 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2025-09-13 | \n",
" 549 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Datum Kalorien\n",
"0 2025-09-21 2817\n",
"1 2025-09-20 246\n",
"2 2025-09-19 210\n",
"3 2025-09-15 350\n",
"4 2025-09-13 549"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the filtered activities.\n",
"df_activities_filtered.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "2caa80da",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Datum object\n",
"Kalorien object\n",
"dtype: object"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check the data types\n",
"df_activities_filtered.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "503b1ae9",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\hizlanarif\\AppData\\Local\\Temp\\ipykernel_3384\\2726110581.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df_activities_filtered['Datum'] = pd.to_datetime(df_activities_filtered['Datum'])\n",
"C:\\Users\\hizlanarif\\AppData\\Local\\Temp\\ipykernel_3384\\2726110581.py:4: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df_activities_filtered['Kalorien'] = df_activities_filtered['Kalorien'].str.replace(',', '')\n",
"C:\\Users\\hizlanarif\\AppData\\Local\\Temp\\ipykernel_3384\\2726110581.py:6: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df_activities_filtered['Kalorien'] = df_activities_filtered['Kalorien'].replace('--', '0')\n",
"C:\\Users\\hizlanarif\\AppData\\Local\\Temp\\ipykernel_3384\\2726110581.py:8: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df_activities_filtered['Kalorien'] = df_activities_filtered['Kalorien'].astype(int)\n"
]
},
{
"data": {
"text/plain": [
"Datum datetime64[ns]\n",
"Kalorien int64\n",
"dtype: object"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Convert both columns to proper dtypes in one step that is safe to re-run.\n",
"# assign() returns a new frame, so nothing is written into a slice of df_activities\n",
"# (the cause of the SettingWithCopyWarning).\n",
"df_activities_filtered = df_activities_filtered.assign(\n",
"    Datum=lambda frame: pd.to_datetime(frame['Datum']),\n",
"    Kalorien=lambda frame: (\n",
"        frame['Kalorien']\n",
"        .astype(str)                        # keeps .str usable if the column is already int\n",
"        .str.replace(',', '', regex=False)  # drop thousands separators such as in '2,817'\n",
"        .replace('--', '0')                 # '--' placeholders are counted as 0\n",
"        .astype(int)\n",
"    ),\n",
")\n",
"df_activities_filtered.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "3ca1d34a",
"metadata": {},
"outputs": [],
"source": [
"# Persist the cleaned activities (without the index column) to a new CSV file.\n",
"df_activities_filtered.to_csv(path_or_buf='cleaned_activities.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "8e6a8924",
"metadata": {},
"outputs": [],
"source": [
"# Persist the filtered sleep data (without the index column) to a new CSV file.\n",
"df_sleep_filtered.to_csv(path_or_buf='cleaned_sleep.csv', index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "c19c4423",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# True only if no date occurs more than once in df_activities_filtered.\n",
"not df_activities_filtered['Datum'].duplicated().any()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "8c8dfaa6",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\hizlanarif\\AppData\\Local\\Temp\\ipykernel_3384\\2358408937.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df_activities_filtered['Datum'] = pd.to_datetime(df_activities_filtered['Datum'], format='%d.%m.%Y').dt.date\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Datum | \n",
" Kalorien | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2025-09-21 | \n",
" 2817 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2025-09-20 | \n",
" 246 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2025-09-19 | \n",
" 210 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2025-09-15 | \n",
" 350 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2025-09-13 | \n",
" 549 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Datum Kalorien\n",
"0 2025-09-21 2817\n",
"1 2025-09-20 246\n",
"2 2025-09-19 210\n",
"3 2025-09-15 350\n",
"4 2025-09-13 549"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reduce every timestamp to its calendar day. normalize() keeps the datetime64 dtype,\n",
"# which the later resample() step requires. No format string is passed because the\n",
"# dates are ISO formatted (YYYY-MM-DD), not day.month.year.\n",
"df_activities_filtered = df_activities_filtered.assign(\n",
"    Datum=lambda frame: pd.to_datetime(frame['Datum']).dt.normalize()\n",
")\n",
"df_activities_filtered.head()"
]
},
{
"cell_type": "markdown",
"id": "0d0dd445",
"metadata": {},
"source": [
"The values in Datum are not unique, which means that some days have more than one activity entry."
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "31a6f98d",
"metadata": {},
"outputs": [],
"source": [
"# Add up Kalorien for every date so that each day appears exactly once.\n",
"df_activities_daily = df_activities_filtered.groupby('Datum', as_index=False)['Kalorien'].sum()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "864c302b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Datum | \n",
" Kalorien | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2021-07-07 | \n",
" 432 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2021-07-08 | \n",
" 544 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2021-07-12 | \n",
" 441 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2021-07-13 | \n",
" 384 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2021-08-20 | \n",
" 891 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Datum Kalorien\n",
"0 2021-07-07 432\n",
"1 2021-07-08 544\n",
"2 2021-07-12 441\n",
"3 2021-07-13 384\n",
"4 2021-08-20 891"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the daily calorie totals.\n",
"df_activities_daily.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "b4c67f8e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# After aggregating per day, no date should occur more than once.\n",
"not df_activities_daily['Datum'].duplicated().any()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3965ac4a",
"metadata": {},
"outputs": [],
"source": [
"# Total calories per calendar day. resample('D') also creates rows for days without any\n",
"# activity (their sum is 0), so rest days stay in the data. resample() needs a datetime64\n",
"# column, therefore Datum is converted first.\n",
"df_activities_daily = (\n",
"    df_activities_filtered\n",
"    .assign(Datum=lambda frame: pd.to_datetime(frame['Datum']))\n",
"    .resample('D', on='Datum')['Kalorien']\n",
"    .sum()\n",
"    .reset_index()\n",
")\n",
"# The sleep dates come from read_csv without date parsing, so they are text;\n",
"# parse them so that both merge keys are datetime64.\n",
"sleep_rhr = df_sleep[['Datum', 'Ruheherzfrequenz']].assign(\n",
"    Datum=lambda frame: pd.to_datetime(frame['Datum'])\n",
")\n",
"# The inner join keeps only the days that are present in both frames.\n",
"df_combined = pd.merge(df_activities_daily, sleep_rhr, on='Datum', how='inner')\n",
"df_combined.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}