1557 lines
49 KiB
Plaintext
1557 lines
49 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "f6f0a842",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Exploratory Data Analysis"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "386dcd6d",
|
||
"metadata": {},
|
||
"source": [
|
||
"Import the required libraries."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "f9b0ae4c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#importing the required libraries\n",
|
||
"import numpy as np \n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import pandas as pd\n",
|
||
"import seaborn as sns\n",
|
||
"import os \n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"id": "1c92d2c2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#load the datasets\n",
|
||
"df_activities = pd.read_csv(\"all_activities.csv\")\n",
|
||
"df_sleep = pd.read_csv(\"sleep.csv\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"id": "4f18cf6a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Aktivitätstyp</th>\n",
|
||
" <th>Datum</th>\n",
|
||
" <th>Favorit</th>\n",
|
||
" <th>Titel</th>\n",
|
||
" <th>Distanz</th>\n",
|
||
" <th>Kalorien</th>\n",
|
||
" <th>Zeit</th>\n",
|
||
" <th>Ø Herzfrequenz</th>\n",
|
||
" <th>Maximale Herzfrequenz</th>\n",
|
||
" <th>Aerober TE</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>Maximale Atemfrequenz</th>\n",
|
||
" <th>Stressänderung</th>\n",
|
||
" <th>Stress: Start</th>\n",
|
||
" <th>Stress: Ende</th>\n",
|
||
" <th>Ø Stress</th>\n",
|
||
" <th>Maximaler Stress</th>\n",
|
||
" <th>Zeit in Bewegung</th>\n",
|
||
" <th>Verstrichene Zeit</th>\n",
|
||
" <th>Minimale Höhe</th>\n",
|
||
" <th>Maximale Höhe</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Laufen</td>\n",
|
||
" <td>2025-09-21 09:53:57</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Berlin - BMW Berlin Marathon (42.195 km)</td>\n",
|
||
" <td>42.65</td>\n",
|
||
" <td>2,817</td>\n",
|
||
" <td>04:35:15</td>\n",
|
||
" <td>148</td>\n",
|
||
" <td>165</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>04:34:55</td>\n",
|
||
" <td>04:35:15</td>\n",
|
||
" <td>33</td>\n",
|
||
" <td>56</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Laufen</td>\n",
|
||
" <td>2025-09-20 15:18:50</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Berlin Laufen</td>\n",
|
||
" <td>3.72</td>\n",
|
||
" <td>246</td>\n",
|
||
" <td>00:20:13</td>\n",
|
||
" <td>144</td>\n",
|
||
" <td>161</td>\n",
|
||
" <td>2.8</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>00:20:12</td>\n",
|
||
" <td>00:20:13</td>\n",
|
||
" <td>40</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Virtuelles Radfahren</td>\n",
|
||
" <td>2025-09-19 12:31:00</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Zwift - Renewal on Bridges and Boardwalks in M...</td>\n",
|
||
" <td>12.81</td>\n",
|
||
" <td>210</td>\n",
|
||
" <td>00:30:18</td>\n",
|
||
" <td>116</td>\n",
|
||
" <td>134</td>\n",
|
||
" <td>1.5</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>00:30:17</td>\n",
|
||
" <td>00:30:17</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>59</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Laufen</td>\n",
|
||
" <td>2025-09-15 11:16:13</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Rüthi Laufen</td>\n",
|
||
" <td>5.60</td>\n",
|
||
" <td>350</td>\n",
|
||
" <td>00:30:39</td>\n",
|
||
" <td>133</td>\n",
|
||
" <td>151</td>\n",
|
||
" <td>2.8</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>00:30:37</td>\n",
|
||
" <td>00:30:39</td>\n",
|
||
" <td>421</td>\n",
|
||
" <td>437</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Laufen</td>\n",
|
||
" <td>2025-09-13 10:51:36</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Rüthi Laufen</td>\n",
|
||
" <td>9.01</td>\n",
|
||
" <td>549</td>\n",
|
||
" <td>00:44:56</td>\n",
|
||
" <td>144</td>\n",
|
||
" <td>171</td>\n",
|
||
" <td>3.5</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>00:44:45</td>\n",
|
||
" <td>00:44:56</td>\n",
|
||
" <td>421</td>\n",
|
||
" <td>435</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 52 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Aktivitätstyp Datum Favorit \\\n",
|
||
"0 Laufen 2025-09-21 09:53:57 False \n",
|
||
"1 Laufen 2025-09-20 15:18:50 False \n",
|
||
"2 Virtuelles Radfahren 2025-09-19 12:31:00 False \n",
|
||
"3 Laufen 2025-09-15 11:16:13 False \n",
|
||
"4 Laufen 2025-09-13 10:51:36 False \n",
|
||
"\n",
|
||
" Titel Distanz Kalorien \\\n",
|
||
"0 Berlin - BMW Berlin Marathon (42.195 km) 42.65 2,817 \n",
|
||
"1 Berlin Laufen 3.72 246 \n",
|
||
"2 Zwift - Renewal on Bridges and Boardwalks in M... 12.81 210 \n",
|
||
"3 Rüthi Laufen 5.60 350 \n",
|
||
"4 Rüthi Laufen 9.01 549 \n",
|
||
"\n",
|
||
" Zeit Ø Herzfrequenz Maximale Herzfrequenz Aerober TE ... \\\n",
|
||
"0 04:35:15 148 165 5.0 ... \n",
|
||
"1 00:20:13 144 161 2.8 ... \n",
|
||
"2 00:30:18 116 134 1.5 ... \n",
|
||
"3 00:30:39 133 151 2.8 ... \n",
|
||
"4 00:44:56 144 171 3.5 ... \n",
|
||
"\n",
|
||
" Maximale Atemfrequenz Stressänderung Stress: Start Stress: Ende Ø Stress \\\n",
|
||
"0 41 -- -- -- -- \n",
|
||
"1 38 -- -- -- -- \n",
|
||
"2 -- -- -- -- -- \n",
|
||
"3 39 -- -- -- -- \n",
|
||
"4 41 -- -- -- -- \n",
|
||
"\n",
|
||
" Maximaler Stress Zeit in Bewegung Verstrichene Zeit Minimale Höhe \\\n",
|
||
"0 -- 04:34:55 04:35:15 33 \n",
|
||
"1 -- 00:20:12 00:20:13 40 \n",
|
||
"2 -- 00:30:17 00:30:17 4 \n",
|
||
"3 -- 00:30:37 00:30:39 421 \n",
|
||
"4 -- 00:44:45 00:44:56 421 \n",
|
||
"\n",
|
||
" Maximale Höhe \n",
|
||
"0 56 \n",
|
||
"1 47 \n",
|
||
"2 59 \n",
|
||
"3 437 \n",
|
||
"4 435 \n",
|
||
"\n",
|
||
"[5 rows x 52 columns]"
|
||
]
|
||
},
|
||
"execution_count": 26,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# check activities data\n",
|
||
"df_activities.head()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"id": "e65aa687",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Sleep Score 7 Tage</th>\n",
|
||
" <th>Score</th>\n",
|
||
" <th>Ruheherzfrequenz</th>\n",
|
||
" <th>Body Battery</th>\n",
|
||
" <th>Pulsoximeter</th>\n",
|
||
" <th>Atmung</th>\n",
|
||
" <th>HFV-Status</th>\n",
|
||
" <th>Qualität</th>\n",
|
||
" <th>Dauer</th>\n",
|
||
" <th>Schlafbedürfnis</th>\n",
|
||
" <th>Schlafenszeit</th>\n",
|
||
" <th>Aufstehzeit;;;;;;;;;;;</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2025-09-30</td>\n",
|
||
" <td>77</td>\n",
|
||
" <td>44</td>\n",
|
||
" <td>55</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>11.38</td>\n",
|
||
" <td>83</td>\n",
|
||
" <td>Ausreichend</td>\n",
|
||
" <td>6h 47min</td>\n",
|
||
" <td>7h 0min</td>\n",
|
||
" <td>11:22 PM</td>\n",
|
||
" <td>6:09 AM;;;;;;;;;;;</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2025-09-29</td>\n",
|
||
" <td>73</td>\n",
|
||
" <td>46</td>\n",
|
||
" <td>62</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>84</td>\n",
|
||
" <td>Ausreichend</td>\n",
|
||
" <td>9h 6min</td>\n",
|
||
" <td>8h 40min</td>\n",
|
||
" <td>10:52 PM</td>\n",
|
||
" <td>8:02 AM;;;;;;;;;;;</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2025-09-28</td>\n",
|
||
" <td>34</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>21</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>14.96</td>\n",
|
||
" <td>84</td>\n",
|
||
" <td>Schlecht</td>\n",
|
||
" <td>6h 34min</td>\n",
|
||
" <td>7h 40min</td>\n",
|
||
" <td>12:34 AM</td>\n",
|
||
" <td>8:09 AM;;;;;;;;;;;</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2025-09-27</td>\n",
|
||
" <td>93</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>67</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>11.13</td>\n",
|
||
" <td>92</td>\n",
|
||
" <td>Ausgezeichnet</td>\n",
|
||
" <td>8h 32min</td>\n",
|
||
" <td>7h 40min</td>\n",
|
||
" <td>10:39 PM</td>\n",
|
||
" <td>7:20 AM;;;;;;;;;;;</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2025-09-26</td>\n",
|
||
" <td>97</td>\n",
|
||
" <td>42</td>\n",
|
||
" <td>71</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>11.15</td>\n",
|
||
" <td>90</td>\n",
|
||
" <td>Ausgezeichnet</td>\n",
|
||
" <td>7h 50min</td>\n",
|
||
" <td>7h 40min</td>\n",
|
||
" <td>10:07 PM</td>\n",
|
||
" <td>5:57 AM;;;;;;;;;;;</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Sleep Score 7 Tage Score Ruheherzfrequenz Body Battery Pulsoximeter Atmung \\\n",
|
||
"0 2025-09-30 77 44 55 -- 11.38 \n",
|
||
"1 2025-09-29 73 46 62 -- 12 \n",
|
||
"2 2025-09-28 34 47 21 -- 14.96 \n",
|
||
"3 2025-09-27 93 41 67 -- 11.13 \n",
|
||
"4 2025-09-26 97 42 71 -- 11.15 \n",
|
||
"\n",
|
||
" HFV-Status Qualität Dauer Schlafbedürfnis Schlafenszeit \\\n",
|
||
"0 83 Ausreichend 6h 47min 7h 0min 11:22 PM \n",
|
||
"1 84 Ausreichend 9h 6min 8h 40min 10:52 PM \n",
|
||
"2 84 Schlecht 6h 34min 7h 40min 12:34 AM \n",
|
||
"3 92 Ausgezeichnet 8h 32min 7h 40min 10:39 PM \n",
|
||
"4 90 Ausgezeichnet 7h 50min 7h 40min 10:07 PM \n",
|
||
"\n",
|
||
" Aufstehzeit;;;;;;;;;;; \n",
|
||
"0 6:09 AM;;;;;;;;;;; \n",
|
||
"1 8:02 AM;;;;;;;;;;; \n",
|
||
"2 8:09 AM;;;;;;;;;;; \n",
|
||
"3 7:20 AM;;;;;;;;;;; \n",
|
||
"4 5:57 AM;;;;;;;;;;; "
|
||
]
|
||
},
|
||
"execution_count": 27,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# check sleep data\n",
|
||
"df_sleep.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"id": "2b832a91",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# change the first column name to Datum\n",
|
||
"df_sleep.rename(columns={df_sleep.columns[0]: 'Datum'}, inplace=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"id": "70fe281d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Datum</th>\n",
|
||
" <th>Score</th>\n",
|
||
" <th>Ruheherzfrequenz</th>\n",
|
||
" <th>Body Battery</th>\n",
|
||
" <th>Pulsoximeter</th>\n",
|
||
" <th>Atmung</th>\n",
|
||
" <th>HFV-Status</th>\n",
|
||
" <th>Qualität</th>\n",
|
||
" <th>Dauer</th>\n",
|
||
" <th>Schlafbedürfnis</th>\n",
|
||
" <th>Schlafenszeit</th>\n",
|
||
" <th>Aufstehzeit;;;;;;;;;;;</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2025-09-30</td>\n",
|
||
" <td>77</td>\n",
|
||
" <td>44</td>\n",
|
||
" <td>55</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>11.38</td>\n",
|
||
" <td>83</td>\n",
|
||
" <td>Ausreichend</td>\n",
|
||
" <td>6h 47min</td>\n",
|
||
" <td>7h 0min</td>\n",
|
||
" <td>11:22 PM</td>\n",
|
||
" <td>6:09 AM;;;;;;;;;;;</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2025-09-29</td>\n",
|
||
" <td>73</td>\n",
|
||
" <td>46</td>\n",
|
||
" <td>62</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>84</td>\n",
|
||
" <td>Ausreichend</td>\n",
|
||
" <td>9h 6min</td>\n",
|
||
" <td>8h 40min</td>\n",
|
||
" <td>10:52 PM</td>\n",
|
||
" <td>8:02 AM;;;;;;;;;;;</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2025-09-28</td>\n",
|
||
" <td>34</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>21</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>14.96</td>\n",
|
||
" <td>84</td>\n",
|
||
" <td>Schlecht</td>\n",
|
||
" <td>6h 34min</td>\n",
|
||
" <td>7h 40min</td>\n",
|
||
" <td>12:34 AM</td>\n",
|
||
" <td>8:09 AM;;;;;;;;;;;</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2025-09-27</td>\n",
|
||
" <td>93</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>67</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>11.13</td>\n",
|
||
" <td>92</td>\n",
|
||
" <td>Ausgezeichnet</td>\n",
|
||
" <td>8h 32min</td>\n",
|
||
" <td>7h 40min</td>\n",
|
||
" <td>10:39 PM</td>\n",
|
||
" <td>7:20 AM;;;;;;;;;;;</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2025-09-26</td>\n",
|
||
" <td>97</td>\n",
|
||
" <td>42</td>\n",
|
||
" <td>71</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>11.15</td>\n",
|
||
" <td>90</td>\n",
|
||
" <td>Ausgezeichnet</td>\n",
|
||
" <td>7h 50min</td>\n",
|
||
" <td>7h 40min</td>\n",
|
||
" <td>10:07 PM</td>\n",
|
||
" <td>5:57 AM;;;;;;;;;;;</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Datum Score Ruheherzfrequenz Body Battery Pulsoximeter Atmung \\\n",
|
||
"0 2025-09-30 77 44 55 -- 11.38 \n",
|
||
"1 2025-09-29 73 46 62 -- 12 \n",
|
||
"2 2025-09-28 34 47 21 -- 14.96 \n",
|
||
"3 2025-09-27 93 41 67 -- 11.13 \n",
|
||
"4 2025-09-26 97 42 71 -- 11.15 \n",
|
||
"\n",
|
||
" HFV-Status Qualität Dauer Schlafbedürfnis Schlafenszeit \\\n",
|
||
"0 83 Ausreichend 6h 47min 7h 0min 11:22 PM \n",
|
||
"1 84 Ausreichend 9h 6min 8h 40min 10:52 PM \n",
|
||
"2 84 Schlecht 6h 34min 7h 40min 12:34 AM \n",
|
||
"3 92 Ausgezeichnet 8h 32min 7h 40min 10:39 PM \n",
|
||
"4 90 Ausgezeichnet 7h 50min 7h 40min 10:07 PM \n",
|
||
"\n",
|
||
" Aufstehzeit;;;;;;;;;;; \n",
|
||
"0 6:09 AM;;;;;;;;;;; \n",
|
||
"1 8:02 AM;;;;;;;;;;; \n",
|
||
"2 8:09 AM;;;;;;;;;;; \n",
|
||
"3 7:20 AM;;;;;;;;;;; \n",
|
||
"4 5:57 AM;;;;;;;;;;; "
|
||
]
|
||
},
|
||
"execution_count": 29,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# check the sleep data again\n",
|
||
"df_sleep.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"id": "daebf9ac",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Aktivitätstyp</th>\n",
|
||
" <th>Datum</th>\n",
|
||
" <th>Favorit</th>\n",
|
||
" <th>Titel</th>\n",
|
||
" <th>Distanz</th>\n",
|
||
" <th>Kalorien</th>\n",
|
||
" <th>Zeit</th>\n",
|
||
" <th>Ø Herzfrequenz</th>\n",
|
||
" <th>Maximale Herzfrequenz</th>\n",
|
||
" <th>Aerober TE</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>Maximale Atemfrequenz</th>\n",
|
||
" <th>Stressänderung</th>\n",
|
||
" <th>Stress: Start</th>\n",
|
||
" <th>Stress: Ende</th>\n",
|
||
" <th>Ø Stress</th>\n",
|
||
" <th>Maximaler Stress</th>\n",
|
||
" <th>Zeit in Bewegung</th>\n",
|
||
" <th>Verstrichene Zeit</th>\n",
|
||
" <th>Minimale Höhe</th>\n",
|
||
" <th>Maximale Höhe</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Laufen</td>\n",
|
||
" <td>2025-09-21 09:53:57</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Berlin - BMW Berlin Marathon (42.195 km)</td>\n",
|
||
" <td>42.65</td>\n",
|
||
" <td>2,817</td>\n",
|
||
" <td>04:35:15</td>\n",
|
||
" <td>148</td>\n",
|
||
" <td>165</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>04:34:55</td>\n",
|
||
" <td>04:35:15</td>\n",
|
||
" <td>33</td>\n",
|
||
" <td>56</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Laufen</td>\n",
|
||
" <td>2025-09-20 15:18:50</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Berlin Laufen</td>\n",
|
||
" <td>3.72</td>\n",
|
||
" <td>246</td>\n",
|
||
" <td>00:20:13</td>\n",
|
||
" <td>144</td>\n",
|
||
" <td>161</td>\n",
|
||
" <td>2.8</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>00:20:12</td>\n",
|
||
" <td>00:20:13</td>\n",
|
||
" <td>40</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Virtuelles Radfahren</td>\n",
|
||
" <td>2025-09-19 12:31:00</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Zwift - Renewal on Bridges and Boardwalks in M...</td>\n",
|
||
" <td>12.81</td>\n",
|
||
" <td>210</td>\n",
|
||
" <td>00:30:18</td>\n",
|
||
" <td>116</td>\n",
|
||
" <td>134</td>\n",
|
||
" <td>1.5</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>00:30:17</td>\n",
|
||
" <td>00:30:17</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>59</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Laufen</td>\n",
|
||
" <td>2025-09-15 11:16:13</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Rüthi Laufen</td>\n",
|
||
" <td>5.60</td>\n",
|
||
" <td>350</td>\n",
|
||
" <td>00:30:39</td>\n",
|
||
" <td>133</td>\n",
|
||
" <td>151</td>\n",
|
||
" <td>2.8</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>00:30:37</td>\n",
|
||
" <td>00:30:39</td>\n",
|
||
" <td>421</td>\n",
|
||
" <td>437</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Laufen</td>\n",
|
||
" <td>2025-09-13 10:51:36</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Rüthi Laufen</td>\n",
|
||
" <td>9.01</td>\n",
|
||
" <td>549</td>\n",
|
||
" <td>00:44:56</td>\n",
|
||
" <td>144</td>\n",
|
||
" <td>171</td>\n",
|
||
" <td>3.5</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>--</td>\n",
|
||
" <td>00:44:45</td>\n",
|
||
" <td>00:44:56</td>\n",
|
||
" <td>421</td>\n",
|
||
" <td>435</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 52 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Aktivitätstyp Datum Favorit \\\n",
|
||
"0 Laufen 2025-09-21 09:53:57 False \n",
|
||
"1 Laufen 2025-09-20 15:18:50 False \n",
|
||
"2 Virtuelles Radfahren 2025-09-19 12:31:00 False \n",
|
||
"3 Laufen 2025-09-15 11:16:13 False \n",
|
||
"4 Laufen 2025-09-13 10:51:36 False \n",
|
||
"\n",
|
||
" Titel Distanz Kalorien \\\n",
|
||
"0 Berlin - BMW Berlin Marathon (42.195 km) 42.65 2,817 \n",
|
||
"1 Berlin Laufen 3.72 246 \n",
|
||
"2 Zwift - Renewal on Bridges and Boardwalks in M... 12.81 210 \n",
|
||
"3 Rüthi Laufen 5.60 350 \n",
|
||
"4 Rüthi Laufen 9.01 549 \n",
|
||
"\n",
|
||
" Zeit Ø Herzfrequenz Maximale Herzfrequenz Aerober TE ... \\\n",
|
||
"0 04:35:15 148 165 5.0 ... \n",
|
||
"1 00:20:13 144 161 2.8 ... \n",
|
||
"2 00:30:18 116 134 1.5 ... \n",
|
||
"3 00:30:39 133 151 2.8 ... \n",
|
||
"4 00:44:56 144 171 3.5 ... \n",
|
||
"\n",
|
||
" Maximale Atemfrequenz Stressänderung Stress: Start Stress: Ende Ø Stress \\\n",
|
||
"0 41 -- -- -- -- \n",
|
||
"1 38 -- -- -- -- \n",
|
||
"2 -- -- -- -- -- \n",
|
||
"3 39 -- -- -- -- \n",
|
||
"4 41 -- -- -- -- \n",
|
||
"\n",
|
||
" Maximaler Stress Zeit in Bewegung Verstrichene Zeit Minimale Höhe \\\n",
|
||
"0 -- 04:34:55 04:35:15 33 \n",
|
||
"1 -- 00:20:12 00:20:13 40 \n",
|
||
"2 -- 00:30:17 00:30:17 4 \n",
|
||
"3 -- 00:30:37 00:30:39 421 \n",
|
||
"4 -- 00:44:45 00:44:56 421 \n",
|
||
"\n",
|
||
" Maximale Höhe \n",
|
||
"0 56 \n",
|
||
"1 47 \n",
|
||
"2 59 \n",
|
||
"3 437 \n",
|
||
"4 435 \n",
|
||
"\n",
|
||
"[5 rows x 52 columns]"
|
||
]
|
||
},
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_activities.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "119e7f65",
|
||
"metadata": {},
|
||
"source": [
|
||
"Um die Frage \"Wie wirkt sich Sport auf die Ruheherzfrequenz (RHF) aus?\" zu beantworten, erstellen wir einen neuen, kombinierten Datensatz.\n",
|
||
"\n",
|
||
"Dafür nutzen wir:\n",
|
||
"\n",
|
||
"Aus sleep.csv: das Datum (Spalte Datum) und die RHF (Spalte Ruheherzfrequenz). Die RHF ist der Wert, den wir messen möchten.\n",
|
||
"\n",
|
||
"Aus all_activities.csv: den Zeitstempel der Aktivität (Spalte Datum) und den Kalorienverbrauch (Spalte Kalorien).\n",
|
||
"\n",
|
||
"Zuerst summieren wir die Kalorien-Werte pro Datum in der all_activities.csv, um die tägliche Gesamtaktivität zu erhalten. Anschließend verbinden wir diese täglichen Aktivitätsdaten mit den RHF-Werten aus der sleep.csv über das gemeinsame Datum."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"id": "411dec6a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Datum</th>\n",
|
||
" <th>Ruheherzfrequenz</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2025-09-30</td>\n",
|
||
" <td>44</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2025-09-29</td>\n",
|
||
" <td>46</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2025-09-28</td>\n",
|
||
" <td>47</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2025-09-27</td>\n",
|
||
" <td>41</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2025-09-26</td>\n",
|
||
" <td>42</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Datum Ruheherzfrequenz\n",
|
||
"0 2025-09-30 44\n",
|
||
"1 2025-09-29 46\n",
|
||
"2 2025-09-28 47\n",
|
||
"3 2025-09-27 41\n",
|
||
"4 2025-09-26 42"
|
||
]
|
||
},
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Pull the Datum and Ruheherzfrequenz from sleep.csv\n",
|
||
"df_sleep_filtered = df_sleep[['Datum', 'Ruheherzfrequenz']]\n",
|
||
"df_sleep_filtered.head()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"id": "8876cc1f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Datum</th>\n",
|
||
" <th>Kalorien</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2025-09-21 09:53:57</td>\n",
|
||
" <td>2,817</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2025-09-20 15:18:50</td>\n",
|
||
" <td>246</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2025-09-19 12:31:00</td>\n",
|
||
" <td>210</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2025-09-15 11:16:13</td>\n",
|
||
" <td>350</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2025-09-13 10:51:36</td>\n",
|
||
" <td>549</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Datum Kalorien\n",
|
||
"0 2025-09-21 09:53:57 2,817\n",
|
||
"1 2025-09-20 15:18:50 246\n",
|
||
"2 2025-09-19 12:31:00 210\n",
|
||
"3 2025-09-15 11:16:13 350\n",
|
||
"4 2025-09-13 10:51:36 549"
|
||
]
|
||
},
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Pull Datum and Kalorien from all_activities.csv; .copy() yields an independent frame so that later column assignments do not trigger SettingWithCopyWarning\n",
|
||
"df_activities_filtered = df_activities[['Datum', 'Kalorien']].copy()\n",
|
||
"df_activities_filtered.head()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"id": "54d3116d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"C:\\Users\\hizlanarif\\AppData\\Local\\Temp\\ipykernel_3384\\2850544847.py:2: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||
" df_activities_filtered['Datum'] = pd.to_datetime(df_activities_filtered['Datum']).dt.date\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Datum</th>\n",
|
||
" <th>Kalorien</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2025-09-21</td>\n",
|
||
" <td>2,817</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2025-09-20</td>\n",
|
||
" <td>246</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2025-09-19</td>\n",
|
||
" <td>210</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2025-09-15</td>\n",
|
||
" <td>350</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2025-09-13</td>\n",
|
||
" <td>549</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Datum Kalorien\n",
|
||
"0 2025-09-21 2,817\n",
|
||
"1 2025-09-20 246\n",
|
||
"2 2025-09-19 210\n",
|
||
"3 2025-09-15 350\n",
|
||
"4 2025-09-13 549"
|
||
]
|
||
},
|
||
"execution_count": 34,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# for the Datum column, get rid of the time part and keep only the date part\n",
|
||
"df_activities_filtered['Datum'] = pd.to_datetime(df_activities_filtered['Datum']).dt.date\n",
|
||
"df_activities_filtered.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"id": "f26dc1cf",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Datum</th>\n",
|
||
" <th>Kalorien</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2025-09-21</td>\n",
|
||
" <td>2817</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2025-09-20</td>\n",
|
||
" <td>246</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2025-09-19</td>\n",
|
||
" <td>210</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2025-09-15</td>\n",
|
||
" <td>350</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2025-09-13</td>\n",
|
||
" <td>549</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Datum Kalorien\n",
|
||
"0 2025-09-21 2817\n",
|
||
"1 2025-09-20 246\n",
|
||
"2 2025-09-19 210\n",
|
||
"3 2025-09-15 350\n",
|
||
"4 2025-09-13 549"
|
||
]
|
||
},
|
||
"execution_count": 48,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_activities_filtered.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"id": "2caa80da",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Datum object\n",
|
||
"Kalorien object\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 39,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# check the data types\n",
|
||
"df_activities_filtered.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"id": "503b1ae9",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"C:\\Users\\hizlanarif\\AppData\\Local\\Temp\\ipykernel_3384\\2726110581.py:2: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||
" df_activities_filtered['Datum'] = pd.to_datetime(df_activities_filtered['Datum'])\n",
|
||
"C:\\Users\\hizlanarif\\AppData\\Local\\Temp\\ipykernel_3384\\2726110581.py:4: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||
" df_activities_filtered['Kalorien'] = df_activities_filtered['Kalorien'].str.replace(',', '')\n",
|
||
"C:\\Users\\hizlanarif\\AppData\\Local\\Temp\\ipykernel_3384\\2726110581.py:6: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||
" df_activities_filtered['Kalorien'] = df_activities_filtered['Kalorien'].replace('--', '0')\n",
|
||
"C:\\Users\\hizlanarif\\AppData\\Local\\Temp\\ipykernel_3384\\2726110581.py:8: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||
" df_activities_filtered['Kalorien'] = df_activities_filtered['Kalorien'].astype(int)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Datum datetime64[ns]\n",
|
||
"Kalorien int64\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# change the data type of Datum to datetime\n",
|
||
"df_activities_filtered['Datum'] = pd.to_datetime(df_activities_filtered['Datum'])\n",
|
||
"#get rid of commas in Kalorien column\n",
|
||
"df_activities_filtered['Kalorien'] = df_activities_filtered['Kalorien'].str.replace(',', '')\n",
|
||
"# replace all \"--\" values with 0\n",
|
||
"df_activities_filtered['Kalorien'] = df_activities_filtered['Kalorien'].replace('--', '0')\n",
|
||
"# change the data type of Kalorien to integer\n",
|
||
"df_activities_filtered['Kalorien'] = df_activities_filtered['Kalorien'].astype(int)\n",
|
||
"df_activities_filtered.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 49,
|
||
"id": "3ca1d34a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#write this cleaned data to a new csv file\n",
|
||
"df_activities_filtered.to_csv('cleaned_activities.csv', index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 50,
|
||
"id": "8e6a8924",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#write the cleaned sleep data to new csv files\n",
|
||
"df_sleep_filtered.to_csv('cleaned_sleep.csv', index=False)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"id": "c19c4423",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"False"
|
||
]
|
||
},
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# check if each date is unique in df_activities_filtered\n",
|
||
"df_activities_filtered['Datum'].is_unique"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 51,
|
||
"id": "8c8dfaa6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"C:\\Users\\hizlanarif\\AppData\\Local\\Temp\\ipykernel_3384\\2358408937.py:2: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||
" df_activities_filtered['Datum'] = pd.to_datetime(df_activities_filtered['Datum'], format='%d.%m.%Y').dt.date\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Datum</th>\n",
|
||
" <th>Kalorien</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2025-09-21</td>\n",
|
||
" <td>2817</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2025-09-20</td>\n",
|
||
" <td>246</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2025-09-19</td>\n",
|
||
" <td>210</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2025-09-15</td>\n",
|
||
" <td>350</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2025-09-13</td>\n",
|
||
" <td>549</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Datum Kalorien\n",
|
||
"0 2025-09-21 2817\n",
|
||
"1 2025-09-20 246\n",
|
||
"2 2025-09-19 210\n",
|
||
"3 2025-09-15 350\n",
|
||
"4 2025-09-13 549"
|
||
]
|
||
},
|
||
"execution_count": 51,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# change the Datum column to day month year format\n",
|
||
"df_activities_filtered['Datum'] = pd.to_datetime(df_activities_filtered['Datum'], format='%d.%m.%Y').dt.date\n",
|
||
"df_activities_filtered.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "0d0dd445",
|
||
"metadata": {},
|
||
"source": [
|
||
"We see that each value is not unique in Datum which suggests that for some days there are more than one entry."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"id": "31a6f98d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# sum the Kalorien values per Date in all-activities.csv to get daily total activity\n",
|
||
"df_activities_daily = df_activities_filtered.groupby('Datum').sum().reset_index()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"id": "864c302b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Datum</th>\n",
|
||
" <th>Kalorien</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2021-07-07</td>\n",
|
||
" <td>432</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2021-07-08</td>\n",
|
||
" <td>544</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2021-07-12</td>\n",
|
||
" <td>441</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2021-07-13</td>\n",
|
||
" <td>384</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2021-08-20</td>\n",
|
||
" <td>891</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Datum Kalorien\n",
|
||
"0 2021-07-07 432\n",
|
||
"1 2021-07-08 544\n",
|
||
"2 2021-07-12 441\n",
|
||
"3 2021-07-13 384\n",
|
||
"4 2021-08-20 891"
|
||
]
|
||
},
|
||
"execution_count": 45,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_activities_daily.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"id": "b4c67f8e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"True"
|
||
]
|
||
},
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#check if the dates are unique now\n",
|
||
"df_activities_daily['Datum'].is_unique"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "3965ac4a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Sum the Calorie values per Date in all-activities.csv to get daily total activity\n",
|
||
"df_activities_daily = df_activities_filtered.resample('D', on='Datum').sum().reset_index()\n",
|
||
"# Merge the daily activity data with the RHR data from sleep.csv on the Date\n",
|
||
"df_combined = pd.merge(df_activities_daily, df_sleep[['Datum', 'Ruheherzfrequenz']], left_on='Datum', right_on='Datum', how='inner')\n",
|
||
"df_combined.head()"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "base",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.13.5"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|