430 lines
13 KiB
Plaintext
430 lines
13 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c2188cd7",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Read data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5b2b0060",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import seaborn as sns\n",
|
|
"import matplotlib.pyplot as plt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "52f55dde",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"activity_cols = {\n",
|
|
" \"Datum\": \"date\",\n",
|
|
" \"Ø Pace\": \"activity_pace_average\",\n",
|
|
" \"Kalorien\": \"activity_calories\",\n",
|
|
" \"Zeit\": \"activity_duration\",\n",
|
|
" \"Ø Herzfrequenz\": \"activity_heart_rate_average\",\n",
|
|
" \"Ø Atemfrequenz\": \"activity_breathing_rate_average\",\n",
|
|
"}\n",
|
|
"df_activities = pd.read_csv(\n",
|
|
" \"data/raw/all_activities.csv\",\n",
|
|
" usecols=list(activity_cols.keys())\n",
|
|
")\n",
|
|
"df_activities.rename(columns=activity_cols, inplace=True)\n",
|
|
"\n",
|
|
"sleep_cols = {\n",
|
|
" \"Sleep Score 7 Tage\": \"date\",\n",
|
|
" \"Score\": \"sleep_score\",\n",
|
|
" \"Dauer\": \"sleep_duration\",\n",
|
|
" \"Schlafenszeit\": \"sleep_bedtime\",\n",
|
|
" \"Ruheherzfrequenz\": \"sleep_resting_heart_rate\",\n",
|
|
" \"HFV-Status\": \"sleep_hrv_status\",\n",
|
|
" \"Atmung\": \"sleep_breathing_rate\",\n",
|
|
" \"Schlafbedürfnis\": \"sleep_duration_needed\",\n",
|
|
"}\n",
|
|
"df_sleep = pd.read_csv(\n",
|
|
" \"data/raw/sleep.csv\",\n",
|
|
" usecols=list(sleep_cols.keys())\n",
|
|
")\n",
|
|
"df_sleep.rename(columns=sleep_cols, inplace=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "91f04fe7",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Clean date/time features"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "97252627",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Activities"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "826e5af0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def pace_to_seconds(pace_str):\n",
|
|
"    \"\"\"Convert a pace value to a number of seconds.\n",
|
|
"\n",
|
|
"    Strings containing a colon are split on it: two fields are read as\n",
|
|
"    MM:SS and three fields as HH:MM:SS. Every other value is handed to\n",
|
|
"    float(). Returns 0 when the value cannot be parsed.\n",
|
|
"    \"\"\"\n",
|
|
"    try:\n",
|
|
"        if isinstance(pace_str, str) and \":\" in pace_str:\n",
|
|
"            parts = pace_str.split(\":\")\n",
|
|
"            if len(parts) == 2:\n",
|
|
"                minutes, seconds = int(parts[0]), int(parts[1])\n",
|
|
"                return minutes * 60 + seconds\n",
|
|
"            elif len(parts) == 3:  # If format is HH:MM:SS\n",
|
|
"                hours, minutes, seconds = int(\n",
|
|
"                    parts[0]), int(parts[1]), int(parts[2])\n",
|
|
"                return hours * 3600 + minutes * 60 + seconds\n",
|
|
"        return float(pace_str)\n",
|
|
"    except (ValueError, TypeError):\n",
|
|
"        # NOTE(review): 0 cannot be told apart from a real zero pace downstream; consider NaN - confirm intent.\n",
|
|
"        return 0\n",
|
|
"\n",
|
|
"\n",
|
|
"df_activities[\"date\"] = pd.to_datetime(df_activities[\"date\"], errors=\"coerce\")\n",
|
|
"df_activities[\"activity_starting_time\"] = df_activities[\"date\"] - df_activities[\"date\"].dt.normalize()\n",
|
|
"df_activities[\"date\"] = df_activities[\"date\"].dt.normalize()\n",
|
|
"\n",
|
|
"df_activities[\"activity_pace_average\"] = df_activities[\"activity_pace_average\"].apply(pace_to_seconds)\n",
|
|
"df_activities[\"activity_duration\"] = pd.to_timedelta(df_activities[\"activity_duration\"], errors=\"coerce\")\n",
|
|
"df_activities[\"activity_duration_seconds\"] = df_activities[\"activity_duration\"].dt.total_seconds().fillna(0).astype(int)\n",
|
|
"\n",
|
|
"df_activities[\"activity_ending_time\"] = df_activities[\"activity_starting_time\"] + df_activities[\"activity_duration\"]\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2cbef16d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"numeric_columns = [\n",
|
|
" \"activity_calories\",\n",
|
|
" \"activity_heart_rate_average\",\n",
|
|
" \"activity_pace_average\",\n",
|
|
" \"activity_breathing_rate_average\",\n",
|
|
"]\n",
|
|
"\n",
|
|
"for col in numeric_columns:\n",
|
|
"    # Text columns get commas removed and the literal '--' mapped to '0' before parsing.\n",
|
|
"    column_is_text = pd.api.types.is_object_dtype(df_activities[col]) or pd.api.types.is_string_dtype(df_activities[col])\n",
|
|
"    if column_is_text:\n",
|
|
"        df_activities[col] = pd.to_numeric(df_activities[col].str.replace(',', '').replace('--', '0'), errors='coerce')\n",
|
|
"    else:\n",
|
|
"        df_activities[col] = pd.to_numeric(df_activities[col], errors='coerce')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "708414b6",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Sleep"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d3ec20f3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_sleep[\"date\"] = pd.to_datetime(df_sleep[\"date\"], errors=\"coerce\")\n",
|
|
"\n",
|
|
"df_sleep = df_sleep[df_sleep[\"sleep_bedtime\"] != \"--\"].copy()\n",
|
|
"\n",
|
|
"df_sleep[\"sleep_bedtime\"] = pd.to_timedelta(\n",
|
|
" pd.to_datetime(df_sleep[\"sleep_bedtime\"].astype(str), format=\"%I:%M %p\").dt.hour * 3600 +\n",
|
|
" pd.to_datetime(df_sleep[\"sleep_bedtime\"].astype(str), format=\"%I:%M %p\").dt.minute * 60,\n",
|
|
" unit=\"s\"\n",
|
|
")\n",
|
|
"\n",
|
|
"# Handle AM times by adding 24 hours to times before noon\n",
|
|
"mask = df_sleep[\"sleep_bedtime\"].dt.components['hours'] < 12\n",
|
|
"df_sleep.loc[mask, \"sleep_bedtime\"] = df_sleep.loc[mask, \"sleep_bedtime\"] + pd.Timedelta(days=1)\n",
|
|
"\n",
|
|
"df_sleep[\"sleep_duration\"] = pd.to_timedelta(df_sleep[\"sleep_duration\"], errors=\"coerce\")\n",
|
|
"df_sleep[\"sleep_duration_seconds\"] = (df_sleep[\"sleep_duration\"]).dt.total_seconds()\n",
|
|
"df_sleep[\"sleep_duration_needed\"] = pd.to_timedelta(df_sleep[\"sleep_duration_needed\"], errors=\"coerce\")\n",
|
|
"df_sleep[\"sleep_duration_needed_seconds\"] = (df_sleep[\"sleep_duration_needed\"]).dt.total_seconds() \n",
|
|
"\n",
|
|
"df_sleep[\"sleep_duration_needed_delta\"] = df_sleep[\"sleep_duration\"] - df_sleep[\"sleep_duration_needed\"]\n",
|
|
"df_sleep[\"sleep_duration_needed_delta_seconds\"] = (df_sleep[\"sleep_duration_needed_delta\"]).dt.total_seconds()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "845cc713",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"numeric_columns = [\n",
|
|
" \"sleep_score\",\n",
|
|
" \"sleep_resting_heart_rate\",\n",
|
|
" \"sleep_hrv_status\",\n",
|
|
" \"sleep_breathing_rate\",\n",
|
|
"]\n",
|
|
"\n",
|
|
"for col in numeric_columns:\n",
|
|
"    # Text columns get commas removed and the literal '--' mapped to '0' before parsing.\n",
|
|
"    column_is_text = pd.api.types.is_object_dtype(df_sleep[col]) or pd.api.types.is_string_dtype(df_sleep[col])\n",
|
|
"    if column_is_text:\n",
|
|
"        df_sleep[col] = pd.to_numeric(df_sleep[col].str.replace(',', '').replace('--', '0'), errors='coerce')\n",
|
|
"    else:\n",
|
|
"        df_sleep[col] = pd.to_numeric(df_sleep[col], errors='coerce')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "fabceba5",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Combined"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "05da5fe7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_latest_activity = df_activities.sort_values('activity_ending_time').groupby('date', as_index=False).last()\n",
|
|
"\n",
|
|
"df_combined = pd.merge(df_latest_activity, df_sleep, on='date', how='right')\n",
|
|
"\n",
|
|
"if \"activity_ending_time\" in df_combined.columns and \"sleep_bedtime\" in df_combined.columns:\n",
|
|
" df_combined[\"bedtime_activity_ending_delta\"] = df_combined[\"sleep_bedtime\"] - df_combined[\"activity_ending_time\"]\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "427af06c",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Save cleaned data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6a8888ce",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_combined.to_csv(\"data/cleaned/combined_activities_sleep.csv\", index=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "730ab7c4",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Data overview"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "47ffa998",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_combined.head(30)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ce818c76",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_combined.dtypes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "4c848335",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Visualizations"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "e228914c",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Correlation Matrix sleep after all activities"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "41080d47",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"filtered_all_activities = df_combined[(df_combined[\"activity_calories\"].notna())]\n",
|
|
"correlation_matrix = filtered_all_activities.corr(numeric_only=True)\n",
|
|
"\n",
|
|
"plt.figure(figsize=(12, 10))\n",
|
|
"sns.heatmap(correlation_matrix, annot=True, cmap=\"coolwarm\", center=0)\n",
|
|
"plt.title(\"Korrelationsmatrix - Alle Aktivitäten\")\n",
|
|
"plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "d5ba27c4",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Correlation Matrix sleep after activities < 4h before sleep"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "cf54e6c7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"df_combine_activities_4_hours_before_sleep = df_combined[(df_combined[\"activity_calories\"].notna()) & (\n",
|
|
" df_combined[\"bedtime_activity_ending_delta\"] < pd.Timedelta(hours=4))]\n",
|
|
"\n",
|
|
"correlation_matrix = df_combine_activities_4_hours_before_sleep.corr(\n",
|
|
" numeric_only=True)\n",
|
|
"\n",
|
|
"plt.figure(figsize=(12, 10))\n",
|
|
"sns.heatmap(correlation_matrix, annot=True, cmap=\"coolwarm\", center=0)\n",
|
|
"plt.title(\"Korrelationsmatrix- Trainings 4 Stunden vor dem Schlafen\")\n",
|
|
"plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "634ee858",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Scatter plot"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e2246df2",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import matplotlib.pyplot as plt\n",
|
|
"\n",
|
|
"filtered_df_combined_4h_before_sleep = df_combine_activities_4_hours_before_sleep[\n",
|
|
" (df_combine_activities_4_hours_before_sleep[\"activity_calories\"].notna())\n",
|
|
"]\n",
|
|
"\n",
|
|
"blue_count = len(filtered_df_combined_4h_before_sleep)\n",
|
|
"\n",
|
|
"plt.figure(figsize=(8, 6))\n",
|
|
"plt.scatter(\n",
|
|
" filtered_df_combined_4h_before_sleep['activity_heart_rate_average'],\n",
|
|
" filtered_df_combined_4h_before_sleep['sleep_score'],\n",
|
|
" alpha=0.7,\n",
|
|
" color='blue',\n",
|
|
" label=f'Training <4h vor Schlaf (n={blue_count})'\n",
|
|
")\n",
|
|
"filtered_df_combined_more_than_4h_before_sleep = df_combined[\n",
|
|
" (df_combined[\"activity_calories\"].notna()) &\n",
|
|
"    (df_combined[\"bedtime_activity_ending_delta\"] >= pd.Timedelta(hours=4))\n",
|
|
"]\n",
|
|
"\n",
|
|
"red_count = len(filtered_df_combined_more_than_4h_before_sleep)\n",
|
|
"plt.scatter(\n",
|
|
" filtered_df_combined_more_than_4h_before_sleep['activity_heart_rate_average'],\n",
|
|
" filtered_df_combined_more_than_4h_before_sleep['sleep_score'],\n",
|
|
" alpha=0.7,\n",
|
|
" color='red',\n",
|
|
" label=f'Training ≥4h vor Schlaf (n={red_count})'\n",
|
|
")\n",
|
|
"\n",
|
|
"plt.xlabel('Activity Heart Rate Average')\n",
|
|
"plt.ylabel('Sleep Score That Night')\n",
|
|
"plt.ylim(60, 110)\n",
|
|
"plt.grid(True)\n",
|
|
"plt.legend(title='Gruppe', loc='best')\n",
|
|
"plt.show()\n",
|
|
"\n",
|
|
"print(f'Number of blue points: {blue_count}')\n",
|
|
"print(f'Number of red points: {red_count}')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "52f93b1e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"counts = {\n",
|
|
" \"Kein Training\": len(df_combined[(df_combined[\"activity_calories\"].isna())]),\n",
|
|
" \"Training weniger als 4h vor Schlaf\": len(df_combined[(df_combined[\"activity_calories\"].notna()) & (\n",
|
|
" df_combined[\"bedtime_activity_ending_delta\"] < pd.Timedelta(hours=4))]),\n",
|
|
" \"Training mehr als 4h vor Schlaf\": len(df_combined[(df_combined[\"activity_calories\"].notna()) & (\n",
|
|
" df_combined[\"bedtime_activity_ending_delta\"] >= pd.Timedelta(hours=4))]),\n",
|
|
"}\n",
|
|
"\n",
|
|
"labels = list(counts.keys())\n",
|
|
"sizes = list(counts.values())\n",
|
|
"\n",
|
|
"plt.figure(figsize=(6, 6))\n",
|
|
"plt.pie(\n",
|
|
" sizes,\n",
|
|
" labels=[f\"{lab} ({cnt})\" for lab, cnt in zip(labels, sizes)],\n",
|
|
" autopct=\"%1.1f%%\",\n",
|
|
" startangle=90,\n",
|
|
")\n",
|
|
"plt.title(\n",
|
|
" \"Verteilung: Kein Training / Training <4h vor Schlaf / Training ≥4h vor Schlaf\")\n",
|
|
"plt.axis(\"equal\")\n",
|
|
"plt.show()"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "base",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|