Save both lifesat_full.csv (all countries) and lifesat.csv (GDP-filtered subset)

2021-10-19 11:55:02 +13:00 · 2021-10-19 11:55:02 +13:00 · 84f173b600
parent 0b8a519395
commit 84f173b600
1 changed file with 4 additions and 3 deletions
--- a/01_the_machine_learning_landscape.ipynb
+++ b/01_the_machine_learning_landscape.ipynb
@@ -118,7 +118,7 @@
    "import pandas as pd\n",
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
-    "# Load the data\n",
+    "# Load and prepare the data\n",
    "lifesat = pd.read_csv(Path() / \"datasets\" / \"lifesat\" / \"lifesat.csv\")\n",
    "X = lifesat[[\"GDP per capita (USD)\"]].values\n",
    "y = lifesat[[\"Life satisfaction\"]].values\n",
@@ -305,7 +305,7 @@
   "outputs": [],
   "source": [
    "full_country_stats = prepare_country_stats(oecd_bli, gdp_per_capita)\n",
-    "full_country_stats.to_csv(datapath / \"lifesat.csv\")"
+    "full_country_stats.to_csv(datapath / \"lifesat_full.csv\")"
   ]
  },
  {
@@ -326,6 +326,7 @@
    "max_gdp = 62_500\n",
    "country_stats = full_country_stats[(full_country_stats[gdppc] >= min_gdp) &\n",
    "                                   (full_country_stats[gdppc] <= max_gdp)]\n",
+    "country_stats.to_csv(datapath / \"lifesat.csv\")\n",
    "country_stats.head()"
   ]
  },
@@ -373,7 +374,7 @@
   "outputs": [],
   "source": [
    "highlighted_countries = country_stats.loc[list(position_text.keys())]\n",
-    "highlighted_countries[[\"Life satisfaction\"]].sort_values(by=\"Life satisfaction\")"
+    "highlighted_countries[[gdppc, \"Life satisfaction\"]].sort_values(by=gdppc)"
   ]
  },
  {