diff --git a/01_the_machine_learning_landscape.ipynb b/01_the_machine_learning_landscape.ipynb index 71e6f3b..68a6720 100644 --- a/01_the_machine_learning_landscape.ipynb +++ b/01_the_machine_learning_landscape.ipynb @@ -118,7 +118,7 @@ "import pandas as pd\n", "from sklearn.linear_model import LinearRegression\n", "\n", - "# Load the data\n", + "# Load and prepare the data\n", "lifesat = pd.read_csv(Path() / \"datasets\" / \"lifesat\" / \"lifesat.csv\")\n", "X = lifesat[[\"GDP per capita (USD)\"]].values\n", "y = lifesat[[\"Life satisfaction\"]].values\n", @@ -305,7 +305,7 @@ "outputs": [], "source": [ "full_country_stats = prepare_country_stats(oecd_bli, gdp_per_capita)\n", - "full_country_stats.to_csv(datapath / \"lifesat.csv\")" + "full_country_stats.to_csv(datapath / \"lifesat_full.csv\")" ] }, { @@ -326,6 +326,7 @@ "max_gdp = 62_500\n", "country_stats = full_country_stats[(full_country_stats[gdppc] >= min_gdp) &\n", " (full_country_stats[gdppc] <= max_gdp)]\n", + "country_stats.to_csv(datapath / \"lifesat.csv\")\n", "country_stats.head()" ] }, @@ -373,7 +374,7 @@ "outputs": [], "source": [ "highlighted_countries = country_stats.loc[list(position_text.keys())]\n", - "highlighted_countries[[\"Life satisfaction\"]].sort_values(by=\"Life satisfaction\")" + "highlighted_countries[[gdppc, \"Life satisfaction\"]].sort_values(by=gdppc)" ] }, {