Have lifesat_full.csv and lifesat.csv

main
Aurélien Geron 2021-10-19 11:55:02 +13:00
parent 0b8a519395
commit 84f173b600
1 changed file with 4 additions and 3 deletions

View File

@@ -118,7 +118,7 @@
"import pandas as pd\n", "import pandas as pd\n",
"from sklearn.linear_model import LinearRegression\n", "from sklearn.linear_model import LinearRegression\n",
"\n", "\n",
"# Load the data\n", "# Load and prepare the data\n",
"lifesat = pd.read_csv(Path() / \"datasets\" / \"lifesat\" / \"lifesat.csv\")\n", "lifesat = pd.read_csv(Path() / \"datasets\" / \"lifesat\" / \"lifesat.csv\")\n",
"X = lifesat[[\"GDP per capita (USD)\"]].values\n", "X = lifesat[[\"GDP per capita (USD)\"]].values\n",
"y = lifesat[[\"Life satisfaction\"]].values\n", "y = lifesat[[\"Life satisfaction\"]].values\n",
@@ -305,7 +305,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"full_country_stats = prepare_country_stats(oecd_bli, gdp_per_capita)\n", "full_country_stats = prepare_country_stats(oecd_bli, gdp_per_capita)\n",
"full_country_stats.to_csv(datapath / \"lifesat.csv\")" "full_country_stats.to_csv(datapath / \"lifesat_full.csv\")"
] ]
}, },
{ {
@@ -326,6 +326,7 @@
"max_gdp = 62_500\n", "max_gdp = 62_500\n",
"country_stats = full_country_stats[(full_country_stats[gdppc] >= min_gdp) &\n", "country_stats = full_country_stats[(full_country_stats[gdppc] >= min_gdp) &\n",
" (full_country_stats[gdppc] <= max_gdp)]\n", " (full_country_stats[gdppc] <= max_gdp)]\n",
"country_stats.to_csv(datapath / \"lifesat.csv\")\n",
"country_stats.head()" "country_stats.head()"
] ]
}, },
@@ -373,7 +374,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"highlighted_countries = country_stats.loc[list(position_text.keys())]\n", "highlighted_countries = country_stats.loc[list(position_text.keys())]\n",
"highlighted_countries[[\"Life satisfaction\"]].sort_values(by=\"Life satisfaction\")" "highlighted_countries[[gdppc, \"Life satisfaction\"]].sort_values(by=gdppc)"
] ]
}, },
{ {