Use read_csv() to download the data directly

main
Aurélien Geron 2022-02-19 23:18:10 +13:00
parent 85171acd17
commit ed704b8e34
1 changed files with 49 additions and 77 deletions

View File

@ -111,41 +111,6 @@
"plt.rc('ytick', labelsize=10)" "plt.rc('ytick', labelsize=10)"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Download `lifesat.csv` from github, unless it's already available locally:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading lifesat.csv\n"
]
}
],
"source": [
"from pathlib import Path\n",
"import urllib.request\n",
"\n",
"datapath = Path() / \"datasets\" / \"lifesat\"\n",
"datapath.mkdir(parents=True, exist_ok=True)\n",
"\n",
"data_root = \"https://github.com/ageron/data/raw/main/\"\n",
"filename = \"lifesat.csv\"\n",
"if not (datapath / filename).is_file():\n",
" print(\"Downloading\", filename)\n",
" url = data_root + \"lifesat/\" + filename\n",
" urllib.request.urlretrieve(url, datapath / filename)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -155,7 +120,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -179,15 +144,14 @@
} }
], ],
"source": [ "source": [
"from pathlib import Path\n",
"\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"import numpy as np\n", "import numpy as np\n",
"import pandas as pd\n", "import pandas as pd\n",
"from sklearn.linear_model import LinearRegression\n", "from sklearn.linear_model import LinearRegression\n",
"\n", "\n",
"# Load and prepare the data\n", "# Download and prepare the data\n",
"lifesat = pd.read_csv(Path() / \"datasets\" / \"lifesat\" / \"lifesat.csv\")\n", "data_root = \"https://github.com/ageron/data/raw/main/\"\n",
"lifesat = pd.read_csv(data_root + \"lifesat/lifesat.csv\")\n",
"X = lifesat[[\"GDP per capita (USD)\"]].values\n", "X = lifesat[[\"GDP per capita (USD)\"]].values\n",
"y = lifesat[[\"Life satisfaction\"]].values\n", "y = lifesat[[\"Life satisfaction\"]].values\n",
"\n", "\n",
@ -232,7 +196,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -279,10 +243,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from pathlib import Path\n",
"\n",
"# Where to save the figures\n", "# Where to save the figures\n",
"IMAGES_PATH = Path() / \"images\" / \"fundamentals\"\n", "IMAGES_PATH = Path() / \"images\" / \"fundamentals\"\n",
"IMAGES_PATH.mkdir(parents=True, exist_ok=True)\n", "IMAGES_PATH.mkdir(parents=True, exist_ok=True)\n",
@ -312,7 +278,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -325,6 +291,12 @@
} }
], ],
"source": [ "source": [
"import urllib.request\n",
"\n",
"datapath = Path() / \"datasets\" / \"lifesat\"\n",
"datapath.mkdir(parents=True, exist_ok=True)\n",
"\n",
"data_root = \"https://github.com/ageron/data/raw/main/\"\n",
"for filename in (\"oecd_bli.csv\", \"gdp_per_capita.csv\"):\n", "for filename in (\"oecd_bli.csv\", \"gdp_per_capita.csv\"):\n",
" if not (datapath / filename).is_file():\n", " if not (datapath / filename).is_file():\n",
" print(\"Downloading\", filename)\n", " print(\"Downloading\", filename)\n",
@ -334,7 +306,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -351,7 +323,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -417,7 +389,7 @@
"Algeria 10681.679297" "Algeria 10681.679297"
] ]
}, },
"execution_count": 11, "execution_count": 10,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -444,7 +416,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -717,7 +689,7 @@
"[5 rows x 24 columns]" "[5 rows x 24 columns]"
] ]
}, },
"execution_count": 12, "execution_count": 11,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -738,7 +710,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -811,7 +783,7 @@
"Chile 23324.524751 6.5" "Chile 23324.524751 6.5"
] ]
}, },
"execution_count": 13, "execution_count": 12,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -834,7 +806,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -907,7 +879,7 @@
"Hungary 31007.768407 5.6" "Hungary 31007.768407 5.6"
] ]
}, },
"execution_count": 14, "execution_count": 13,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -923,7 +895,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -933,7 +905,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -984,7 +956,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 16,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1069,7 +1041,7 @@
"United States 60235.728492 6.9" "United States 60235.728492 6.9"
] ]
}, },
"execution_count": 17, "execution_count": 16,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -1081,7 +1053,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 17,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1126,7 +1098,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 18,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1152,7 +1124,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 19,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1188,7 +1160,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 21, "execution_count": 20,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1197,7 +1169,7 @@
"37655.1803457421" "37655.1803457421"
] ]
}, },
"execution_count": 21, "execution_count": 20,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -1209,7 +1181,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 22, "execution_count": 21,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1218,7 +1190,7 @@
"6.301656332738056" "6.301656332738056"
] ]
}, },
"execution_count": 22, "execution_count": 21,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -1230,7 +1202,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23, "execution_count": 22,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1271,7 +1243,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 24, "execution_count": 23,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1368,7 +1340,7 @@
"Luxembourg 110261.157353 6.9" "Luxembourg 110261.157353 6.9"
] ]
}, },
"execution_count": 24, "execution_count": 23,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -1381,7 +1353,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 25, "execution_count": 24,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1400,7 +1372,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 26, "execution_count": 25,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1448,7 +1420,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 27, "execution_count": 26,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1491,7 +1463,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 28, "execution_count": 27,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1505,7 +1477,7 @@
"Name: Life satisfaction, dtype: float64" "Name: Life satisfaction, dtype: float64"
] ]
}, },
"execution_count": 28, "execution_count": 27,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -1517,7 +1489,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 29, "execution_count": 28,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1633,7 +1605,7 @@
"Switzerland 68393.306004" "Switzerland 68393.306004"
] ]
}, },
"execution_count": 29, "execution_count": 28,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -1645,7 +1617,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 30, "execution_count": 29,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1727,9 +1699,9 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "homl3", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "homl3" "name": "python3"
}, },
"language_info": { "language_info": {
"codemirror_mode": { "codemirror_mode": {
@ -1741,7 +1713,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.12" "version": "3.9.10"
}, },
"metadata": { "metadata": {
"interpreter": { "interpreter": {