Use read_csv() to download the data directly
parent
85171acd17
commit
ed704b8e34
|
@ -111,41 +111,6 @@
|
|||
"plt.rc('ytick', labelsize=10)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Download `lifesat.csv` from github, unless it's already available locally:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Downloading lifesat.csv\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from pathlib import Path\n",
|
||||
"import urllib.request\n",
|
||||
"\n",
|
||||
"datapath = Path() / \"datasets\" / \"lifesat\"\n",
|
||||
"datapath.mkdir(parents=True, exist_ok=True)\n",
|
||||
"\n",
|
||||
"data_root = \"https://github.com/ageron/data/raw/main/\"\n",
|
||||
"filename = \"lifesat.csv\"\n",
|
||||
"if not (datapath / filename).is_file():\n",
|
||||
" print(\"Downloading\", filename)\n",
|
||||
" url = data_root + \"lifesat/\" + filename\n",
|
||||
" urllib.request.urlretrieve(url, datapath / filename)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
@ -155,7 +120,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -179,15 +144,14 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.linear_model import LinearRegression\n",
|
||||
"\n",
|
||||
"# Load and prepare the data\n",
|
||||
"lifesat = pd.read_csv(Path() / \"datasets\" / \"lifesat\" / \"lifesat.csv\")\n",
|
||||
"# Download and prepare the data\n",
|
||||
"data_root = \"https://github.com/ageron/data/raw/main/\"\n",
|
||||
"lifesat = pd.read_csv(data_root + \"lifesat/lifesat.csv\")\n",
|
||||
"X = lifesat[[\"GDP per capita (USD)\"]].values\n",
|
||||
"y = lifesat[[\"Life satisfaction\"]].values\n",
|
||||
"\n",
|
||||
|
@ -232,7 +196,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -279,10 +243,12 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"# Where to save the figures\n",
|
||||
"IMAGES_PATH = Path() / \"images\" / \"fundamentals\"\n",
|
||||
"IMAGES_PATH.mkdir(parents=True, exist_ok=True)\n",
|
||||
|
@ -312,7 +278,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -325,6 +291,12 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"import urllib.request\n",
|
||||
"\n",
|
||||
"datapath = Path() / \"datasets\" / \"lifesat\"\n",
|
||||
"datapath.mkdir(parents=True, exist_ok=True)\n",
|
||||
"\n",
|
||||
"data_root = \"https://github.com/ageron/data/raw/main/\"\n",
|
||||
"for filename in (\"oecd_bli.csv\", \"gdp_per_capita.csv\"):\n",
|
||||
" if not (datapath / filename).is_file():\n",
|
||||
" print(\"Downloading\", filename)\n",
|
||||
|
@ -334,7 +306,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -351,7 +323,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -417,7 +389,7 @@
|
|||
"Algeria 10681.679297"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -444,7 +416,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -717,7 +689,7 @@
|
|||
"[5 rows x 24 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -738,7 +710,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -811,7 +783,7 @@
|
|||
"Chile 23324.524751 6.5"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -834,7 +806,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -907,7 +879,7 @@
|
|||
"Hungary 31007.768407 5.6"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -923,7 +895,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -933,7 +905,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -984,7 +956,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1069,7 +1041,7 @@
|
|||
"United States 60235.728492 6.9"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -1081,7 +1053,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1126,7 +1098,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1152,7 +1124,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1188,7 +1160,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1197,7 +1169,7 @@
|
|||
"37655.1803457421"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -1209,7 +1181,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1218,7 +1190,7 @@
|
|||
"6.301656332738056"
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -1230,7 +1202,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1271,7 +1243,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1368,7 +1340,7 @@
|
|||
"Luxembourg 110261.157353 6.9"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -1381,7 +1353,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1400,7 +1372,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1448,7 +1420,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1491,7 +1463,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1505,7 +1477,7 @@
|
|||
"Name: Life satisfaction, dtype: float64"
|
||||
]
|
||||
},
|
||||
"execution_count": 28,
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -1517,7 +1489,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1633,7 +1605,7 @@
|
|||
"Switzerland 68393.306004"
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -1645,7 +1617,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1727,9 +1699,9 @@
|
|||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "homl3",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "homl3"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
@ -1741,7 +1713,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.12"
|
||||
"version": "3.9.10"
|
||||
},
|
||||
"metadata": {
|
||||
"interpreter": {
|
||||
|
|
Loading…
Reference in New Issue