Tarballs like housing.tgz include the parent directory

main
Aurélien Geron 2022-02-21 10:23:31 +13:00
parent 517a2f18be
commit 0ce6548ddc
1 changed file with 10 additions and 13 deletions

View File

@@ -1072,23 +1072,20 @@
"source": [
"# extra code at least not in this chapter, it's presented in chapter 2\n",
"\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"import tarfile\n",
"import urllib.request\n",
"\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"def load_housing_data():\n",
" housing_path = Path() / \"datasets\" / \"housing\"\n",
" if not (housing_path / \"housing.csv\").is_file():\n",
" housing_path.mkdir(parents=True, exist_ok=True)\n",
" root = \"https://github.com/ageron/data/raw/main/\"\n",
" url = root + \"housing/housing.tgz\"\n",
" tgz_path = housing_path / \"housing.tgz\"\n",
" urllib.request.urlretrieve(url, tgz_path)\n",
" with tarfile.open(tgz_path) as housing_tgz:\n",
" housing_tgz.extractall(path=housing_path)\n",
" return pd.read_csv(housing_path / \"housing.csv\")\n",
" tarball_path = Path(\"datasets/housing.tgz\")\n",
" if not tarball_path.is_file():\n",
" Path(\"datasets\").mkdir(parents=True, exist_ok=True)\n",
" url = \"https://github.com/ageron/data/raw/main/housing.tgz\"\n",
" urllib.request.urlretrieve(url, tarball_path)\n",
" with tarfile.open(tarball_path) as housing_tarball:\n",
" housing_tarball.extractall(path=\"datasets\")\n",
" return pd.read_csv(Path(\"datasets/housing/housing.csv\"))\n",
"\n",
"housing = load_housing_data()\n",
"\n",