Tarballs like housing.tgz include the parent directory

main
Aurélien Geron 2022-02-21 09:51:32 +13:00
parent 027d7368c0
commit 2f777d7f3f
1 changed file with 9 additions and 12 deletions

View File

@@ -109,22 +109,19 @@
"outputs": [],
"source": [
"from pathlib import Path\n",
"import pandas as pd\n",
"import tarfile\n",
"import urllib.request\n",
"\n",
"import pandas as pd\n",
"\n",
"def load_housing_data():\n",
" housing_path = Path() / \"datasets\" / \"housing\"\n",
" if not (housing_path / \"housing.csv\").is_file():\n",
" housing_path.mkdir(parents=True, exist_ok=True)\n",
" data_root = \"https://github.com/ageron/data/raw/main/\"\n",
" url = data_root + \"housing/housing.tgz\"\n",
" tgz_path = housing_path / \"housing.tgz\"\n",
" urllib.request.urlretrieve(url, tgz_path)\n",
" with tarfile.open(tgz_path) as housing_tgz:\n",
" housing_tgz.extractall(path=housing_path)\n",
" return pd.read_csv(housing_path / \"housing.csv\")\n",
" tarball_path = Path(\"datasets/housing.tgz\")\n",
" if not tarball_path.is_file():\n",
" Path(\"datasets\").mkdir(parents=True, exist_ok=True)\n",
" url = \"https://github.com/ageron/data/raw/main/housing.tgz\"\n",
" urllib.request.urlretrieve(url, tarball_path)\n",
" with tarfile.open(tarball_path) as housing_tarball:\n",
" housing_tarball.extractall(path=\"datasets\")\n",
" return pd.read_csv(Path(\"datasets/housing/housing.csv\"))\n",
"\n",
"housing = load_housing_data()"
]