From 2f777d7f3f9adf025389d7fc808d5babc7d8e431 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Mon, 21 Feb 2022 09:51:32 +1300 Subject: [PATCH] Tarballs like housing.tgz include the parent directory --- 02_end_to_end_machine_learning_project.ipynb | 21 +++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb index f8fe208..167ab7c 100644 --- a/02_end_to_end_machine_learning_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -109,22 +109,19 @@ "outputs": [], "source": [ "from pathlib import Path\n", + "import pandas as pd\n", "import tarfile\n", "import urllib.request\n", "\n", - "import pandas as pd\n", - "\n", "def load_housing_data():\n", - " housing_path = Path() / \"datasets\" / \"housing\"\n", - " if not (housing_path / \"housing.csv\").is_file():\n", - " housing_path.mkdir(parents=True, exist_ok=True)\n", - " data_root = \"https://github.com/ageron/data/raw/main/\"\n", - " url = data_root + \"housing/housing.tgz\"\n", - " tgz_path = housing_path / \"housing.tgz\"\n", - " urllib.request.urlretrieve(url, tgz_path)\n", - " with tarfile.open(tgz_path) as housing_tgz:\n", - " housing_tgz.extractall(path=housing_path)\n", - " return pd.read_csv(housing_path / \"housing.csv\")\n", + " tarball_path = Path(\"datasets/housing.tgz\")\n", + " if not tarball_path.is_file():\n", + " Path(\"datasets\").mkdir(parents=True, exist_ok=True)\n", + " url = \"https://github.com/ageron/data/raw/main/housing.tgz\"\n", + " urllib.request.urlretrieve(url, tarball_path)\n", + " with tarfile.open(tarball_path) as housing_tarball:\n", + " housing_tarball.extractall(path=\"datasets\")\n", + " return pd.read_csv(Path(\"datasets/housing/housing.csv\"))\n", "\n", "housing = load_housing_data()" ]