From 0ce6548ddca2583ac367b67b1b460fb31b7f590c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Mon, 21 Feb 2022 10:23:31 +1300 Subject: [PATCH] Tarballs like housing.tgz include the parent directory --- 07_ensemble_learning_and_random_forests.ipynb | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/07_ensemble_learning_and_random_forests.ipynb b/07_ensemble_learning_and_random_forests.ipynb index d7a0683..70bc221 100644 --- a/07_ensemble_learning_and_random_forests.ipynb +++ b/07_ensemble_learning_and_random_forests.ipynb @@ -1072,23 +1072,20 @@ "source": [ "# extra code – at least not in this chapter, it's presented in chapter 2\n", "\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", "import tarfile\n", "import urllib.request\n", "\n", - "import pandas as pd\n", - "from sklearn.model_selection import train_test_split\n", - "\n", "def load_housing_data():\n", - " housing_path = Path() / \"datasets\" / \"housing\"\n", - " if not (housing_path / \"housing.csv\").is_file():\n", - " housing_path.mkdir(parents=True, exist_ok=True)\n", - " root = \"https://github.com/ageron/data/raw/main/\"\n", - " url = root + \"housing/housing.tgz\"\n", - " tgz_path = housing_path / \"housing.tgz\"\n", - " urllib.request.urlretrieve(url, tgz_path)\n", - " with tarfile.open(tgz_path) as housing_tgz:\n", - " housing_tgz.extractall(path=housing_path)\n", - " return pd.read_csv(housing_path / \"housing.csv\")\n", + " tarball_path = Path(\"datasets/housing.tgz\")\n", + " if not tarball_path.is_file():\n", + " Path(\"datasets\").mkdir(parents=True, exist_ok=True)\n", + " url = \"https://github.com/ageron/data/raw/main/housing.tgz\"\n", + " urllib.request.urlretrieve(url, tarball_path)\n", + " with tarfile.open(tarball_path) as housing_tarball:\n", + " housing_tarball.extractall(path=\"datasets\")\n", + " return pd.read_csv(Path(\"datasets/housing/housing.csv\"))\n", "\n", "housing = load_housing_data()\n", "\n",