Fix titanic data download function

main
Aurélien Geron 2022-02-21 10:20:48 +13:00
parent 2f777d7f3f
commit 517a2f18be
1 changed file with 11 additions and 12 deletions

View File

@@ -2528,22 +2528,21 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from pathlib import Path\n",
"import pandas as pd\n", "import pandas as pd\n",
"import tarfile\n",
"import urllib.request\n", "import urllib.request\n",
"\n", "\n",
"def load_titanic_data():\n", "def load_titanic_data():\n",
" titanic_path = Path() / \"datasets\" / \"titanic\"\n", " tarball_path = Path(\"datasets/titanic.tgz\")\n",
" titanic_path.mkdir(parents=True, exist_ok=True)\n", " if not tarball_path.is_file():\n",
" filenames = (\"train.csv\", \"test.csv\")\n", " Path(\"datasets\").mkdir(parents=True, exist_ok=True)\n",
" for filename in filenames:\n", " url = \"https://github.com/ageron/data/raw/main/titanic.tgz\"\n",
" filepath = titanic_path / filename\n", " urllib.request.urlretrieve(url, tarball_path)\n",
" if filepath.is_file():\n", " with tarfile.open(tarball_path) as titanic_tarball:\n",
" continue\n", " titanic_tarball.extractall(path=\"datasets\")\n",
" data_root = \"https://github.com/ageron/data/raw/main/\"\n", " return [pd.read_csv(Path(\"datasets/titanic\") / filename)\n",
" url = data_root + \"titanic/\" + filename\n", " for filename in (\"train.csv\", \"test.csv\")]"
" print(\"Downloading\", filename)\n",
" urllib.request.urlretrieve(url, filepath)\n",
" return [pd.read_csv(titanic_path / filename) for filename in filenames]"
] ]
}, },
{ {