Fix titanic data download function
parent
2f777d7f3f
commit
517a2f18be
|
@ -2528,22 +2528,21 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"from pathlib import Path\n",
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
|
"import tarfile\n",
|
||||||
"import urllib.request\n",
|
"import urllib.request\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def load_titanic_data():\n",
|
"def load_titanic_data():\n",
|
||||||
" titanic_path = Path() / \"datasets\" / \"titanic\"\n",
|
" tarball_path = Path(\"datasets/titanic.tgz\")\n",
|
||||||
" titanic_path.mkdir(parents=True, exist_ok=True)\n",
|
" if not tarball_path.is_file():\n",
|
||||||
" filenames = (\"train.csv\", \"test.csv\")\n",
|
" Path(\"datasets\").mkdir(parents=True, exist_ok=True)\n",
|
||||||
" for filename in filenames:\n",
|
" url = \"https://github.com/ageron/data/raw/main/titanic.tgz\"\n",
|
||||||
" filepath = titanic_path / filename\n",
|
" urllib.request.urlretrieve(url, tarball_path)\n",
|
||||||
" if filepath.is_file():\n",
|
" with tarfile.open(tarball_path) as titanic_tarball:\n",
|
||||||
" continue\n",
|
" titanic_tarball.extractall(path=\"datasets\")\n",
|
||||||
" data_root = \"https://github.com/ageron/data/raw/main/\"\n",
|
" return [pd.read_csv(Path(\"datasets/titanic\") / filename)\n",
|
||||||
" url = data_root + \"titanic/\" + filename\n",
|
" for filename in (\"train.csv\", \"test.csv\")]"
|
||||||
" print(\"Downloading\", filename)\n",
|
|
||||||
" urllib.request.urlretrieve(url, filepath)\n",
|
|
||||||
" return [pd.read_csv(titanic_path / filename) for filename in filenames]"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue