Move datasets to project ageron/data to shrink this repo

main
Aurélien Geron 2022-02-19 21:36:43 +13:00
parent 8745a9c2ac
commit c9b977309a
6 changed files with 27 additions and 31 deletions
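All of the notebooks now fetch their datasets from the ageron/data project instead of from this repository. A minimal sketch of the new download pattern is shown below, using the lifesat example from the first notebook hunk; any other dataset/filename pairing is an assumption, and the URL layout is assumed to be https://github.com/ageron/data/raw/main/&lt;dataset&gt;/&lt;filename&gt;.

import urllib.request
from pathlib import Path

# New base URL introduced by this commit: datasets live in the ageron/data project.
data_root = "https://github.com/ageron/data/raw/main/"

# Example taken from the lifesat hunk below; other dataset names would be assumptions.
datapath = Path() / "datasets" / "lifesat"
datapath.mkdir(parents=True, exist_ok=True)
filename = "lifesat.csv"
if not (datapath / filename).is_file():
    print("Downloading", filename)
    urllib.request.urlretrieve(data_root + "lifesat/" + filename, datapath / filename)

The same pattern (build the URL from data_root plus the dataset folder, download only when the local copy is missing) repeats in the lifesat, housing, and titanic hunks below.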

.gitignore
View File

@@ -4,19 +4,14 @@
*.old
*.pyc
.DS_Store
.ipynb_checkpoints
.ipynb_checkpoints/
.vscode/
checkpoint
logs/*
tf_logs/*
images/**/*.png
images/**/*.dot
/logs
/tf_logs
/images
my_*
person.proto
person.desc
person_pb2.py
datasets/flowers
datasets/spam
datasets/words
datasets/jsb_chorales
/person.proto
/person.desc
/person_pb2.py
/datasets

View File

@@ -130,11 +130,11 @@
"datapath = Path() / \"datasets\" / \"lifesat\"\n",
"datapath.mkdir(parents=True, exist_ok=True)\n",
"\n",
"root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
"data_root = \"https://github.com/ageron/data/raw/main/\"\n",
"filename = \"lifesat.csv\"\n",
"if not (datapath / filename).is_file():\n",
" print(\"Downloading\", filename)\n",
" url = root + \"datasets/lifesat/\" + filename\n",
" url = data_root + \"lifesat/\" + filename\n",
" urllib.request.urlretrieve(url, datapath / filename)"
]
},
@@ -283,7 +283,7 @@
"for filename in (\"oecd_bli.csv\", \"gdp_per_capita.csv\"):\n",
" if not (datapath / filename).is_file():\n",
" print(\"Downloading\", filename)\n",
" url = root + \"datasets/lifesat/\" + filename\n",
" url = data_root + \"lifesat/\" + filename\n",
" urllib.request.urlretrieve(url, datapath / filename)"
]
},

View File

@@ -110,8 +110,8 @@
" housing_path = Path() / \"datasets\" / \"housing\"\n",
" if not (housing_path / \"housing.csv\").is_file():\n",
" housing_path.mkdir(parents=True, exist_ok=True)\n",
" root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
" url = root + \"datasets/housing/housing.tgz\"\n",
" data_root = \"https://github.com/ageron/data/raw/main/\"\n",
" url = data_root + \"housing/housing.tgz\"\n",
" tgz_path = housing_path / \"housing.tgz\"\n",
" urllib.request.urlretrieve(url, tgz_path)\n",
" with tarfile.open(tgz_path) as housing_tgz:\n",
@@ -578,8 +578,8 @@
"# Download the California image\n",
"filename = \"california.png\"\n",
"if not (IMAGES_PATH / filename).is_file():\n",
" root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
" url = root + \"images/end_to_end_project/\" + filename\n",
" homl3_root = \"https://github.com/ageron/handson-ml3/raw/main/\"\n",
" url = homl3_root + \"images/end_to_end_project/\" + filename\n",
" print(\"Downloading\", filename)\n",
" urllib.request.urlretrieve(url, IMAGES_PATH / filename)\n",
"\n",

View File

@@ -1635,8 +1635,8 @@
" filepath = titanic_path / filename\n",
" if filepath.is_file():\n",
" continue\n",
" root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
" url = root + \"/datasets/titanic/\" + filename\n",
" data_root = \"https://github.com/ageron/data/raw/main/\"\n",
" url = data_root + \"titanic/\" + filename\n",
" print(\"Downloading\", filename)\n",
" urllib.request.urlretrieve(url, filepath)\n",
" return [pd.read_csv(titanic_path / filename) for filename in filenames]"
@@ -2123,9 +2123,9 @@
"import tarfile\n",
"\n",
"def fetch_spam_data():\n",
" root = \"http://spamassassin.apache.org/old/publiccorpus/\"\n",
" ham_url = root + \"20030228_easy_ham.tar.bz2\"\n",
" spam_url = root + \"20030228_spam.tar.bz2\"\n",
" spam_root = \"http://spamassassin.apache.org/old/publiccorpus/\"\n",
" ham_url = spam_root + \"20030228_easy_ham.tar.bz2\"\n",
" spam_url = spam_root + \"20030228_spam.tar.bz2\"\n",
"\n",
" spam_path = Path() / \"datasets\" / \"spam\"\n",
" spam_path.mkdir(parents=True, exist_ok=True)\n",

View File

@@ -755,8 +755,8 @@
" housing_path = Path() / \"datasets\" / \"housing\"\n",
" if not (housing_path / \"housing.csv\").is_file():\n",
" housing_path.mkdir(parents=True, exist_ok=True)\n",
" root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
" url = root + \"datasets/housing/housing.tgz\"\n",
" root = \"https://github.com/ageron/data/raw/main/\"\n",
" url = root + \"housing/housing.tgz\"\n",
" tgz_path = housing_path / \"housing.tgz\"\n",
" urllib.request.urlretrieve(url, tgz_path)\n",
" with tarfile.open(tgz_path) as housing_tgz:\n",

View File

@@ -895,7 +895,6 @@
"metadata": {},
"outputs": [],
"source": [
"import urllib.request\n",
"from sklearn.datasets import fetch_openml\n",
"\n",
"mnist = fetch_openml('mnist_784', as_frame=False)"
@@ -1303,14 +1302,16 @@
"metadata": {},
"outputs": [],
"source": [
"# extra code\n",
"# extra code downloads the ladybug image\n",
"\n",
"root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
"import urllib.request\n",
"\n",
"homl3_root = \"https://github.com/ageron/handson-ml3/raw/main/\"\n",
"filename = \"ladybug.png\"\n",
"filepath = IMAGES_PATH / filename\n",
"if not filepath.is_file():\n",
" print(\"Downloading\", filename)\n",
" url = f\"{root}/images/unsupervised_learning/{filename}\"\n",
" url = f\"{homl3_root}/images/unsupervised_learning/{filename}\"\n",
" urllib.request.urlretrieve(url, filepath)"
]
},