From c9b977309aeb24f96000fb1f4bc09417db448633 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Sat, 19 Feb 2022 21:36:43 +1300 Subject: [PATCH] Move datasets to project ageron/data to shrink this repo --- .gitignore | 21 +++++++------------ 01_the_machine_learning_landscape.ipynb | 6 +++--- 02_end_to_end_machine_learning_project.ipynb | 8 +++---- 03_classification.ipynb | 10 ++++----- 07_ensemble_learning_and_random_forests.ipynb | 4 ++-- 09_unsupervised_learning.ipynb | 9 ++++---- 6 files changed, 27 insertions(+), 31 deletions(-) diff --git a/.gitignore b/.gitignore index 30e00e8..d2e9d52 100644 --- a/.gitignore +++ b/.gitignore @@ -4,19 +4,14 @@ *.old *.pyc .DS_Store -.ipynb_checkpoints +.ipynb_checkpoints/ .vscode/ checkpoint -logs/* -tf_logs/* -images/**/*.png -images/**/*.dot +/logs +/tf_logs +/images my_* -person.proto -person.desc -person_pb2.py -datasets/flowers -datasets/spam -datasets/words -datasets/jsb_chorales - +/person.proto +/person.desc +/person_pb2.py +/datasets diff --git a/01_the_machine_learning_landscape.ipynb b/01_the_machine_learning_landscape.ipynb index 90d6586..c858cd4 100644 --- a/01_the_machine_learning_landscape.ipynb +++ b/01_the_machine_learning_landscape.ipynb @@ -130,11 +130,11 @@ "datapath = Path() / \"datasets\" / \"lifesat\"\n", "datapath.mkdir(parents=True, exist_ok=True)\n", "\n", - "root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n", + "data_root = \"https://github.com/ageron/data/raw/main/\"\n", "filename = \"lifesat.csv\"\n", "if not (datapath / filename).is_file():\n", " print(\"Downloading\", filename)\n", - " url = root + \"datasets/lifesat/\" + filename\n", + " url = data_root + \"lifesat/\" + filename\n", " urllib.request.urlretrieve(url, datapath / filename)" ] }, @@ -283,7 +283,7 @@ "for filename in (\"oecd_bli.csv\", \"gdp_per_capita.csv\"):\n", " if not (datapath / filename).is_file():\n", " print(\"Downloading\", filename)\n", - " url = root + \"datasets/lifesat/\" + 
filename\n", + " url = data_root + \"lifesat/\" + filename\n", " urllib.request.urlretrieve(url, datapath / filename)" ] }, diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb index 77c8f01..422fe31 100644 --- a/02_end_to_end_machine_learning_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -110,8 +110,8 @@ " housing_path = Path() / \"datasets\" / \"housing\"\n", " if not (housing_path / \"housing.csv\").is_file():\n", " housing_path.mkdir(parents=True, exist_ok=True)\n", - " root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n", - " url = root + \"datasets/housing/housing.tgz\"\n", + " data_root = \"https://github.com/ageron/data/raw/main/\"\n", + " url = data_root + \"housing/housing.tgz\"\n", " tgz_path = housing_path / \"housing.tgz\"\n", " urllib.request.urlretrieve(url, tgz_path)\n", " with tarfile.open(tgz_path) as housing_tgz:\n", @@ -578,8 +578,8 @@ "# Download the California image\n", "filename = \"california.png\"\n", "if not (IMAGES_PATH / filename).is_file():\n", - " root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n", - " url = root + \"images/end_to_end_project/\" + filename\n", + " homl3_root = \"https://github.com/ageron/handson-ml3/raw/main/\"\n", + " url = homl3_root + \"images/end_to_end_project/\" + filename\n", " print(\"Downloading\", filename)\n", " urllib.request.urlretrieve(url, IMAGES_PATH / filename)\n", "\n", diff --git a/03_classification.ipynb b/03_classification.ipynb index 050d299..30c48e7 100644 --- a/03_classification.ipynb +++ b/03_classification.ipynb @@ -1635,8 +1635,8 @@ " filepath = titanic_path / filename\n", " if filepath.is_file():\n", " continue\n", - " root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n", - " url = root + \"/datasets/titanic/\" + filename\n", + " data_root = \"https://github.com/ageron/data/raw/main/\"\n", + " url = data_root + \"titanic/\" + filename\n", " 
print(\"Downloading\", filename)\n", " urllib.request.urlretrieve(url, filepath)\n", " return [pd.read_csv(titanic_path / filename) for filename in filenames]" @@ -2123,9 +2123,9 @@ "import tarfile\n", "\n", "def fetch_spam_data():\n", - " root = \"http://spamassassin.apache.org/old/publiccorpus/\"\n", - " ham_url = root + \"20030228_easy_ham.tar.bz2\"\n", - " spam_url = root + \"20030228_spam.tar.bz2\"\n", + " spam_root = \"http://spamassassin.apache.org/old/publiccorpus/\"\n", + " ham_url = spam_root + \"20030228_easy_ham.tar.bz2\"\n", + " spam_url = spam_root + \"20030228_spam.tar.bz2\"\n", "\n", " spam_path = Path() / \"datasets\" / \"spam\"\n", " spam_path.mkdir(parents=True, exist_ok=True)\n", diff --git a/07_ensemble_learning_and_random_forests.ipynb b/07_ensemble_learning_and_random_forests.ipynb index 931c23f..9854d5e 100644 --- a/07_ensemble_learning_and_random_forests.ipynb +++ b/07_ensemble_learning_and_random_forests.ipynb @@ -755,8 +755,8 @@ " housing_path = Path() / \"datasets\" / \"housing\"\n", " if not (housing_path / \"housing.csv\").is_file():\n", " housing_path.mkdir(parents=True, exist_ok=True)\n", - " root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n", - " url = root + \"datasets/housing/housing.tgz\"\n", + " root = \"https://github.com/ageron/data/raw/main/\"\n", + " url = root + \"housing/housing.tgz\"\n", " tgz_path = housing_path / \"housing.tgz\"\n", " urllib.request.urlretrieve(url, tgz_path)\n", " with tarfile.open(tgz_path) as housing_tgz:\n", diff --git a/09_unsupervised_learning.ipynb b/09_unsupervised_learning.ipynb index a5d8eae..78fff00 100644 --- a/09_unsupervised_learning.ipynb +++ b/09_unsupervised_learning.ipynb @@ -895,7 +895,6 @@ "metadata": {}, "outputs": [], "source": [ - "import urllib.request\n", "from sklearn.datasets import fetch_openml\n", "\n", "mnist = fetch_openml('mnist_784', as_frame=False)" @@ -1303,14 +1302,16 @@ "metadata": {}, "outputs": [], "source": [ - "# extra code\n", + "# extra 
code – downloads the ladybug image\n",
     "\n",
-    "root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
+    "import urllib.request\n",
+    "\n",
+    "homl3_root = \"https://github.com/ageron/handson-ml3/raw/main/\"\n",
     "filename = \"ladybug.png\"\n",
     "filepath = IMAGES_PATH / filename\n",
     "if not filepath.is_file():\n",
     "    print(\"Downloading\", filename)\n",
-    "    url = f\"{root}/images/unsupervised_learning/{filename}\"\n",
+    "    url = f\"{homl3_root}/images/unsupervised_learning/{filename}\"\n",
     "    urllib.request.urlretrieve(url, filepath)"
   ]
  },