Move datasets to project ageron/data to shrink this repo
parent
8745a9c2ac
commit
c9b977309a
|
@@ -4,19 +4,14 @@
|
||||||
*.old
|
*.old
|
||||||
*.pyc
|
*.pyc
|
||||||
.DS_Store
|
.DS_Store
|
||||||
.ipynb_checkpoints
|
.ipynb_checkpoints/
|
||||||
.vscode/
|
.vscode/
|
||||||
checkpoint
|
checkpoint
|
||||||
logs/*
|
/logs
|
||||||
tf_logs/*
|
/tf_logs
|
||||||
images/**/*.png
|
/images
|
||||||
images/**/*.dot
|
|
||||||
my_*
|
my_*
|
||||||
person.proto
|
/person.proto
|
||||||
person.desc
|
/person.desc
|
||||||
person_pb2.py
|
/person_pb2.py
|
||||||
datasets/flowers
|
/datasets
|
||||||
datasets/spam
|
|
||||||
datasets/words
|
|
||||||
datasets/jsb_chorales
|
|
||||||
|
|
||||||
|
|
|
@@ -130,11 +130,11 @@
|
||||||
"datapath = Path() / \"datasets\" / \"lifesat\"\n",
|
"datapath = Path() / \"datasets\" / \"lifesat\"\n",
|
||||||
"datapath.mkdir(parents=True, exist_ok=True)\n",
|
"datapath.mkdir(parents=True, exist_ok=True)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
|
"data_root = \"https://github.com/ageron/data/raw/main/\"\n",
|
||||||
"filename = \"lifesat.csv\"\n",
|
"filename = \"lifesat.csv\"\n",
|
||||||
"if not (datapath / filename).is_file():\n",
|
"if not (datapath / filename).is_file():\n",
|
||||||
" print(\"Downloading\", filename)\n",
|
" print(\"Downloading\", filename)\n",
|
||||||
" url = root + \"datasets/lifesat/\" + filename\n",
|
" url = data_root + \"lifesat/\" + filename\n",
|
||||||
" urllib.request.urlretrieve(url, datapath / filename)"
|
" urllib.request.urlretrieve(url, datapath / filename)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@@ -283,7 +283,7 @@
|
||||||
"for filename in (\"oecd_bli.csv\", \"gdp_per_capita.csv\"):\n",
|
"for filename in (\"oecd_bli.csv\", \"gdp_per_capita.csv\"):\n",
|
||||||
" if not (datapath / filename).is_file():\n",
|
" if not (datapath / filename).is_file():\n",
|
||||||
" print(\"Downloading\", filename)\n",
|
" print(\"Downloading\", filename)\n",
|
||||||
" url = root + \"datasets/lifesat/\" + filename\n",
|
" url = data_root + \"lifesat/\" + filename\n",
|
||||||
" urllib.request.urlretrieve(url, datapath / filename)"
|
" urllib.request.urlretrieve(url, datapath / filename)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
|
@@ -110,8 +110,8 @@
|
||||||
" housing_path = Path() / \"datasets\" / \"housing\"\n",
|
" housing_path = Path() / \"datasets\" / \"housing\"\n",
|
||||||
" if not (housing_path / \"housing.csv\").is_file():\n",
|
" if not (housing_path / \"housing.csv\").is_file():\n",
|
||||||
" housing_path.mkdir(parents=True, exist_ok=True)\n",
|
" housing_path.mkdir(parents=True, exist_ok=True)\n",
|
||||||
" root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
|
" data_root = \"https://github.com/ageron/data/raw/main/\"\n",
|
||||||
" url = root + \"datasets/housing/housing.tgz\"\n",
|
" url = data_root + \"housing/housing.tgz\"\n",
|
||||||
" tgz_path = housing_path / \"housing.tgz\"\n",
|
" tgz_path = housing_path / \"housing.tgz\"\n",
|
||||||
" urllib.request.urlretrieve(url, tgz_path)\n",
|
" urllib.request.urlretrieve(url, tgz_path)\n",
|
||||||
" with tarfile.open(tgz_path) as housing_tgz:\n",
|
" with tarfile.open(tgz_path) as housing_tgz:\n",
|
||||||
|
@@ -578,8 +578,8 @@
|
||||||
"# Download the California image\n",
|
"# Download the California image\n",
|
||||||
"filename = \"california.png\"\n",
|
"filename = \"california.png\"\n",
|
||||||
"if not (IMAGES_PATH / filename).is_file():\n",
|
"if not (IMAGES_PATH / filename).is_file():\n",
|
||||||
" root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
|
" homl3_root = \"https://github.com/ageron/handson-ml3/raw/main/\"\n",
|
||||||
" url = root + \"images/end_to_end_project/\" + filename\n",
|
" url = homl3_root + \"images/end_to_end_project/\" + filename\n",
|
||||||
" print(\"Downloading\", filename)\n",
|
" print(\"Downloading\", filename)\n",
|
||||||
" urllib.request.urlretrieve(url, IMAGES_PATH / filename)\n",
|
" urllib.request.urlretrieve(url, IMAGES_PATH / filename)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|
|
@@ -1635,8 +1635,8 @@
|
||||||
" filepath = titanic_path / filename\n",
|
" filepath = titanic_path / filename\n",
|
||||||
" if filepath.is_file():\n",
|
" if filepath.is_file():\n",
|
||||||
" continue\n",
|
" continue\n",
|
||||||
" root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
|
" data_root = \"https://github.com/ageron/data/raw/main/\"\n",
|
||||||
" url = root + \"/datasets/titanic/\" + filename\n",
|
" url = data_root + \"titanic/\" + filename\n",
|
||||||
" print(\"Downloading\", filename)\n",
|
" print(\"Downloading\", filename)\n",
|
||||||
" urllib.request.urlretrieve(url, filepath)\n",
|
" urllib.request.urlretrieve(url, filepath)\n",
|
||||||
" return [pd.read_csv(titanic_path / filename) for filename in filenames]"
|
" return [pd.read_csv(titanic_path / filename) for filename in filenames]"
|
||||||
|
@@ -2123,9 +2123,9 @@
|
||||||
"import tarfile\n",
|
"import tarfile\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def fetch_spam_data():\n",
|
"def fetch_spam_data():\n",
|
||||||
" root = \"http://spamassassin.apache.org/old/publiccorpus/\"\n",
|
" spam_root = \"http://spamassassin.apache.org/old/publiccorpus/\"\n",
|
||||||
" ham_url = root + \"20030228_easy_ham.tar.bz2\"\n",
|
" ham_url = spam_root + \"20030228_easy_ham.tar.bz2\"\n",
|
||||||
" spam_url = root + \"20030228_spam.tar.bz2\"\n",
|
" spam_url = spam_root + \"20030228_spam.tar.bz2\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
" spam_path = Path() / \"datasets\" / \"spam\"\n",
|
" spam_path = Path() / \"datasets\" / \"spam\"\n",
|
||||||
" spam_path.mkdir(parents=True, exist_ok=True)\n",
|
" spam_path.mkdir(parents=True, exist_ok=True)\n",
|
||||||
|
|
|
@@ -755,8 +755,8 @@
|
||||||
" housing_path = Path() / \"datasets\" / \"housing\"\n",
|
" housing_path = Path() / \"datasets\" / \"housing\"\n",
|
||||||
" if not (housing_path / \"housing.csv\").is_file():\n",
|
" if not (housing_path / \"housing.csv\").is_file():\n",
|
||||||
" housing_path.mkdir(parents=True, exist_ok=True)\n",
|
" housing_path.mkdir(parents=True, exist_ok=True)\n",
|
||||||
" root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
|
" root = \"https://github.com/ageron/data/raw/main/\"\n",
|
||||||
" url = root + \"datasets/housing/housing.tgz\"\n",
|
" url = root + \"housing/housing.tgz\"\n",
|
||||||
" tgz_path = housing_path / \"housing.tgz\"\n",
|
" tgz_path = housing_path / \"housing.tgz\"\n",
|
||||||
" urllib.request.urlretrieve(url, tgz_path)\n",
|
" urllib.request.urlretrieve(url, tgz_path)\n",
|
||||||
" with tarfile.open(tgz_path) as housing_tgz:\n",
|
" with tarfile.open(tgz_path) as housing_tgz:\n",
|
||||||
|
|
|
@@ -895,7 +895,6 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import urllib.request\n",
|
|
||||||
"from sklearn.datasets import fetch_openml\n",
|
"from sklearn.datasets import fetch_openml\n",
|
||||||
"\n",
|
"\n",
|
||||||
"mnist = fetch_openml('mnist_784', as_frame=False)"
|
"mnist = fetch_openml('mnist_784', as_frame=False)"
|
||||||
|
@@ -1303,14 +1302,16 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# extra code\n",
|
"# extra code – downloads the ladybug image\n",
|
||||||
"\n",
|
"\n",
|
||||||
"root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
|
"import urllib.request\n",
|
||||||
|
"\n",
|
||||||
|
"homl3_root = \"https://github.com/ageron/handson-ml3/raw/main/\"\n",
|
||||||
"filename = \"ladybug.png\"\n",
|
"filename = \"ladybug.png\"\n",
|
||||||
"filepath = IMAGES_PATH / filename\n",
|
"filepath = IMAGES_PATH / filename\n",
|
||||||
"if not filepath.is_file():\n",
|
"if not filepath.is_file():\n",
|
||||||
" print(\"Downloading\", filename)\n",
|
" print(\"Downloading\", filename)\n",
|
||||||
" url = f\"{root}/images/unsupervised_learning/{filename}\"\n",
|
" url = f\"{homl3_root}/images/unsupervised_learning/{filename}\"\n",
|
||||||
" urllib.request.urlretrieve(url, filepath)"
|
" urllib.request.urlretrieve(url, filepath)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
Loading…
Reference in New Issue