Since mldata.org is down, download MNIST elsewhere

main
Aurélien Geron 2017-04-07 21:33:53 +02:00
parent 7997d4d38c
commit 29ef56964a
4 changed files with 126 additions and 9 deletions

View File

@ -87,14 +87,35 @@
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true,
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from six.moves import urllib\n",
"from sklearn.datasets import fetch_mldata\n",
"mnist = fetch_mldata('MNIST original')"
"try:\n",
" mnist = fetch_mldata('MNIST original')\n",
"except urllib.error.HTTPError as ex:\n",
" print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n",
"\n",
" # Alternative method to load MNIST, if mldata.org is down\n",
" from scipy.io import loadmat\n",
" mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n",
" mnist_path = \"./mnist-original.mat\"\n",
" response = urllib.request.urlopen(mnist_alternative_url)\n",
" with open(mnist_path, \"wb\") as f:\n",
" content = response.read()\n",
" f.write(content)\n",
" mnist_raw = loadmat(mnist_path)\n",
" mnist = {\n",
" \"data\": mnist_raw[\"data\"].T,\n",
" \"target\": mnist_raw[\"label\"][0],\n",
" \"COL_NAMES\": [\"label\", \"data\"],\n",
" \"DESCR\": \"mldata.org dataset: mnist-original\",\n",
" }\n",
" print(\"Success!\")"
]
},
{

View File

@ -448,6 +448,41 @@
"## Feature importance"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from six.moves import urllib\n",
"from sklearn.datasets import fetch_mldata\n",
"try:\n",
" mnist = fetch_mldata('MNIST original')\n",
"except urllib.error.HTTPError as ex:\n",
" print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n",
"\n",
" # Alternative method to load MNIST, if mldata.org is down\n",
" from scipy.io import loadmat\n",
" mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n",
" mnist_path = \"./mnist-original.mat\"\n",
" response = urllib.request.urlopen(mnist_alternative_url)\n",
" with open(mnist_path, \"wb\") as f:\n",
" content = response.read()\n",
" f.write(content)\n",
" mnist_raw = loadmat(mnist_path)\n",
" mnist = {\n",
" \"data\": mnist_raw[\"data\"].T,\n",
" \"target\": mnist_raw[\"label\"][0],\n",
" \"COL_NAMES\": [\"label\", \"data\"],\n",
" \"DESCR\": \"mldata.org dataset: mnist-original\",\n",
" }\n",
" print(\"Success!\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
@ -458,8 +493,6 @@
},
"outputs": [],
"source": [
"from sklearn.datasets import fetch_mldata\n",
"mnist = fetch_mldata('MNIST original')\n",
"rnd_clf = RandomForestClassifier(random_state=42)\n",
"rnd_clf.fit(mnist[\"data\"], mnist[\"target\"])"
]

View File

@ -806,6 +806,41 @@
"# MNIST compression"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from six.moves import urllib\n",
"from sklearn.datasets import fetch_mldata\n",
"try:\n",
" mnist = fetch_mldata('MNIST original')\n",
"except urllib.error.HTTPError as ex:\n",
" print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n",
"\n",
" # Alternative method to load MNIST, if mldata.org is down\n",
" from scipy.io import loadmat\n",
" mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n",
" mnist_path = \"./mnist-original.mat\"\n",
" response = urllib.request.urlopen(mnist_alternative_url)\n",
" with open(mnist_path, \"wb\") as f:\n",
" content = response.read()\n",
" f.write(content)\n",
" mnist_raw = loadmat(mnist_path)\n",
" mnist = {\n",
" \"data\": mnist_raw[\"data\"].T,\n",
" \"target\": mnist_raw[\"label\"][0],\n",
" \"COL_NAMES\": [\"label\", \"data\"],\n",
" \"DESCR\": \"mldata.org dataset: mnist-original\",\n",
" }\n",
" print(\"Success!\")"
]
},
{
"cell_type": "code",
"execution_count": 30,
@ -817,9 +852,7 @@
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn.datasets import fetch_mldata\n",
"\n",
"mnist = fetch_mldata('MNIST original')\n",
"X = mnist[\"data\"]\n",
"y = mnist[\"target\"]\n",
"\n",

View File

@ -401,6 +401,39 @@
"# MNIST"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from six.moves import urllib\n",
"from sklearn.datasets import fetch_mldata\n",
"try:\n",
" mnist = fetch_mldata('MNIST original')\n",
"except urllib.error.HTTPError as ex:\n",
" print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n",
"\n",
" # Alternative method to load MNIST, if mldata.org is down\n",
" from scipy.io import loadmat\n",
" mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n",
" mnist_path = \"./mnist-original.mat\"\n",
" response = urllib.request.urlopen(mnist_alternative_url)\n",
" with open(mnist_path, \"wb\") as f:\n",
" content = response.read()\n",
" f.write(content)\n",
" mnist_raw = loadmat(mnist_path)\n",
" mnist = {\n",
" \"data\": mnist_raw[\"data\"].T,\n",
" \"target\": mnist_raw[\"label\"][0],\n",
" \"COL_NAMES\": [\"label\", \"data\"],\n",
" \"DESCR\": \"mldata.org dataset: mnist-original\",\n",
" }\n",
" print(\"Success!\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
@ -411,9 +444,6 @@
},
"outputs": [],
"source": [
"from sklearn.datasets import fetch_mldata\n",
"\n",
"mnist = fetch_mldata('MNIST original')\n",
"X_train, X_test = mnist[\"data\"][:60000].astype(np.float64), mnist[\"data\"][60000:].astype(np.float64)\n",
"y_train, y_test = mnist[\"target\"][:60000].astype(np.int64), mnist[\"target\"][60000:].astype(np.int64)"
]