From 29ef56964afb694765993f03710866d111be03d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Fri, 7 Apr 2017 21:33:53 +0200 Subject: [PATCH] Since mldata.org is down, download MNIST elsewhere --- 03_classification.ipynb | 25 ++++++++++++- 07_ensemble_learning_and_random_forests.ipynb | 37 ++++++++++++++++++- 08_dimensionality_reduction.ipynb | 37 ++++++++++++++++++- 13_convolutional_neural_networks.ipynb | 36 ++++++++++++++++-- 4 files changed, 126 insertions(+), 9 deletions(-) diff --git a/03_classification.ipynb b/03_classification.ipynb index ca9591b..579c829 100644 --- a/03_classification.ipynb +++ b/03_classification.ipynb @@ -87,14 +87,35 @@ "cell_type": "code", "execution_count": 2, "metadata": { - "collapsed": true, + "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ + "from six.moves import urllib\n", "from sklearn.datasets import fetch_mldata\n", - "mnist = fetch_mldata('MNIST original')" + "try:\n", + " mnist = fetch_mldata('MNIST original')\n", + "except urllib.error.HTTPError as ex:\n", + " print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n", + "\n", + " # Alternative method to load MNIST, if mldata.org is down\n", + " from scipy.io import loadmat\n", + " mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n", + " mnist_path = \"./mnist-original.mat\"\n", + " response = urllib.request.urlopen(mnist_alternative_url)\n", + " with open(mnist_path, \"wb\") as f:\n", + " content = response.read()\n", + " f.write(content)\n", + " mnist_raw = loadmat(mnist_path)\n", + " mnist = {\n", + " \"data\": mnist_raw[\"data\"].T,\n", + " \"target\": mnist_raw[\"label\"][0],\n", + " \"COL_NAMES\": [\"label\", \"data\"],\n", + " \"DESCR\": \"mldata.org dataset: mnist-original\",\n", + " }\n", + " print(\"Success!\")" ] }, { diff --git a/07_ensemble_learning_and_random_forests.ipynb b/07_ensemble_learning_and_random_forests.ipynb index 55ebebf..682c899 100644 --- a/07_ensemble_learning_and_random_forests.ipynb +++ b/07_ensemble_learning_and_random_forests.ipynb @@ -448,6 +448,41 @@ "## Feature importance" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "from six.moves import urllib\n", + "from sklearn.datasets import fetch_mldata\n", + "try:\n", + " mnist = fetch_mldata('MNIST original')\n", + "except urllib.error.HTTPError as ex:\n", + " print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n", + "\n", + " # Alternative method to load MNIST, if mldata.org is down\n", + " from scipy.io import loadmat\n", + " mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n", + " mnist_path = \"./mnist-original.mat\"\n", + " response = urllib.request.urlopen(mnist_alternative_url)\n", + " with open(mnist_path, \"wb\") as f:\n", + " content = response.read()\n", + " f.write(content)\n", + " mnist_raw = loadmat(mnist_path)\n", + " mnist = {\n", + " \"data\": mnist_raw[\"data\"].T,\n", + " \"target\": mnist_raw[\"label\"][0],\n", + " \"COL_NAMES\": [\"label\", \"data\"],\n", + " \"DESCR\": \"mldata.org dataset: mnist-original\",\n", + " }\n", + " print(\"Success!\")" + ] + }, { "cell_type": "code", "execution_count": 18, @@ -458,8 +493,6 @@ }, "outputs": [], "source": [ - "from sklearn.datasets import fetch_mldata\n", - "mnist = fetch_mldata('MNIST original')\n", "rnd_clf = RandomForestClassifier(random_state=42)\n", "rnd_clf.fit(mnist[\"data\"], mnist[\"target\"])" ] diff --git a/08_dimensionality_reduction.ipynb b/08_dimensionality_reduction.ipynb index 91455df..aac7851 100644 --- a/08_dimensionality_reduction.ipynb +++ b/08_dimensionality_reduction.ipynb @@ -806,6 +806,41 @@ "# MNIST compression" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "from six.moves import urllib\n", + "from sklearn.datasets import fetch_mldata\n", + "try:\n", + " mnist = fetch_mldata('MNIST original')\n", + "except urllib.error.HTTPError as ex:\n", + " print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n", + "\n", + " # Alternative method to load MNIST, if mldata.org is down\n", + " from scipy.io import loadmat\n", + " mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n", + " mnist_path = \"./mnist-original.mat\"\n", + " response = urllib.request.urlopen(mnist_alternative_url)\n", + " with open(mnist_path, \"wb\") as f:\n", + " content = response.read()\n", + " f.write(content)\n", + " mnist_raw = loadmat(mnist_path)\n", + " mnist = {\n", + " \"data\": mnist_raw[\"data\"].T,\n", + " \"target\": mnist_raw[\"label\"][0],\n", + " \"COL_NAMES\": [\"label\", \"data\"],\n", + " \"DESCR\": \"mldata.org dataset: mnist-original\",\n", + " }\n", + " print(\"Success!\")" + ] + }, { "cell_type": "code", "execution_count": 30, @@ -817,9 +852,7 @@ "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", - "from sklearn.datasets import fetch_mldata\n", "\n", - "mnist = fetch_mldata('MNIST original')\n", "X = mnist[\"data\"]\n", "y = mnist[\"target\"]\n", "\n", diff --git a/13_convolutional_neural_networks.ipynb b/13_convolutional_neural_networks.ipynb index 94bf961..b6bbe01 100644 --- a/13_convolutional_neural_networks.ipynb +++ b/13_convolutional_neural_networks.ipynb @@ -401,6 +401,39 @@ "# MNIST" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from six.moves import urllib\n", + "from sklearn.datasets import fetch_mldata\n", + "try:\n", + " mnist = fetch_mldata('MNIST original')\n", + "except urllib.error.HTTPError as ex:\n", + " print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n", + "\n", + " # Alternative method to load MNIST, if mldata.org is down\n", + " from scipy.io import loadmat\n", + " mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n", + " mnist_path = \"./mnist-original.mat\"\n", + " response = urllib.request.urlopen(mnist_alternative_url)\n", + " with open(mnist_path, \"wb\") as f:\n", + " content = response.read()\n", + " f.write(content)\n", + " mnist_raw = loadmat(mnist_path)\n", + " mnist = {\n", + " \"data\": mnist_raw[\"data\"].T,\n", + " \"target\": mnist_raw[\"label\"][0],\n", + " \"COL_NAMES\": [\"label\", \"data\"],\n", + " \"DESCR\": \"mldata.org dataset: mnist-original\",\n", + " }\n", + " print(\"Success!\")" + ] + }, { "cell_type": "code", "execution_count": 15, @@ -411,9 +444,6 @@ }, "outputs": [], "source": [ - "from sklearn.datasets import fetch_mldata\n", - "\n", - "mnist = fetch_mldata('MNIST original')\n", "X_train, X_test = mnist[\"data\"][:60000].astype(np.float64), mnist[\"data\"][60000:].astype(np.float64)\n", "y_train, y_test = mnist[\"target\"][:60000].astype(np.int64), mnist[\"target\"][60000:].astype(np.int64)" ]