Since mldata.org is down, download MNIST elsewhere
parent
7997d4d38c
commit
29ef56964a
|
@ -87,14 +87,35 @@
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 2,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
"editable": true
|
"editable": true
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"from six.moves import urllib\n",
|
||||||
"from sklearn.datasets import fetch_mldata\n",
|
"from sklearn.datasets import fetch_mldata\n",
|
||||||
"mnist = fetch_mldata('MNIST original')"
|
"try:\n",
|
||||||
|
" mnist = fetch_mldata('MNIST original')\n",
|
||||||
|
"except urllib.error.HTTPError as ex:\n",
|
||||||
|
" print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n",
|
||||||
|
"\n",
|
||||||
|
" # Alternative method to load MNIST, if mldata.org is down\n",
|
||||||
|
" from scipy.io import loadmat\n",
|
||||||
|
" mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n",
|
||||||
|
" mnist_path = \"./mnist-original.mat\"\n",
|
||||||
|
" response = urllib.request.urlopen(mnist_alternative_url)\n",
|
||||||
|
" with open(mnist_path, \"wb\") as f:\n",
|
||||||
|
" content = response.read()\n",
|
||||||
|
" f.write(content)\n",
|
||||||
|
" mnist_raw = loadmat(mnist_path)\n",
|
||||||
|
" mnist = {\n",
|
||||||
|
" \"data\": mnist_raw[\"data\"].T,\n",
|
||||||
|
" \"target\": mnist_raw[\"label\"][0],\n",
|
||||||
|
" \"COL_NAMES\": [\"label\", \"data\"],\n",
|
||||||
|
" \"DESCR\": \"mldata.org dataset: mnist-original\",\n",
|
||||||
|
" }\n",
|
||||||
|
" print(\"Success!\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -448,6 +448,41 @@
|
||||||
"## Feature importance"
|
"## Feature importance"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": true,
|
||||||
|
"deletable": true,
|
||||||
|
"editable": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from six.moves import urllib\n",
|
||||||
|
"from sklearn.datasets import fetch_mldata\n",
|
||||||
|
"try:\n",
|
||||||
|
" mnist = fetch_mldata('MNIST original')\n",
|
||||||
|
"except urllib.error.HTTPError as ex:\n",
|
||||||
|
" print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n",
|
||||||
|
"\n",
|
||||||
|
" # Alternative method to load MNIST, if mldata.org is down\n",
|
||||||
|
" from scipy.io import loadmat\n",
|
||||||
|
" mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n",
|
||||||
|
" mnist_path = \"./mnist-original.mat\"\n",
|
||||||
|
" response = urllib.request.urlopen(mnist_alternative_url)\n",
|
||||||
|
" with open(mnist_path, \"wb\") as f:\n",
|
||||||
|
" content = response.read()\n",
|
||||||
|
" f.write(content)\n",
|
||||||
|
" mnist_raw = loadmat(mnist_path)\n",
|
||||||
|
" mnist = {\n",
|
||||||
|
" \"data\": mnist_raw[\"data\"].T,\n",
|
||||||
|
" \"target\": mnist_raw[\"label\"][0],\n",
|
||||||
|
" \"COL_NAMES\": [\"label\", \"data\"],\n",
|
||||||
|
" \"DESCR\": \"mldata.org dataset: mnist-original\",\n",
|
||||||
|
" }\n",
|
||||||
|
" print(\"Success!\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 18,
|
"execution_count": 18,
|
||||||
|
@ -458,8 +493,6 @@
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from sklearn.datasets import fetch_mldata\n",
|
|
||||||
"mnist = fetch_mldata('MNIST original')\n",
|
|
||||||
"rnd_clf = RandomForestClassifier(random_state=42)\n",
|
"rnd_clf = RandomForestClassifier(random_state=42)\n",
|
||||||
"rnd_clf.fit(mnist[\"data\"], mnist[\"target\"])"
|
"rnd_clf.fit(mnist[\"data\"], mnist[\"target\"])"
|
||||||
]
|
]
|
||||||
|
|
|
@ -806,6 +806,41 @@
|
||||||
"# MNIST compression"
|
"# MNIST compression"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": true,
|
||||||
|
"deletable": true,
|
||||||
|
"editable": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from six.moves import urllib\n",
|
||||||
|
"from sklearn.datasets import fetch_mldata\n",
|
||||||
|
"try:\n",
|
||||||
|
" mnist = fetch_mldata('MNIST original')\n",
|
||||||
|
"except urllib.error.HTTPError as ex:\n",
|
||||||
|
" print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n",
|
||||||
|
"\n",
|
||||||
|
" # Alternative method to load MNIST, if mldata.org is down\n",
|
||||||
|
" from scipy.io import loadmat\n",
|
||||||
|
" mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n",
|
||||||
|
" mnist_path = \"./mnist-original.mat\"\n",
|
||||||
|
" response = urllib.request.urlopen(mnist_alternative_url)\n",
|
||||||
|
" with open(mnist_path, \"wb\") as f:\n",
|
||||||
|
" content = response.read()\n",
|
||||||
|
" f.write(content)\n",
|
||||||
|
" mnist_raw = loadmat(mnist_path)\n",
|
||||||
|
" mnist = {\n",
|
||||||
|
" \"data\": mnist_raw[\"data\"].T,\n",
|
||||||
|
" \"target\": mnist_raw[\"label\"][0],\n",
|
||||||
|
" \"COL_NAMES\": [\"label\", \"data\"],\n",
|
||||||
|
" \"DESCR\": \"mldata.org dataset: mnist-original\",\n",
|
||||||
|
" }\n",
|
||||||
|
" print(\"Success!\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 30,
|
"execution_count": 30,
|
||||||
|
@ -817,9 +852,7 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
"from sklearn.datasets import fetch_mldata\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"mnist = fetch_mldata('MNIST original')\n",
|
|
||||||
"X = mnist[\"data\"]\n",
|
"X = mnist[\"data\"]\n",
|
||||||
"y = mnist[\"target\"]\n",
|
"y = mnist[\"target\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|
|
@ -401,6 +401,39 @@
|
||||||
"# MNIST"
|
"# MNIST"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from six.moves import urllib\n",
|
||||||
|
"from sklearn.datasets import fetch_mldata\n",
|
||||||
|
"try:\n",
|
||||||
|
" mnist = fetch_mldata('MNIST original')\n",
|
||||||
|
"except urllib.error.HTTPError as ex:\n",
|
||||||
|
" print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n",
|
||||||
|
"\n",
|
||||||
|
" # Alternative method to load MNIST, if mldata.org is down\n",
|
||||||
|
" from scipy.io import loadmat\n",
|
||||||
|
" mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n",
|
||||||
|
" mnist_path = \"./mnist-original.mat\"\n",
|
||||||
|
" response = urllib.request.urlopen(mnist_alternative_url)\n",
|
||||||
|
" with open(mnist_path, \"wb\") as f:\n",
|
||||||
|
" content = response.read()\n",
|
||||||
|
" f.write(content)\n",
|
||||||
|
" mnist_raw = loadmat(mnist_path)\n",
|
||||||
|
" mnist = {\n",
|
||||||
|
" \"data\": mnist_raw[\"data\"].T,\n",
|
||||||
|
" \"target\": mnist_raw[\"label\"][0],\n",
|
||||||
|
" \"COL_NAMES\": [\"label\", \"data\"],\n",
|
||||||
|
" \"DESCR\": \"mldata.org dataset: mnist-original\",\n",
|
||||||
|
" }\n",
|
||||||
|
" print(\"Success!\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": 15,
|
||||||
|
@ -411,9 +444,6 @@
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from sklearn.datasets import fetch_mldata\n",
|
|
||||||
"\n",
|
|
||||||
"mnist = fetch_mldata('MNIST original')\n",
|
|
||||||
"X_train, X_test = mnist[\"data\"][:60000].astype(np.float64), mnist[\"data\"][60000:].astype(np.float64)\n",
|
"X_train, X_test = mnist[\"data\"][:60000].astype(np.float64), mnist[\"data\"][60000:].astype(np.float64)\n",
|
||||||
"y_train, y_test = mnist[\"target\"][:60000].astype(np.int64), mnist[\"target\"][60000:].astype(np.int64)"
|
"y_train, y_test = mnist[\"target\"][:60000].astype(np.int64), mnist[\"target\"][60000:].astype(np.int64)"
|
||||||
]
|
]
|
||||||
|
|
Loading…
Reference in New Issue