Since mldata.org is down, download MNIST elsewhere
parent
7997d4d38c
commit
29ef56964a
|
@ -87,14 +87,35 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from six.moves import urllib\n",
|
||||
"from sklearn.datasets import fetch_mldata\n",
|
||||
"mnist = fetch_mldata('MNIST original')"
|
||||
"try:\n",
|
||||
" mnist = fetch_mldata('MNIST original')\n",
|
||||
"except urllib.error.HTTPError as ex:\n",
|
||||
" print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n",
|
||||
"\n",
|
||||
" # Alternative method to load MNIST, if mldata.org is down\n",
|
||||
" from scipy.io import loadmat\n",
|
||||
" mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n",
|
||||
" mnist_path = \"./mnist-original.mat\"\n",
|
||||
" response = urllib.request.urlopen(mnist_alternative_url)\n",
|
||||
" with open(mnist_path, \"wb\") as f:\n",
|
||||
" content = response.read()\n",
|
||||
" f.write(content)\n",
|
||||
" mnist_raw = loadmat(mnist_path)\n",
|
||||
" mnist = {\n",
|
||||
" \"data\": mnist_raw[\"data\"].T,\n",
|
||||
" \"target\": mnist_raw[\"label\"][0],\n",
|
||||
" \"COL_NAMES\": [\"label\", \"data\"],\n",
|
||||
" \"DESCR\": \"mldata.org dataset: mnist-original\",\n",
|
||||
" }\n",
|
||||
" print(\"Success!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
@ -448,6 +448,41 @@
|
|||
"## Feature importance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from six.moves import urllib\n",
|
||||
"from sklearn.datasets import fetch_mldata\n",
|
||||
"try:\n",
|
||||
" mnist = fetch_mldata('MNIST original')\n",
|
||||
"except urllib.error.HTTPError as ex:\n",
|
||||
" print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n",
|
||||
"\n",
|
||||
" # Alternative method to load MNIST, if mldata.org is down\n",
|
||||
" from scipy.io import loadmat\n",
|
||||
" mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n",
|
||||
" mnist_path = \"./mnist-original.mat\"\n",
|
||||
" response = urllib.request.urlopen(mnist_alternative_url)\n",
|
||||
" with open(mnist_path, \"wb\") as f:\n",
|
||||
" content = response.read()\n",
|
||||
" f.write(content)\n",
|
||||
" mnist_raw = loadmat(mnist_path)\n",
|
||||
" mnist = {\n",
|
||||
" \"data\": mnist_raw[\"data\"].T,\n",
|
||||
" \"target\": mnist_raw[\"label\"][0],\n",
|
||||
" \"COL_NAMES\": [\"label\", \"data\"],\n",
|
||||
" \"DESCR\": \"mldata.org dataset: mnist-original\",\n",
|
||||
" }\n",
|
||||
" print(\"Success!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
|
@ -458,8 +493,6 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import fetch_mldata\n",
|
||||
"mnist = fetch_mldata('MNIST original')\n",
|
||||
"rnd_clf = RandomForestClassifier(random_state=42)\n",
|
||||
"rnd_clf.fit(mnist[\"data\"], mnist[\"target\"])"
|
||||
]
|
||||
|
|
|
@ -806,6 +806,41 @@
|
|||
"# MNIST compression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from six.moves import urllib\n",
|
||||
"from sklearn.datasets import fetch_mldata\n",
|
||||
"try:\n",
|
||||
" mnist = fetch_mldata('MNIST original')\n",
|
||||
"except urllib.error.HTTPError as ex:\n",
|
||||
" print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n",
|
||||
"\n",
|
||||
" # Alternative method to load MNIST, if mldata.org is down\n",
|
||||
" from scipy.io import loadmat\n",
|
||||
" mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n",
|
||||
" mnist_path = \"./mnist-original.mat\"\n",
|
||||
" response = urllib.request.urlopen(mnist_alternative_url)\n",
|
||||
" with open(mnist_path, \"wb\") as f:\n",
|
||||
" content = response.read()\n",
|
||||
" f.write(content)\n",
|
||||
" mnist_raw = loadmat(mnist_path)\n",
|
||||
" mnist = {\n",
|
||||
" \"data\": mnist_raw[\"data\"].T,\n",
|
||||
" \"target\": mnist_raw[\"label\"][0],\n",
|
||||
" \"COL_NAMES\": [\"label\", \"data\"],\n",
|
||||
" \"DESCR\": \"mldata.org dataset: mnist-original\",\n",
|
||||
" }\n",
|
||||
" print(\"Success!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
|
@ -817,9 +852,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn.datasets import fetch_mldata\n",
|
||||
"\n",
|
||||
"mnist = fetch_mldata('MNIST original')\n",
|
||||
"X = mnist[\"data\"]\n",
|
||||
"y = mnist[\"target\"]\n",
|
||||
"\n",
|
||||
|
|
|
@ -401,6 +401,39 @@
|
|||
"# MNIST"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from six.moves import urllib\n",
|
||||
"from sklearn.datasets import fetch_mldata\n",
|
||||
"try:\n",
|
||||
" mnist = fetch_mldata('MNIST original')\n",
|
||||
"except urllib.error.HTTPError as ex:\n",
|
||||
" print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n",
|
||||
"\n",
|
||||
" # Alternative method to load MNIST, if mldata.org is down\n",
|
||||
" from scipy.io import loadmat\n",
|
||||
" mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n",
|
||||
" mnist_path = \"./mnist-original.mat\"\n",
|
||||
" response = urllib.request.urlopen(mnist_alternative_url)\n",
|
||||
" with open(mnist_path, \"wb\") as f:\n",
|
||||
" content = response.read()\n",
|
||||
" f.write(content)\n",
|
||||
" mnist_raw = loadmat(mnist_path)\n",
|
||||
" mnist = {\n",
|
||||
" \"data\": mnist_raw[\"data\"].T,\n",
|
||||
" \"target\": mnist_raw[\"label\"][0],\n",
|
||||
" \"COL_NAMES\": [\"label\", \"data\"],\n",
|
||||
" \"DESCR\": \"mldata.org dataset: mnist-original\",\n",
|
||||
" }\n",
|
||||
" print(\"Success!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
|
@ -411,9 +444,6 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import fetch_mldata\n",
|
||||
"\n",
|
||||
"mnist = fetch_mldata('MNIST original')\n",
|
||||
"X_train, X_test = mnist[\"data\"][:60000].astype(np.float64), mnist[\"data\"][60000:].astype(np.float64)\n",
|
||||
"y_train, y_test = mnist[\"target\"][:60000].astype(np.int64), mnist[\"target\"][60000:].astype(np.int64)"
|
||||
]
|
||||
|
|
Loading…
Reference in New Issue