Replace y_gen_faces with y_bad, and other little fixes (thanks Ian!)
parent
3d418c0308
commit
5e9bec62bc
|
@ -1660,7 +1660,8 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"log_reg.score(X_test, y_test)"
|
"log_reg_score = log_reg.score(X_test, y_test)\n",
|
||||||
|
"log_reg_score"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1698,7 +1699,15 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"pipeline.score(X_test, y_test)"
|
"pipeline_score = pipeline.score(X_test, y_test)\n",
|
||||||
|
"pipeline_score"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"How much did the error rate drop?"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1707,14 +1716,14 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"1 - (1 - 0.977777) / (1 - 0.968888)"
|
"1 - (1 - pipeline_score) / (1 - log_reg_score)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"How about that? We reduced the error rate by over 28%! But we chose the number of clusters $k$ completely arbitrarily, we can surely do better. Since K-Means is just a preprocessing step in a classification pipeline, finding a good value for $k$ is much simpler than earlier: there's no need to perform silhouette analysis or minimize the inertia, the best value of $k$ is simply the one that results in the best classification performance."
|
"How about that? We reduced the error rate by over 35%! But we chose the number of clusters $k$ completely arbitrarily, we can surely do better. Since K-Means is just a preprocessing step in a classification pipeline, finding a good value for $k$ is much simpler than earlier: there's no need to perform silhouette analysis or minimize the inertia, the best value of $k$ is simply the one that results in the best classification performance."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -3473,11 +3482,12 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def plot_faces(faces, labels, n_cols=5):\n",
|
"def plot_faces(faces, labels, n_cols=5):\n",
|
||||||
|
" faces = faces.reshape(-1, 64, 64)\n",
|
||||||
" n_rows = (len(faces) - 1) // n_cols + 1\n",
|
" n_rows = (len(faces) - 1) // n_cols + 1\n",
|
||||||
" plt.figure(figsize=(n_cols, n_rows * 1.1))\n",
|
" plt.figure(figsize=(n_cols, n_rows * 1.1))\n",
|
||||||
" for index, (face, label) in enumerate(zip(faces, labels)):\n",
|
" for index, (face, label) in enumerate(zip(faces, labels)):\n",
|
||||||
" plt.subplot(n_rows, n_cols, index + 1)\n",
|
" plt.subplot(n_rows, n_cols, index + 1)\n",
|
||||||
" plt.imshow(face.reshape(64, 64), cmap=\"gray\")\n",
|
" plt.imshow(face, cmap=\"gray\")\n",
|
||||||
" plt.axis(\"off\")\n",
|
" plt.axis(\"off\")\n",
|
||||||
" plt.title(label)\n",
|
" plt.title(label)\n",
|
||||||
" plt.show()\n",
|
" plt.show()\n",
|
||||||
|
@ -3485,7 +3495,7 @@
|
||||||
"for cluster_id in np.unique(best_model.labels_):\n",
|
"for cluster_id in np.unique(best_model.labels_):\n",
|
||||||
" print(\"Cluster\", cluster_id)\n",
|
" print(\"Cluster\", cluster_id)\n",
|
||||||
" in_cluster = best_model.labels_==cluster_id\n",
|
" in_cluster = best_model.labels_==cluster_id\n",
|
||||||
" faces = X_train[in_cluster].reshape(-1, 64, 64)\n",
|
" faces = X_train[in_cluster]\n",
|
||||||
" labels = y_train[in_cluster]\n",
|
" labels = y_train[in_cluster]\n",
|
||||||
" plot_faces(faces, labels)"
|
" plot_faces(faces, labels)"
|
||||||
]
|
]
|
||||||
|
@ -3709,7 +3719,6 @@
|
||||||
"n_darkened = 3\n",
|
"n_darkened = 3\n",
|
||||||
"darkened = X_train[:n_darkened].copy()\n",
|
"darkened = X_train[:n_darkened].copy()\n",
|
||||||
"darkened[:, 1:-1] *= 0.3\n",
|
"darkened[:, 1:-1] *= 0.3\n",
|
||||||
"darkened = darkened.reshape(-1, 64*64)\n",
|
|
||||||
"y_darkened = y_train[:n_darkened]\n",
|
"y_darkened = y_train[:n_darkened]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"X_bad_faces = np.r_[rotated, flipped, darkened]\n",
|
"X_bad_faces = np.r_[rotated, flipped, darkened]\n",
|
||||||
|
@ -3819,7 +3828,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"plot_faces(X_bad_faces, y_gen_faces)"
|
"plot_faces(X_bad_faces, y_bad)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -3829,7 +3838,7 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"X_bad_faces_reconstructed = pca.inverse_transform(X_bad_faces_pca)\n",
|
"X_bad_faces_reconstructed = pca.inverse_transform(X_bad_faces_pca)\n",
|
||||||
"plot_faces(X_bad_faces_reconstructed, y_gen_faces)"
|
"plot_faces(X_bad_faces_reconstructed, y_bad)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue