Clarify figure 4-19
parent
107b90d9b3
commit
d4d15a9c59
|
@ -935,12 +935,11 @@
|
||||||
"source": [
|
"source": [
|
||||||
"t1a, t1b, t2a, t2b = -1, 3, -1.5, 1.5\n",
|
"t1a, t1b, t2a, t2b = -1, 3, -1.5, 1.5\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# ignoring bias term\n",
|
|
||||||
"t1s = np.linspace(t1a, t1b, 500)\n",
|
"t1s = np.linspace(t1a, t1b, 500)\n",
|
||||||
"t2s = np.linspace(t2a, t2b, 500)\n",
|
"t2s = np.linspace(t2a, t2b, 500)\n",
|
||||||
"t1, t2 = np.meshgrid(t1s, t2s)\n",
|
"t1, t2 = np.meshgrid(t1s, t2s)\n",
|
||||||
"T = np.c_[t1.ravel(), t2.ravel()]\n",
|
"T = np.c_[t1.ravel(), t2.ravel()]\n",
|
||||||
"Xr = np.array([[-1, 1], [-0.3, -1], [1, 0.1]])\n",
|
"Xr = np.array([[1, 1], [1, -1], [1, 0.5]])\n",
|
||||||
"yr = 2 * Xr[:, :1] + 0.5 * Xr[:, 1:]\n",
|
"yr = 2 * Xr[:, :1] + 0.5 * Xr[:, 1:]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"J = (1/len(Xr) * np.sum((T.dot(Xr.T) - yr.T)**2, axis=1)).reshape(t1.shape)\n",
|
"J = (1/len(Xr) * np.sum((T.dot(Xr.T) - yr.T)**2, axis=1)).reshape(t1.shape)\n",
|
||||||
|
@ -960,18 +959,17 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def bgd_path(theta, X, y, l1, l2, core = 1, eta = 0.1, n_iterations = 50):\n",
|
"def bgd_path(theta, X, y, l1, l2, core = 1, eta = 0.05, n_iterations = 200):\n",
|
||||||
" path = [theta]\n",
|
" path = [theta]\n",
|
||||||
" for iteration in range(n_iterations):\n",
|
" for iteration in range(n_iterations):\n",
|
||||||
" gradients = core * 2/len(X) * X.T.dot(X.dot(theta) - y) + l1 * np.sign(theta) + 2 * l2 * theta\n",
|
" gradients = core * 2/len(X) * X.T.dot(X.dot(theta) - y) + l1 * np.sign(theta) + l2 * theta\n",
|
||||||
"\n",
|
|
||||||
" theta = theta - eta * gradients\n",
|
" theta = theta - eta * gradients\n",
|
||||||
" path.append(theta)\n",
|
" path.append(theta)\n",
|
||||||
" return np.array(path)\n",
|
" return np.array(path)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"plt.figure(figsize=(12, 8))\n",
|
"fig, axes = plt.subplots(2, 2, sharex=True, sharey=True, figsize=(10.1, 8))\n",
|
||||||
"for i, N, l1, l2, title in ((0, N1, 0.5, 0, \"Lasso\"), (1, N2, 0, 0.1, \"Ridge\")):\n",
|
"for i, N, l1, l2, title in ((0, N1, 2., 0, \"Lasso\"), (1, N2, 0, 2., \"Ridge\")):\n",
|
||||||
" JR = J + l1 * N1 + l2 * N2**2\n",
|
" JR = J + l1 * N1 + l2 * 0.5 * N2**2\n",
|
||||||
" \n",
|
" \n",
|
||||||
" tr_min_idx = np.unravel_index(np.argmin(JR), JR.shape)\n",
|
" tr_min_idx = np.unravel_index(np.argmin(JR), JR.shape)\n",
|
||||||
" t1r_min, t2r_min = t1[tr_min_idx], t2[tr_min_idx]\n",
|
" t1r_min, t2r_min = t1[tr_min_idx], t2[tr_min_idx]\n",
|
||||||
|
@ -982,34 +980,36 @@
|
||||||
" \n",
|
" \n",
|
||||||
" path_J = bgd_path(t_init, Xr, yr, l1=0, l2=0)\n",
|
" path_J = bgd_path(t_init, Xr, yr, l1=0, l2=0)\n",
|
||||||
" path_JR = bgd_path(t_init, Xr, yr, l1, l2)\n",
|
" path_JR = bgd_path(t_init, Xr, yr, l1, l2)\n",
|
||||||
" path_N = bgd_path(t_init, Xr, yr, np.sign(l1)/3, np.sign(l2), core=0)\n",
|
" path_N = bgd_path(np.array([[2.0], [0.5]]), Xr, yr, np.sign(l1)/3, np.sign(l2), core=0)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" plt.subplot(221 + i * 2)\n",
|
" ax = axes[i, 0]\n",
|
||||||
" plt.grid(True)\n",
|
" ax.grid(True)\n",
|
||||||
" plt.axhline(y=0, color='k')\n",
|
" ax.axhline(y=0, color='k')\n",
|
||||||
" plt.axvline(x=0, color='k')\n",
|
" ax.axvline(x=0, color='k')\n",
|
||||||
" plt.contourf(t1, t2, J, levels=levelsJ, alpha=0.9)\n",
|
" ax.contourf(t1, t2, N / 2., levels=levelsN)\n",
|
||||||
" plt.contour(t1, t2, N, levels=levelsN)\n",
|
" ax.plot(path_N[:, 0], path_N[:, 1], \"y--\")\n",
|
||||||
" plt.plot(path_J[:, 0], path_J[:, 1], \"w-o\")\n",
|
" ax.plot(0, 0, \"ys\")\n",
|
||||||
" plt.plot(path_N[:, 0], path_N[:, 1], \"y-^\")\n",
|
" ax.plot(t1_min, t2_min, \"ys\")\n",
|
||||||
" plt.plot(t1_min, t2_min, \"rs\")\n",
|
" ax.set_title(r\"$\\ell_{}$ penalty\".format(i + 1), fontsize=16)\n",
|
||||||
" plt.title(r\"$\\ell_{}$ penalty\".format(i + 1), fontsize=16)\n",
|
" ax.axis([t1a, t1b, t2a, t2b])\n",
|
||||||
" plt.axis([t1a, t1b, t2a, t2b])\n",
|
|
||||||
" if i == 1:\n",
|
" if i == 1:\n",
|
||||||
" plt.xlabel(r\"$\\theta_1$\", fontsize=20)\n",
|
" ax.set_xlabel(r\"$\\theta_1$\", fontsize=16)\n",
|
||||||
" plt.ylabel(r\"$\\theta_2$\", fontsize=20, rotation=0)\n",
|
" ax.set_ylabel(r\"$\\theta_2$\", fontsize=16, rotation=0)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" plt.subplot(222 + i * 2)\n",
|
" ax = axes[i, 1]\n",
|
||||||
" plt.grid(True)\n",
|
" ax.grid(True)\n",
|
||||||
" plt.axhline(y=0, color='k')\n",
|
" ax.axhline(y=0, color='k')\n",
|
||||||
" plt.axvline(x=0, color='k')\n",
|
" ax.axvline(x=0, color='k')\n",
|
||||||
" plt.contourf(t1, t2, JR, levels=levelsJR, alpha=0.9)\n",
|
" ax.contourf(t1, t2, JR, levels=levelsJR, alpha=0.9)\n",
|
||||||
" plt.plot(path_JR[:, 0], path_JR[:, 1], \"w-o\")\n",
|
" ax.plot(path_JR[:, 0], path_JR[:, 1], \"w-o\")\n",
|
||||||
" plt.plot(t1r_min, t2r_min, \"rs\")\n",
|
" ax.plot(path_N[:, 0], path_N[:, 1], \"y--\")\n",
|
||||||
" plt.title(title, fontsize=16)\n",
|
" ax.plot(0, 0, \"ys\")\n",
|
||||||
" plt.axis([t1a, t1b, t2a, t2b])\n",
|
" ax.plot(t1_min, t2_min, \"ys\")\n",
|
||||||
|
" ax.plot(t1r_min, t2r_min, \"rs\")\n",
|
||||||
|
" ax.set_title(title, fontsize=16)\n",
|
||||||
|
" ax.axis([t1a, t1b, t2a, t2b])\n",
|
||||||
" if i == 1:\n",
|
" if i == 1:\n",
|
||||||
" plt.xlabel(r\"$\\theta_1$\", fontsize=20)\n",
|
" ax.set_xlabel(r\"$\\theta_1$\", fontsize=16)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"save_fig(\"lasso_vs_ridge_plot\")\n",
|
"save_fig(\"lasso_vs_ridge_plot\")\n",
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
|
|
Loading…
Reference in New Issue