Big update of chapter 5 for 3rd edition
parent
608d9ce815
commit
8e97aab84b
|
@ -125,6 +125,13 @@
|
||||||
"# Linear SVM Classification"
|
"# Linear SVM Classification"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The book starts with a few figures, before the first code example, so the next three cells generate and save these figures. You can skip them if you want."
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 5,
|
||||||
|
@ -168,12 +175,13 @@
|
||||||
" margin = 1/w[1]\n",
|
" margin = 1/w[1]\n",
|
||||||
" gutter_up = decision_boundary + margin\n",
|
" gutter_up = decision_boundary + margin\n",
|
||||||
" gutter_down = decision_boundary - margin\n",
|
" gutter_down = decision_boundary - margin\n",
|
||||||
"\n",
|
|
||||||
" svs = svm_clf.support_vectors_\n",
|
" svs = svm_clf.support_vectors_\n",
|
||||||
" plt.scatter(svs[:, 0], svs[:, 1], s=180, facecolors='#FFAAAA')\n",
|
"\n",
|
||||||
" plt.plot(x0, decision_boundary, \"k-\", linewidth=2)\n",
|
" plt.plot(x0, decision_boundary, \"k-\", linewidth=2, zorder=-2)\n",
|
||||||
" plt.plot(x0, gutter_up, \"k--\", linewidth=2)\n",
|
" plt.plot(x0, gutter_up, \"k--\", linewidth=2, zorder=-2)\n",
|
||||||
" plt.plot(x0, gutter_down, \"k--\", linewidth=2)\n",
|
" plt.plot(x0, gutter_down, \"k--\", linewidth=2, zorder=-2)\n",
|
||||||
|
" plt.scatter(svs[:, 0], svs[:, 1], s=180, facecolors='#AAA',\n",
|
||||||
|
" zorder=-1)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig, axes = plt.subplots(ncols=2, figsize=(10,2.7), sharey=True)\n",
|
"fig, axes = plt.subplots(ncols=2, figsize=(10,2.7), sharey=True)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -187,6 +195,7 @@
|
||||||
"plt.ylabel(\"Petal width\")\n",
|
"plt.ylabel(\"Petal width\")\n",
|
||||||
"plt.legend(loc=\"upper left\")\n",
|
"plt.legend(loc=\"upper left\")\n",
|
||||||
"plt.axis([0, 5.5, 0, 2])\n",
|
"plt.axis([0, 5.5, 0, 2])\n",
|
||||||
|
"plt.gca().set_aspect(\"equal\")\n",
|
||||||
"plt.grid()\n",
|
"plt.grid()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"plt.sca(axes[1])\n",
|
"plt.sca(axes[1])\n",
|
||||||
|
@ -195,6 +204,7 @@
|
||||||
"plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"yo\")\n",
|
"plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"yo\")\n",
|
||||||
"plt.xlabel(\"Petal length\")\n",
|
"plt.xlabel(\"Petal length\")\n",
|
||||||
"plt.axis([0, 5.5, 0, 2])\n",
|
"plt.axis([0, 5.5, 0, 2])\n",
|
||||||
|
"plt.gca().set_aspect(\"equal\")\n",
|
||||||
"plt.grid()\n",
|
"plt.grid()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"save_fig(\"large_margin_classification_plot\")\n",
|
"save_fig(\"large_margin_classification_plot\")\n",
|
||||||
|
@ -320,17 +330,17 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"from sklearn import datasets\n",
|
"from sklearn.datasets import load_iris\n",
|
||||||
"from sklearn.pipeline import make_pipeline\n",
|
"from sklearn.pipeline import make_pipeline\n",
|
||||||
"from sklearn.preprocessing import StandardScaler\n",
|
"from sklearn.preprocessing import StandardScaler\n",
|
||||||
"from sklearn.svm import LinearSVC\n",
|
"from sklearn.svm import LinearSVC\n",
|
||||||
"\n",
|
"\n",
|
||||||
"iris = datasets.load_iris(as_frame=True)\n",
|
"iris = load_iris(as_frame=True)\n",
|
||||||
"X = iris.data[[\"petal length (cm)\", \"petal width (cm)\"]].values\n",
|
"X = iris.data[[\"petal length (cm)\", \"petal width (cm)\"]].values\n",
|
||||||
"y = (iris.target == 2) # Iris virginica\n",
|
"y = (iris.target == 2) # Iris virginica\n",
|
||||||
"\n",
|
"\n",
|
||||||
"svm_clf = make_pipeline(StandardScaler(),\n",
|
"svm_clf = make_pipeline(StandardScaler(),\n",
|
||||||
" LinearSVC(C=1, loss=\"hinge\", random_state=42))\n",
|
" LinearSVC(C=1, random_state=42))\n",
|
||||||
"svm_clf.fit(X, y)"
|
"svm_clf.fit(X, y)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -340,7 +350,8 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"svm_clf.predict([[5.5, 1.7]])"
|
"X_new = [[5.5, 1.7], [5.0, 1.5]]\n",
|
||||||
|
"svm_clf.predict(X_new)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -348,12 +359,21 @@
|
||||||
"execution_count": 10,
|
"execution_count": 10,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"svm_clf.decision_function(X_new)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# not in the book – this cell generates and saves Figure 5–4\n",
|
"# not in the book – this cell generates and saves Figure 5–4\n",
|
||||||
"\n",
|
"\n",
|
||||||
"scaler = StandardScaler()\n",
|
"scaler = StandardScaler()\n",
|
||||||
"svm_clf1 = LinearSVC(C=1, loss=\"hinge\", max_iter=10_000, random_state=42)\n",
|
"svm_clf1 = LinearSVC(C=1, max_iter=10_000, random_state=42)\n",
|
||||||
"svm_clf2 = LinearSVC(C=100, loss=\"hinge\", max_iter=10_000, random_state=42)\n",
|
"svm_clf2 = LinearSVC(C=100, max_iter=10_000, random_state=42)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"scaled_svm_clf1 = make_pipeline(scaler, svm_clf1)\n",
|
"scaled_svm_clf1 = make_pipeline(scaler, svm_clf1)\n",
|
||||||
"scaled_svm_clf2 = make_pipeline(scaler, svm_clf2)\n",
|
"scaled_svm_clf2 = make_pipeline(scaler, svm_clf2)\n",
|
||||||
|
@ -387,7 +407,7 @@
|
||||||
"plt.xlabel(\"Petal length\")\n",
|
"plt.xlabel(\"Petal length\")\n",
|
||||||
"plt.ylabel(\"Petal width\")\n",
|
"plt.ylabel(\"Petal width\")\n",
|
||||||
"plt.legend(loc=\"upper left\")\n",
|
"plt.legend(loc=\"upper left\")\n",
|
||||||
"plt.title(\"$C = {}$\".format(svm_clf1.C))\n",
|
"plt.title(f\"$C = {svm_clf1.C}$\")\n",
|
||||||
"plt.axis([4, 5.9, 0.8, 2.8])\n",
|
"plt.axis([4, 5.9, 0.8, 2.8])\n",
|
||||||
"plt.grid()\n",
|
"plt.grid()\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -396,7 +416,7 @@
|
||||||
"plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"bs\")\n",
|
"plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"bs\")\n",
|
||||||
"plot_svc_decision_boundary(svm_clf2, 4, 5.99)\n",
|
"plot_svc_decision_boundary(svm_clf2, 4, 5.99)\n",
|
||||||
"plt.xlabel(\"Petal length\")\n",
|
"plt.xlabel(\"Petal length\")\n",
|
||||||
"plt.title(\"$C = {}$\".format(svm_clf2.C))\n",
|
"plt.title(f\"$C = {svm_clf2.C}$\")\n",
|
||||||
"plt.axis([4, 5.9, 0.8, 2.8])\n",
|
"plt.axis([4, 5.9, 0.8, 2.8])\n",
|
||||||
"plt.grid()\n",
|
"plt.grid()\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -413,7 +433,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 11,
|
"execution_count": 12,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -461,12 +481,11 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from sklearn.datasets import make_moons\n",
|
"from sklearn.datasets import make_moons\n",
|
||||||
"from sklearn.pipeline import make_pipeline\n",
|
|
||||||
"from sklearn.preprocessing import PolynomialFeatures\n",
|
"from sklearn.preprocessing import PolynomialFeatures\n",
|
||||||
"\n",
|
"\n",
|
||||||
"X, y = make_moons(n_samples=100, noise=0.15, random_state=42)\n",
|
"X, y = make_moons(n_samples=100, noise=0.15, random_state=42)\n",
|
||||||
|
@ -474,14 +493,14 @@
|
||||||
"polynomial_svm_clf = make_pipeline(\n",
|
"polynomial_svm_clf = make_pipeline(\n",
|
||||||
" PolynomialFeatures(degree=3),\n",
|
" PolynomialFeatures(degree=3),\n",
|
||||||
" StandardScaler(),\n",
|
" StandardScaler(),\n",
|
||||||
" LinearSVC(C=10, loss=\"hinge\", max_iter=10_000, random_state=42)\n",
|
" LinearSVC(C=10, max_iter=10_000, random_state=42)\n",
|
||||||
")\n",
|
")\n",
|
||||||
"polynomial_svm_clf.fit(X, y)"
|
"polynomial_svm_clf.fit(X, y)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 13,
|
"execution_count": 14,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -528,7 +547,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 14,
|
"execution_count": 15,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -543,7 +562,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": 16,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -581,7 +600,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 16,
|
"execution_count": 17,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"scrolled": true
|
"scrolled": true
|
||||||
},
|
},
|
||||||
|
@ -651,18 +670,6 @@
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 17,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"x1_example = X1D[3, 0]\n",
|
|
||||||
"for landmark in (-2, 1):\n",
|
|
||||||
" k = gaussian_rbf(np.array([[x1_example]]), np.array([[landmark]]), gamma)\n",
|
|
||||||
" print(\"Phi({}, {}) = {}\".format(x1_example, landmark, k))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
@ -722,7 +729,7 @@
|
||||||
" plot_predictions(svm_clf, [-1.5, 2.45, -1, 1.5])\n",
|
" plot_predictions(svm_clf, [-1.5, 2.45, -1, 1.5])\n",
|
||||||
" plot_dataset(X, y, [-1.5, 2.45, -1, 1.5])\n",
|
" plot_dataset(X, y, [-1.5, 2.45, -1, 1.5])\n",
|
||||||
" gamma, C = hyperparams[i]\n",
|
" gamma, C = hyperparams[i]\n",
|
||||||
" plt.title(r\"$\\gamma = {}, C = {}$\".format(gamma, C))\n",
|
" plt.title(fr\"$\\gamma = {gamma}, C = {C}$\")\n",
|
||||||
" if i in (0, 1):\n",
|
" if i in (0, 1):\n",
|
||||||
" plt.xlabel(\"\")\n",
|
" plt.xlabel(\"\")\n",
|
||||||
" if i in (1, 3):\n",
|
" if i in (1, 3):\n",
|
||||||
|
@ -745,6 +752,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# not in the book – this code generates a simple linear dataset\n",
|
||||||
"np.random.seed(42)\n",
|
"np.random.seed(42)\n",
|
||||||
"m = 50\n",
|
"m = 50\n",
|
||||||
"X = 2 * np.random.rand(m, 1)\n",
|
"X = 2 * np.random.rand(m, 1)\n",
|
||||||
|
@ -766,7 +774,8 @@
|
||||||
"source": [
|
"source": [
|
||||||
"from sklearn.svm import LinearSVR\n",
|
"from sklearn.svm import LinearSVR\n",
|
||||||
"\n",
|
"\n",
|
||||||
"svm_reg = LinearSVR(epsilon=1.5, random_state=42)\n",
|
"svm_reg = make_pipeline(StandardScaler(),\n",
|
||||||
|
" LinearSVR(epsilon=0.5, random_state=42))\n",
|
||||||
"svm_reg.fit(X, y)"
|
"svm_reg.fit(X, y)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -780,47 +789,49 @@
|
||||||
"\n",
|
"\n",
|
||||||
"def find_support_vectors(svm_reg, X, y):\n",
|
"def find_support_vectors(svm_reg, X, y):\n",
|
||||||
" y_pred = svm_reg.predict(X)\n",
|
" y_pred = svm_reg.predict(X)\n",
|
||||||
" off_margin = (np.abs(y - y_pred) >= svm_reg.epsilon)\n",
|
" epsilon = svm_reg[-1].epsilon\n",
|
||||||
|
" off_margin = np.abs(y - y_pred) >= epsilon\n",
|
||||||
" return np.argwhere(off_margin)\n",
|
" return np.argwhere(off_margin)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def plot_svm_regression(svm_reg, X, y, axes):\n",
|
"def plot_svm_regression(svm_reg, X, y, axes):\n",
|
||||||
" x1s = np.linspace(axes[0], axes[1], 100).reshape(100, 1)\n",
|
" x1s = np.linspace(axes[0], axes[1], 100).reshape(100, 1)\n",
|
||||||
" y_pred = svm_reg.predict(x1s)\n",
|
" y_pred = svm_reg.predict(x1s)\n",
|
||||||
" plt.plot(x1s, y_pred, \"k-\", linewidth=2, label=r\"$\\hat{y}$\")\n",
|
" epsilon = svm_reg[-1].epsilon\n",
|
||||||
" plt.plot(x1s, y_pred + svm_reg.epsilon, \"k--\")\n",
|
" plt.plot(x1s, y_pred, \"k-\", linewidth=2, label=r\"$\\hat{y}$\", zorder=-2)\n",
|
||||||
" plt.plot(x1s, y_pred - svm_reg.epsilon, \"k--\")\n",
|
" plt.plot(x1s, y_pred + epsilon, \"k--\", zorder=-2)\n",
|
||||||
" plt.scatter(X[svm_reg.support_], y[svm_reg.support_], s=180, facecolors='#FFAAAA')\n",
|
" plt.plot(x1s, y_pred - epsilon, \"k--\", zorder=-2)\n",
|
||||||
|
" plt.scatter(X[svm_reg._support], y[svm_reg._support], s=180,\n",
|
||||||
|
" facecolors='#AAA', zorder=-1)\n",
|
||||||
" plt.plot(X, y, \"bo\")\n",
|
" plt.plot(X, y, \"bo\")\n",
|
||||||
" plt.xlabel(r\"$x_1$\")\n",
|
" plt.xlabel(r\"$x_1$\")\n",
|
||||||
" plt.legend(loc=\"upper left\")\n",
|
" plt.legend(loc=\"upper left\")\n",
|
||||||
" plt.axis(axes)\n",
|
" plt.axis(axes)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"svm_reg1 = LinearSVR(epsilon=1.5, random_state=42)\n",
|
"svm_reg2 = make_pipeline(StandardScaler(),\n",
|
||||||
"svm_reg2 = LinearSVR(epsilon=0.5, random_state=42)\n",
|
" LinearSVR(epsilon=1.2, random_state=42))\n",
|
||||||
"svm_reg1.fit(X, y)\n",
|
|
||||||
"svm_reg2.fit(X, y)\n",
|
"svm_reg2.fit(X, y)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"svm_reg1.support_ = find_support_vectors(svm_reg1, X, y)\n",
|
"svm_reg._support = find_support_vectors(svm_reg, X, y)\n",
|
||||||
"svm_reg2.support_ = find_support_vectors(svm_reg2, X, y)\n",
|
"svm_reg2._support = find_support_vectors(svm_reg2, X, y)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"eps_x1 = 1\n",
|
"eps_x1 = 1\n",
|
||||||
"eps_y_pred = svm_reg1.predict([[eps_x1]])\n",
|
"eps_y_pred = svm_reg2.predict([[eps_x1]])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig, axes = plt.subplots(ncols=2, figsize=(9, 4), sharey=True)\n",
|
"fig, axes = plt.subplots(ncols=2, figsize=(9, 4), sharey=True)\n",
|
||||||
"plt.sca(axes[0])\n",
|
"plt.sca(axes[0])\n",
|
||||||
"plot_svm_regression(svm_reg1, X, y, [0, 2, 3, 11])\n",
|
"plot_svm_regression(svm_reg, X, y, [0, 2, 3, 11])\n",
|
||||||
"plt.title(r\"$\\epsilon = {}$\".format(svm_reg1.epsilon))\n",
|
"plt.title(fr\"$\\epsilon = {svm_reg[-1].epsilon}$\")\n",
|
||||||
"plt.ylabel(r\"$y$\", rotation=0)\n",
|
"plt.ylabel(r\"$y$\", rotation=0)\n",
|
||||||
"plt.grid()\n",
|
"plt.grid()\n",
|
||||||
"plt.annotate(\n",
|
|
||||||
" '', xy=(eps_x1, eps_y_pred), xycoords='data',\n",
|
|
||||||
" xytext=(eps_x1, eps_y_pred - svm_reg1.epsilon),\n",
|
|
||||||
" textcoords='data', arrowprops={'arrowstyle': '<->', 'linewidth': 1.5}\n",
|
|
||||||
" )\n",
|
|
||||||
"plt.text(0.91, 5.6, r\"$\\epsilon$\", fontsize=16)\n",
|
|
||||||
"plt.sca(axes[1])\n",
|
"plt.sca(axes[1])\n",
|
||||||
"plot_svm_regression(svm_reg2, X, y, [0, 2, 3, 11])\n",
|
"plot_svm_regression(svm_reg2, X, y, [0, 2, 3, 11])\n",
|
||||||
"plt.title(r\"$\\epsilon = {}$\".format(svm_reg2.epsilon))\n",
|
"plt.title(fr\"$\\epsilon = {svm_reg2[-1].epsilon}$\")\n",
|
||||||
|
"plt.annotate(\n",
|
||||||
|
" '', xy=(eps_x1, eps_y_pred), xycoords='data',\n",
|
||||||
|
" xytext=(eps_x1, eps_y_pred - svm_reg2[-1].epsilon),\n",
|
||||||
|
" textcoords='data', arrowprops={'arrowstyle': '<->', 'linewidth': 1.5}\n",
|
||||||
|
" )\n",
|
||||||
|
"plt.text(0.90, 5.4, r\"$\\epsilon$\", fontsize=16)\n",
|
||||||
"plt.grid()\n",
|
"plt.grid()\n",
|
||||||
"save_fig(\"svm_regression_plot\")\n",
|
"save_fig(\"svm_regression_plot\")\n",
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
|
@ -832,8 +843,9 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# not in the book – this code generates a simple quadratic dataset\n",
|
||||||
"np.random.seed(42)\n",
|
"np.random.seed(42)\n",
|
||||||
"m = 100\n",
|
"m = 50\n",
|
||||||
"X = 2 * np.random.rand(m, 1) - 1\n",
|
"X = 2 * np.random.rand(m, 1) - 1\n",
|
||||||
"y = (0.2 + 0.1 * X + 0.5 * X ** 2 + np.random.randn(m, 1) / 10).ravel()"
|
"y = (0.2 + 0.1 * X + 0.5 * X ** 2 + np.random.randn(m, 1) / 10).ravel()"
|
||||||
]
|
]
|
||||||
|
@ -853,7 +865,8 @@
|
||||||
"source": [
|
"source": [
|
||||||
"from sklearn.svm import SVR\n",
|
"from sklearn.svm import SVR\n",
|
||||||
"\n",
|
"\n",
|
||||||
"svm_poly_reg = SVR(kernel=\"poly\", degree=2, C=100)\n",
|
"svm_poly_reg = make_pipeline(StandardScaler(),\n",
|
||||||
|
" SVR(kernel=\"poly\", degree=2, C=0.01, epsilon=0.1))\n",
|
||||||
"svm_poly_reg.fit(X, y)"
|
"svm_poly_reg.fit(X, y)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -862,35 +875,30 @@
|
||||||
"execution_count": 25,
|
"execution_count": 25,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
|
||||||
"svm_poly_reg1 = SVR(kernel=\"poly\", degree=2, C=100)\n",
|
|
||||||
"svm_poly_reg2 = SVR(kernel=\"poly\", degree=2, C=0.01)\n",
|
|
||||||
"svm_poly_reg1.fit(X, y)\n",
|
|
||||||
"svm_poly_reg2.fit(X, y)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 26,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
"source": [
|
||||||
"# not in the book – this cell generates and saves Figure 5–11\n",
|
"# not in the book – this cell generates and saves Figure 5–11\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"svm_poly_reg2 = make_pipeline(StandardScaler(),\n",
|
||||||
|
" SVR(kernel=\"poly\", degree=2, C=100))\n",
|
||||||
|
"svm_poly_reg2.fit(X, y)\n",
|
||||||
|
"\n",
|
||||||
|
"svm_poly_reg._support = find_support_vectors(svm_poly_reg, X, y)\n",
|
||||||
|
"svm_poly_reg2._support = find_support_vectors(svm_poly_reg2, X, y)\n",
|
||||||
|
"\n",
|
||||||
"fig, axes = plt.subplots(ncols=2, figsize=(9, 4), sharey=True)\n",
|
"fig, axes = plt.subplots(ncols=2, figsize=(9, 4), sharey=True)\n",
|
||||||
"plt.sca(axes[0])\n",
|
"plt.sca(axes[0])\n",
|
||||||
"plot_svm_regression(svm_poly_reg1, X, y, [-1, 1, 0, 1])\n",
|
"plot_svm_regression(svm_poly_reg, X, y, [-1, 1, 0, 1])\n",
|
||||||
"plt.title(f\"$degree={svm_poly_reg1.degree}, \"\n",
|
"plt.title(f\"$degree={svm_poly_reg[-1].degree}, \"\n",
|
||||||
" f\"C={svm_poly_reg1.C}, \"\n",
|
" f\"C={svm_poly_reg[-1].C}, \"\n",
|
||||||
" f\"\\\\epsilon={svm_poly_reg1.epsilon}$\")\n",
|
" fr\"\\epsilon={svm_poly_reg[-1].epsilon}$\")\n",
|
||||||
"plt.ylabel(r\"$y$\", rotation=0)\n",
|
"plt.ylabel(r\"$y$\", rotation=0)\n",
|
||||||
"plt.grid()\n",
|
"plt.grid()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"plt.sca(axes[1])\n",
|
"plt.sca(axes[1])\n",
|
||||||
"plot_svm_regression(svm_poly_reg2, X, y, [-1, 1, 0, 1])\n",
|
"plot_svm_regression(svm_poly_reg2, X, y, [-1, 1, 0, 1])\n",
|
||||||
"plt.title(f\"$degree={svm_poly_reg2.degree}, \"\n",
|
"plt.title(f\"$degree={svm_poly_reg2[-1].degree}, \"\n",
|
||||||
" f\"C={svm_poly_reg2.C}, \"\n",
|
" f\"C={svm_poly_reg2[-1].C}, \"\n",
|
||||||
" f\"\\\\epsilon={svm_poly_reg2.epsilon}$\")\n",
|
" fr\"\\epsilon={svm_poly_reg2[-1].epsilon}$\")\n",
|
||||||
"plt.grid()\n",
|
"plt.grid()\n",
|
||||||
"save_fig(\"svm_with_polynomial_kernel_plot\")\n",
|
"save_fig(\"svm_with_polynomial_kernel_plot\")\n",
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
|
@ -900,90 +908,46 @@
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Extra Material – Under the Hood"
|
"# Under the hood"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 27,
|
"execution_count": 26,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"X = iris.data[[\"petal length (cm)\", \"petal width (cm)\"]].values\n",
|
"# not in the book – this cell generates and saves Figure 5–12\n",
|
||||||
"y = (iris.target == 2)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 28,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from mpl_toolkits.mplot3d import Axes3D\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"def plot_3D_decision_function(ax, w, b, x1_lim=[4, 6], x2_lim=[0.8, 2.8]):\n",
|
|
||||||
" x1_in_bounds = (X[:, 0] > x1_lim[0]) & (X[:, 0] < x1_lim[1])\n",
|
|
||||||
" X_crop = X[x1_in_bounds]\n",
|
|
||||||
" y_crop = y[x1_in_bounds]\n",
|
|
||||||
" x1s = np.linspace(x1_lim[0], x1_lim[1], 20)\n",
|
|
||||||
" x2s = np.linspace(x2_lim[0], x2_lim[1], 20)\n",
|
|
||||||
" x1, x2 = np.meshgrid(x1s, x2s)\n",
|
|
||||||
" xs = np.c_[x1.ravel(), x2.ravel()]\n",
|
|
||||||
" df = (xs.dot(w) + b).reshape(x1.shape)\n",
|
|
||||||
" m = 1 / np.linalg.norm(w)\n",
|
|
||||||
" boundary_x2s = -x1s * (w[0] / w[1]) - b / w[1]\n",
|
|
||||||
" margin_x2s_1 = -x1s * (w[0] / w[1]) - (b - 1) / w[1]\n",
|
|
||||||
" margin_x2s_2 = -x1s * (w[0] / w[1]) - (b + 1) / w[1]\n",
|
|
||||||
" ax.plot_surface(x1s, x2, np.zeros_like(x1),\n",
|
|
||||||
" color=\"b\", alpha=0.2, cstride=100, rstride=100)\n",
|
|
||||||
" ax.plot(x1s, boundary_x2s, 0, \"k-\", linewidth=2, label=r\"$h=0$\")\n",
|
|
||||||
" ax.plot(x1s, margin_x2s_1, 0, \"k--\", linewidth=2, label=r\"$h=\\pm 1$\")\n",
|
|
||||||
" ax.plot(x1s, margin_x2s_2, 0, \"k--\", linewidth=2)\n",
|
|
||||||
" ax.plot(X_crop[:, 0][y_crop==1], X_crop[:, 1][y_crop==1], 0, \"g^\")\n",
|
|
||||||
" ax.plot_wireframe(x1, x2, df, alpha=0.3, color=\"k\")\n",
|
|
||||||
" ax.plot(X_crop[:, 0][y_crop==0], X_crop[:, 1][y_crop==0], 0, \"bs\")\n",
|
|
||||||
" ax.axis(x1_lim + x2_lim)\n",
|
|
||||||
" ax.text(4.5, 2.5, 3.8, \"Decision function $h$\", fontsize=14)\n",
|
|
||||||
" ax.set_xlabel(r\"Petal length\", labelpad=10)\n",
|
|
||||||
" ax.set_ylabel(r\"Petal width\", labelpad=10)\n",
|
|
||||||
" ax.set_zlabel(r\"$h = \\mathbf{w}^T \\mathbf{x} + b$\", labelpad=5)\n",
|
|
||||||
" ax.legend(loc=\"upper left\")\n",
|
|
||||||
"\n",
|
|
||||||
"fig = plt.figure(figsize=(11, 6))\n",
|
|
||||||
"ax1 = fig.add_subplot(111, projection='3d')\n",
|
|
||||||
"plot_3D_decision_function(ax1, w=svm_clf2.coef_[0], b=svm_clf2.intercept_[0])\n",
|
|
||||||
"\n",
|
|
||||||
"save_fig(\"iris_3D_plot\")\n",
|
|
||||||
"plt.show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 29,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import matplotlib.patches as patches\n",
|
"import matplotlib.patches as patches\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def plot_2D_decision_function(w, b, ylabel=True, x1_lim=[-3, 3]):\n",
|
"def plot_2D_decision_function(w, b, ylabel=True, x1_lim=[-3, 3]):\n",
|
||||||
" x1 = np.linspace(x1_lim[0], x1_lim[1], 200)\n",
|
" x1 = np.linspace(x1_lim[0], x1_lim[1], 200)\n",
|
||||||
" y = w * x1 + b\n",
|
" y = w * x1 + b\n",
|
||||||
" m = 1 / w\n",
|
" half_margin = 1 / w\n",
|
||||||
"\n",
|
"\n",
|
||||||
" plt.plot(x1, y)\n",
|
" plt.plot(x1, y, \"b-\", linewidth=2, label=r\"$s = w_1 x_1$\")\n",
|
||||||
" plt.axhline(y=0, color='k')\n",
|
" plt.axhline(y=0, color='k', linewidth=1)\n",
|
||||||
" plt.axvline(x=0, color='k')\n",
|
" plt.axvline(x=0, color='k', linewidth=1)\n",
|
||||||
" rect = patches.Rectangle((-3, -1), 6, 2, edgecolor='none', facecolor='blue',\n",
|
" rect = patches.Rectangle((-half_margin, -2), 2 * half_margin, 4,\n",
|
||||||
" alpha=0.1)\n",
|
" edgecolor='none', facecolor='gray', alpha=0.2)\n",
|
||||||
" plt.gca().add_patch(rect)\n",
|
" plt.gca().add_patch(rect)\n",
|
||||||
" plt.plot([m, m], [0, 1], \"b--\")\n",
|
" plt.plot([-3, 3], [1, 1], \"k--\", linewidth=1)\n",
|
||||||
" plt.plot([-m, -m], [0, -1], \"b--\")\n",
|
" plt.plot([-3, 3], [-1, -1], \"k--\", linewidth=1)\n",
|
||||||
" plt.plot([-m, m], [0, 0], \"k-o\", linewidth=3)\n",
|
" plt.plot(half_margin, 1, \"k.\")\n",
|
||||||
|
" plt.plot(-half_margin, -1, \"k.\")\n",
|
||||||
" plt.axis(x1_lim + [-2, 2])\n",
|
" plt.axis(x1_lim + [-2, 2])\n",
|
||||||
" plt.xlabel(r\"$x_1$\")\n",
|
" plt.xlabel(r\"$x_1$\")\n",
|
||||||
" if ylabel:\n",
|
" if ylabel:\n",
|
||||||
" plt.ylabel(r\"$w_1 x_1$ \", rotation=0)\n",
|
" plt.ylabel(\"$s$\", rotation=0, labelpad=5)\n",
|
||||||
" plt.title(r\"$w_1 = {}$\".format(w))\n",
|
" plt.legend()\n",
|
||||||
|
" plt.text(1.02, -1.6, \"Margin\", ha=\"left\", va=\"center\",\n",
|
||||||
|
" color=\"k\", fontsize=14)\n",
|
||||||
|
" plt.annotate(\n",
|
||||||
|
" '', xy=(-half_margin, -1.6), xytext=(half_margin, -1.6),\n",
|
||||||
|
" arrowprops={'ec': 'k', 'arrowstyle': '<->', 'linewidth': 1.5}\n",
|
||||||
|
" )\n",
|
||||||
|
" plt.title(fr\"$w_1 = {w}$\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"fig, axes = plt.subplots(ncols=2, figsize=(9, 3.2), sharey=True)\n",
|
"fig, axes = plt.subplots(ncols=2, figsize=(9, 3.2), sharey=True)\n",
|
||||||
"plt.sca(axes[0])\n",
|
"plt.sca(axes[0])\n",
|
||||||
|
@ -996,31 +960,36 @@
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"**Code to generate the Hinge Loss figure:**"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 30,
|
"execution_count": 27,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"t = np.linspace(-2, 4, 200)\n",
|
"# not in the book – this cell generates and saves Figure 5–13\n",
|
||||||
"h = np.where(1 - t < 0, 0, 1 - t) # max(0, 1-t)\n",
|
"\n",
|
||||||
|
"s = np.linspace(-2.5, 2.5, 200)\n",
|
||||||
|
"hinge_pos = np.where(1 - s < 0, 0, 1 - s) # max(0, 1 - s)\n",
|
||||||
|
"hinge_neg = np.where(1 + s < 0, 0, 1 + s) # max(0, 1 + s)\n",
|
||||||
|
"\n",
|
||||||
|
"titles = (r\"Hinge loss = $max(0, 1 - s\\,t)$\", r\"Squared Hinge loss\")\n",
|
||||||
|
"\n",
|
||||||
|
"fix, axs = plt.subplots(1, 2, sharey=True, figsize=(8.2, 3))\n",
|
||||||
|
"\n",
|
||||||
|
"for ax, loss_pos, loss_neg, title in zip(\n",
|
||||||
|
" axs, (hinge_pos, hinge_pos ** 2), (hinge_neg, hinge_neg ** 2), titles):\n",
|
||||||
|
" ax.plot(s, loss_pos, \"g-\", linewidth=2, zorder=10, label=\"$t=1$\")\n",
|
||||||
|
" ax.plot(s, loss_neg, \"r--\", linewidth=2, zorder=10, label=\"$t=-1$\")\n",
|
||||||
|
" ax.grid(True, which='both')\n",
|
||||||
|
" ax.axhline(y=0, color='k')\n",
|
||||||
|
" ax.axvline(x=0, color='k')\n",
|
||||||
|
" ax.set_xlabel(r\"$s = \\mathbf{w}^\\intercal \\mathbf{x} + b$\")\n",
|
||||||
|
" ax.axis([-2.5, 2.5, -0.5, 2.5])\n",
|
||||||
|
" ax.legend(loc=\"center right\")\n",
|
||||||
|
" ax.set_title(title)\n",
|
||||||
|
" ax.set_yticks(np.arange(0, 2.5, 1))\n",
|
||||||
|
" ax.set_aspect(\"equal\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"plt.figure(figsize=(5,2.8))\n",
|
|
||||||
"plt.plot(t, h, \"b-\", linewidth=2, label=\"$max(0, 1 - t)$\", zorder=10)\n",
|
|
||||||
"plt.grid(True, which='both')\n",
|
|
||||||
"plt.axhline(y=0, color='k')\n",
|
|
||||||
"plt.axvline(x=0, color='k')\n",
|
|
||||||
"plt.yticks(np.arange(-1, 2.5, 1))\n",
|
|
||||||
"plt.xlabel(\"$t$\")\n",
|
|
||||||
"plt.axis([-2, 4, -1, 2.5])\n",
|
|
||||||
"plt.legend(loc=\"upper right\")\n",
|
|
||||||
"save_fig(\"hinge_plot\")\n",
|
"save_fig(\"hinge_plot\")\n",
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
]
|
]
|
||||||
|
@ -1029,59 +998,19 @@
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Extra material – Training Time"
|
"# Extra Material"
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 31,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"X, y = make_moons(n_samples=1000, noise=0.4, random_state=42)\n",
|
|
||||||
"\n",
|
|
||||||
"plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"bs\")\n",
|
|
||||||
"plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"g^\")\n",
|
|
||||||
"plt.grid()\n",
|
|
||||||
"plt.show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 32,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import time\n",
|
|
||||||
"\n",
|
|
||||||
"tol = 0.1\n",
|
|
||||||
"tols = []\n",
|
|
||||||
"times = []\n",
|
|
||||||
"for i in range(10):\n",
|
|
||||||
" svm_clf = SVC(kernel=\"poly\", gamma=3, C=10, tol=tol)\n",
|
|
||||||
" t1 = time.time()\n",
|
|
||||||
" svm_clf.fit(X, y)\n",
|
|
||||||
" t2 = time.time()\n",
|
|
||||||
" times.append(t2-t1)\n",
|
|
||||||
" tols.append(tol)\n",
|
|
||||||
" tol /= 10\n",
|
|
||||||
"plt.semilogx(tols, times, \"bo-\")\n",
|
|
||||||
"plt.xlabel(\"Tolerance\")\n",
|
|
||||||
"plt.ylabel(\"Time (seconds)\")\n",
|
|
||||||
"plt.grid()\n",
|
|
||||||
"plt.show()"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Extra Material – Linear SVM classifier implementation using Batch Gradient Descent"
|
"## Linear SVM classifier implementation using Batch Gradient Descent"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 33,
|
"execution_count": 28,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1091,7 +1020,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 34,
|
"execution_count": 29,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1152,7 +1081,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 35,
|
"execution_count": 30,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1165,7 +1094,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 36,
|
"execution_count": 31,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1179,7 +1108,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 37,
|
"execution_count": 32,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1188,7 +1117,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 38,
|
"execution_count": 33,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1199,7 +1128,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 39,
|
"execution_count": 34,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1230,7 +1159,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 40,
|
"execution_count": 35,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"scrolled": true
|
"scrolled": true
|
||||||
},
|
},
|
||||||
|
@ -1275,7 +1204,7 @@
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## 1. to 7."
|
"## 1. to 8."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1289,14 +1218,14 @@
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# 8."
|
"# 9."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"_Exercise: train a `LinearSVC` on a linearly separable dataset. Then train an `SVC` and a `SGDClassifier` on the same dataset. See if you can get them to produce roughly the same model._"
|
"_Exercise: Train a `LinearSVC` on a linearly separable dataset. Then train an `SVC` and a `SGDClassifier` on the same dataset. See if you can get them to produce roughly the same model._"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1308,7 +1237,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 41,
|
"execution_count": 36,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1323,9 +1252,19 @@
|
||||||
"y = y[setosa_or_versicolor]"
|
"y = y[setosa_or_versicolor]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Now let's build and train 3 models:\n",
|
||||||
|
"* Remember that `LinearSVC` uses `loss=\"squared_hinge\"` by default, so if we want all 3 models to produce similar results, we need to set `loss=\"hinge\"`.\n",
|
||||||
|
"* Also, the `SVC` class uses an RBF kernel by default, so we need to set `kernel=\"linear\"` to get results similar to those of the other two models.\n",
|
||||||
|
"* Lastly, the `SGDClassifier` class does not have a `C` hyperparameter, but it has another regularization hyperparameter called `alpha`, so we can tweak it to get results similar to those of the other two models."
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 42,
|
"execution_count": 37,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1334,24 +1273,14 @@
|
||||||
"from sklearn.preprocessing import StandardScaler\n",
|
"from sklearn.preprocessing import StandardScaler\n",
|
||||||
"\n",
|
"\n",
|
||||||
"C = 5\n",
|
"C = 5\n",
|
||||||
"alpha = 1 / (C * len(X))\n",
|
"alpha = 0.05\n",
|
||||||
"\n",
|
|
||||||
"lin_clf = LinearSVC(loss=\"hinge\", C=C, random_state=42)\n",
|
|
||||||
"svm_clf = SVC(kernel=\"linear\", C=C)\n",
|
|
||||||
"sgd_clf = SGDClassifier(loss=\"hinge\", learning_rate=\"constant\", eta0=0.001,\n",
|
|
||||||
" alpha=alpha, max_iter=1000, tol=1e-3, random_state=42)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"scaler = StandardScaler()\n",
|
"scaler = StandardScaler()\n",
|
||||||
"X_scaled = scaler.fit_transform(X)\n",
|
"X_scaled = scaler.fit_transform(X)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"lin_clf.fit(X_scaled, y)\n",
|
"lin_clf = LinearSVC(loss=\"hinge\", C=C, random_state=42).fit(X_scaled, y)\n",
|
||||||
"svm_clf.fit(X_scaled, y)\n",
|
"svc_clf = SVC(kernel=\"linear\", C=C).fit(X_scaled, y)\n",
|
||||||
"sgd_clf.fit(X_scaled, y)\n",
|
"sgd_clf = SGDClassifier(alpha=alpha, random_state=42).fit(X_scaled, y)"
|
||||||
"\n",
|
|
||||||
"print(\"LinearSVC: \", lin_clf.intercept_, lin_clf.coef_)\n",
|
|
||||||
"print(\"SVC: \", svm_clf.intercept_, svm_clf.coef_)\n",
|
|
||||||
"print(f\"SGDClassifier(alpha={sgd_clf.alpha:.1e}):\",\n",
|
|
||||||
" sgd_clf.intercept_, sgd_clf.coef_)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1363,28 +1292,24 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 43,
|
"execution_count": 38,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Compute the slope and bias of each decision boundary\n",
|
"def compute_decision_boundary(model):\n",
|
||||||
"w1 = -lin_clf.coef_[0, 0] / lin_clf.coef_[0, 1]\n",
|
" w = -model.coef_[0, 0] / model.coef_[0, 1]\n",
|
||||||
"b1 = -lin_clf.intercept_[0] / lin_clf.coef_[0, 1]\n",
|
" b = -model.intercept_[0] / model.coef_[0, 1]\n",
|
||||||
"w2 = -svm_clf.coef_[0, 0] / svm_clf.coef_[0, 1]\n",
|
" return scaler.inverse_transform([[-10, -10 * w + b], [10, 10 * w + b]])\n",
|
||||||
"b2 = -svm_clf.intercept_[0] / svm_clf.coef_[0, 1]\n",
|
|
||||||
"w3 = -sgd_clf.coef_[0, 0] / sgd_clf.coef_[0, 1]\n",
|
|
||||||
"b3 = -sgd_clf.intercept_[0] / sgd_clf.coef_[0, 1]\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"# Transform the decision boundary lines back to the original scale\n",
|
"lin_line = compute_decision_boundary(lin_clf)\n",
|
||||||
"line1 = scaler.inverse_transform([[-10, -10 * w1 + b1], [10, 10 * w1 + b1]])\n",
|
"svc_line = compute_decision_boundary(svc_clf)\n",
|
||||||
"line2 = scaler.inverse_transform([[-10, -10 * w2 + b2], [10, 10 * w2 + b2]])\n",
|
"sgd_line = compute_decision_boundary(sgd_clf)\n",
|
||||||
"line3 = scaler.inverse_transform([[-10, -10 * w3 + b3], [10, 10 * w3 + b3]])\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"# Plot all three decision boundaries\n",
|
"# Plot all three decision boundaries\n",
|
||||||
"plt.figure(figsize=(11, 4))\n",
|
"plt.figure(figsize=(11, 4))\n",
|
||||||
"plt.plot(line1[:, 0], line1[:, 1], \"k:\", label=\"LinearSVC\")\n",
|
"plt.plot(lin_line[:, 0], lin_line[:, 1], \"k:\", label=\"LinearSVC\")\n",
|
||||||
"plt.plot(line2[:, 0], line2[:, 1], \"b--\", linewidth=2, label=\"SVC\")\n",
|
"plt.plot(svc_line[:, 0], svc_line[:, 1], \"b--\", linewidth=2, label=\"SVC\")\n",
|
||||||
"plt.plot(line3[:, 0], line3[:, 1], \"r-\", label=\"SGDClassifier\")\n",
|
"plt.plot(sgd_line[:, 0], sgd_line[:, 1], \"r-\", label=\"SGDClassifier\")\n",
|
||||||
"plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"bs\") # label=\"Iris versicolor\"\n",
|
"plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"bs\") # label=\"Iris versicolor\"\n",
|
||||||
"plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"yo\") # label=\"Iris setosa\"\n",
|
"plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"yo\") # label=\"Iris setosa\"\n",
|
||||||
"plt.xlabel(\"Petal length\")\n",
|
"plt.xlabel(\"Petal length\")\n",
|
||||||
|
@ -1407,14 +1332,14 @@
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# 9."
|
"# 10."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"_Exercise: train an SVM classifier on the Wine dataset, which you can load using `sklearn.datasets.load_wine()`. This dataset contains the chemical analysis of 178 wine samples produced by 3 different cultivators: the goal is to train a classification model capable of predicting the cultivator based on the wine's chemical analysis. Since SVM classifiers are binary classifiers, you will need to use one-versus-all to classify all 3 classes. You may want to tune the hyperparameters using small validation sets to speed up the process. What accuracy can you reach?_"
|
"_Exercise: Train an SVM classifier on the Wine dataset, which you can load using `sklearn.datasets.load_wine()`. This dataset contains the chemical analysis of 178 wine samples produced by 3 different cultivators: the goal is to train a classification model capable of predicting the cultivator based on the wine's chemical analysis. Since SVM classifiers are binary classifiers, you will need to use one-versus-all to classify all 3 classes. What accuracy can you reach?_"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1426,7 +1351,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 44,
|
"execution_count": 39,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1437,7 +1362,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 45,
|
"execution_count": 40,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1446,7 +1371,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 46,
|
"execution_count": 41,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1458,7 +1383,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 47,
|
"execution_count": 42,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1467,7 +1392,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 48,
|
"execution_count": 43,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1483,7 +1408,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 49,
|
"execution_count": 44,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1500,7 +1425,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 50,
|
"execution_count": 45,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1519,7 +1444,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 51,
|
"execution_count": 46,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1539,7 +1464,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 52,
|
"execution_count": 47,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1557,7 +1482,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 53,
|
"execution_count": 48,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1582,7 +1507,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 54,
|
"execution_count": 49,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1599,7 +1524,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 55,
|
"execution_count": 50,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1618,7 +1543,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 56,
|
"execution_count": 51,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1634,7 +1559,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 57,
|
"execution_count": 52,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1652,14 +1577,14 @@
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## 10."
|
"## 11."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"_Exercise: Train and fine-tune an SVM regressor on the California housing dataset. You can use the original dataset rather than the tweaked version we used in Chapter 2. The original dataset can be fetched using `sklearn.datasets.fetch_california_housing()`. Since there are over 20,000 instances, SVMs can be slow, so for hyperparameter tuning you should use much less instances (e.g., 2,000), to test many more hyperparameter combinations._"
|
"_Exercise: Train and fine-tune an SVM regressor on the California housing dataset. You can use the original dataset rather than the tweaked version we used in Chapter 2. The original dataset can be fetched using `sklearn.datasets.fetch_california_housing()`. The labels represent hundreds of thousands of dollars. Since there are over 20,000 instances, SVMs can be slow, so for hyperparameter tuning you should use far fewer instances (e.g., 2,000) so that you can test many more hyperparameter combinations. What is your best model's RMSE?_"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1671,7 +1596,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 58,
|
"execution_count": 53,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1691,13 +1616,14 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 59,
|
"execution_count": 54,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
"\n",
|
"\n",
|
||||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,\n",
|
||||||
|
" random_state=42)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1716,7 +1642,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 60,
|
"execution_count": 55,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1735,7 +1661,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 61,
|
"execution_count": 56,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1753,7 +1679,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 62,
|
"execution_count": 57,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1773,7 +1699,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 63,
|
"execution_count": 58,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1789,7 +1715,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 64,
|
"execution_count": 59,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1810,7 +1736,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 65,
|
"execution_count": 60,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1819,7 +1745,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 66,
|
"execution_count": 61,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1836,7 +1762,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 67,
|
"execution_count": 62,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
|
Loading…
Reference in New Issue