handson-ml/04_training_linear_models.i...

1355 lines
37 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"**Chapter 4 Training Linear Models**"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"_This notebook contains all the sample code and solutions to the exercices in chapter 4._"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Setup"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# To support both python 2 and python 3\n",
"from __future__ import division, print_function, unicode_literals\n",
"\n",
"# Common imports\n",
"import numpy as np\n",
"import numpy.random as rnd\n",
"import os\n",
"\n",
"# to make this notebook's output stable across runs\n",
"rnd.seed(42)\n",
"\n",
"# To plot pretty figures\n",
"%matplotlib inline\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"plt.rcParams['axes.labelsize'] = 14\n",
"plt.rcParams['xtick.labelsize'] = 12\n",
"plt.rcParams['ytick.labelsize'] = 12\n",
"\n",
"# Where to save the figures\n",
"PROJECT_ROOT_DIR = \".\"\n",
"CHAPTER_ID = \"training_linear_models\"\n",
"\n",
"def save_fig(fig_id, tight_layout=True):\n",
" path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n",
" print(\"Saving figure\", fig_id)\n",
" if tight_layout:\n",
" plt.tight_layout()\n",
" plt.savefig(path, format='png', dpi=300)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Linear regression using the Normal Equation"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"X = 2 * rnd.rand(100, 1)\n",
"y = 4 + 3 * X + rnd.randn(100, 1)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"plt.plot(X, y, \"b.\")\n",
"plt.xlabel(\"$x_1$\", fontsize=18)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.axis([0, 2, 0, 15])\n",
"save_fig(\"generated_data_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"import numpy.linalg as LA\n",
"\n",
"X_b = np.c_[np.ones((100, 1)), X] # add x0 = 1 to each instance\n",
"theta_best = LA.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"theta_best"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"X_new = np.array([[0], [2]])\n",
"X_new_b = np.c_[np.ones((2, 1)), X_new] # add x0 = 1 to each instance\n",
"y_predict = X_new_b.dot(theta_best)\n",
"y_predict"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"plt.plot(X_new, y_predict, \"r-\", linewidth=2, label=\"Predictions\")\n",
"plt.plot(X, y, \"b.\")\n",
"plt.xlabel(\"$x_1$\", fontsize=18)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.legend(loc=\"upper left\", fontsize=14)\n",
"plt.axis([0, 2, 0, 15])\n",
"save_fig(\"linear_model_predictions\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"lin_reg = LinearRegression()\n",
"lin_reg.fit(X, y)\n",
"lin_reg.intercept_, lin_reg.coef_"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"lin_reg.predict(X_new)"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Linear regression using batch gradient descent"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"theta_path_bgd = []\n",
"\n",
"def plot_gradient_descent(theta, eta, theta_path=None):\n",
" m = len(X_b)\n",
" plt.plot(X, y, \"b.\")\n",
" n_iterations = 1000\n",
" for iteration in range(n_iterations):\n",
" if iteration < 10:\n",
" y_predict = X_new_b.dot(theta)\n",
" style = \"b-\" if iteration > 0 else \"r--\"\n",
" plt.plot(X_new, y_predict, style)\n",
" gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y)\n",
" theta = theta - eta * gradients\n",
" if theta_path is not None:\n",
" theta_path.append(theta)\n",
" plt.xlabel(\"$x_1$\", fontsize=18)\n",
" plt.axis([0, 2, 0, 15])\n",
" plt.title(r\"$\\eta = {}$\".format(eta), fontsize=16)\n",
"\n",
"rnd.seed(42)\n",
"theta = rnd.randn(2,1) # random initialization\n",
"\n",
"plt.figure(figsize=(10,4))\n",
"plt.subplot(131); plot_gradient_descent(theta, eta=0.02)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.subplot(132); plot_gradient_descent(theta, eta=0.1, theta_path=theta_path_bgd)\n",
"plt.subplot(133); plot_gradient_descent(theta, eta=0.5)\n",
"\n",
"save_fig(\"gradient_descent_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Stochastic Gradient Descent"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"theta_path_sgd = []\n",
"\n",
"n_iterations = 50\n",
"t0, t1 = 5, 50 # learning schedule hyperparameters\n",
"\n",
"rnd.seed(42)\n",
"theta = rnd.randn(2,1) # random initialization\n",
"\n",
"def learning_schedule(t):\n",
" return t0 / (t + t1)\n",
"\n",
"m = len(X_b)\n",
"\n",
"for epoch in range(n_iterations):\n",
" for i in range(m):\n",
" if epoch == 0 and i < 20:\n",
" y_predict = X_new_b.dot(theta)\n",
" style = \"b-\" if i > 0 else \"r--\"\n",
" plt.plot(X_new, y_predict, style)\n",
" random_index = rnd.randint(m)\n",
" xi = X_b[random_index:random_index+1]\n",
" yi = y[random_index:random_index+1]\n",
" gradients = 2 * xi.T.dot(xi.dot(theta) - yi)\n",
" eta = learning_schedule(epoch * m + i)\n",
" theta = theta - eta * gradients\n",
" theta_path_sgd.append(theta)\n",
"\n",
"plt.plot(X, y, \"b.\")\n",
"plt.xlabel(\"$x_1$\", fontsize=18)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.axis([0, 2, 0, 15])\n",
"save_fig(\"sgd_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"theta"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.linear_model import SGDRegressor\n",
"sgd_reg = SGDRegressor(n_iter=50, penalty=None, eta0=0.1)\n",
"sgd_reg.fit(X, y.ravel())"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"sgd_reg.intercept_, sgd_reg.coef_"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Mini-batch gradient descent"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"theta_path_mgd = []\n",
"\n",
"n_iterations = 50\n",
"minibatch_size = 20\n",
"\n",
"rnd.seed(42)\n",
"theta = rnd.randn(2,1) # random initialization\n",
"\n",
"t0, t1 = 10, 1000\n",
"def learning_schedule(t):\n",
" return t0 / (t + t1)\n",
"\n",
"t = 0\n",
"for epoch in range(n_iterations):\n",
" shuffled_indices = rnd.permutation(m)\n",
" X_b_shuffled = X_b[shuffled_indices]\n",
" y_shuffled = y[shuffled_indices]\n",
" for i in range(0, m, minibatch_size):\n",
" t += 1\n",
" xi = X_b_shuffled[i:i+minibatch_size]\n",
" yi = y_shuffled[i:i+minibatch_size]\n",
" gradients = 2 * xi.T.dot(xi.dot(theta) - yi)\n",
" eta = learning_schedule(t)\n",
" theta = theta - eta * gradients\n",
" theta_path_mgd.append(theta)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"theta"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"theta_path_bgd = np.array(theta_path_bgd)\n",
"theta_path_sgd = np.array(theta_path_sgd)\n",
"theta_path_mgd = np.array(theta_path_mgd)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"plt.figure(figsize=(7,4))\n",
"plt.plot(theta_path_sgd[:, 0], theta_path_sgd[:, 1], \"r-s\", linewidth=1, label=\"Stochastic\")\n",
"plt.plot(theta_path_mgd[:, 0], theta_path_mgd[:, 1], \"g-+\", linewidth=2, label=\"Mini-batch\")\n",
"plt.plot(theta_path_bgd[:, 0], theta_path_bgd[:, 1], \"b-o\", linewidth=3, label=\"Batch\")\n",
"plt.legend(loc=\"upper left\", fontsize=16)\n",
"plt.xlabel(r\"$\\theta_0$\", fontsize=20)\n",
"plt.ylabel(r\"$\\theta_1$ \", fontsize=20, rotation=0)\n",
"plt.axis([2.5, 4.5, 2.3, 3.9])\n",
"save_fig(\"gradient_descent_paths_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Polynomial regression"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import numpy.random as rnd\n",
"\n",
"rnd.seed(42)\n",
"m = 100\n",
"X = 6 * rnd.rand(m, 1) - 3\n",
"y = 2 + X + 0.5 * X**2 + rnd.randn(m, 1)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"plt.plot(X, y, \"b.\")\n",
"plt.xlabel(\"$x_1$\", fontsize=18)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.axis([-3, 3, 0, 10])\n",
"save_fig(\"quadratic_data_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.preprocessing import PolynomialFeatures\n",
"poly_features = PolynomialFeatures(degree=2, include_bias=False)\n",
"X_poly = poly_features.fit_transform(X)\n",
"X[0]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"X_poly[0]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"lin_reg = LinearRegression()\n",
"lin_reg.fit(X_poly, y)\n",
"lin_reg.intercept_, lin_reg.coef_"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"X_new=np.linspace(-3, 3, 100).reshape(100, 1)\n",
"X_new_poly = poly_features.transform(X_new)\n",
"y_new = lin_reg.predict(X_new_poly)\n",
"plt.plot(X, y, \"b.\")\n",
"plt.plot(X_new, y_new, \"r-\", linewidth=2, label=\"Predictions\")\n",
"plt.xlabel(\"$x_1$\", fontsize=18)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.legend(loc=\"upper left\", fontsize=14)\n",
"plt.axis([-3, 3, 0, 10])\n",
"save_fig(\"quadratic_predictions_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.pipeline import Pipeline\n",
"\n",
"for style, width, degree in ((\"g-\", 1, 300), (\"b--\", 2, 2), (\"r-+\", 2, 1)):\n",
" polybig_features = PolynomialFeatures(degree=degree, include_bias=False)\n",
" std_scaler = StandardScaler()\n",
" lin_reg = LinearRegression()\n",
" polynomial_regression = Pipeline((\n",
" (\"poly_features\", polybig_features),\n",
" (\"std_scaler\", std_scaler),\n",
" (\"lin_reg\", lin_reg),\n",
" ))\n",
" polynomial_regression.fit(X, y)\n",
" y_newbig = polynomial_regression.predict(X_new)\n",
" plt.plot(X_new, y_newbig, style, label=str(degree), linewidth=width)\n",
"\n",
"plt.plot(X, y, \"b.\", linewidth=3)\n",
"plt.legend(loc=\"upper left\")\n",
"plt.xlabel(\"$x_1$\", fontsize=18)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.axis([-3, 3, 0, 10])\n",
"save_fig(\"high_degree_polynomials_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"def plot_learning_curves(model, X, y):\n",
" X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=10)\n",
" train_errors, val_errors = [], []\n",
" for m in range(1, len(X_train)):\n",
" model.fit(X_train[:m], y_train[:m])\n",
" y_train_predict = model.predict(X_train[:m])\n",
" y_val_predict = model.predict(X_val)\n",
" train_errors.append(mean_squared_error(y_train_predict, y_train[:m]))\n",
" val_errors.append(mean_squared_error(y_val_predict, y_val))\n",
"\n",
" plt.plot(np.sqrt(train_errors), \"r-+\", linewidth=2, label=\"Training set\")\n",
" plt.plot(np.sqrt(val_errors), \"b-\", linewidth=3, label=\"Validation set\")\n",
" plt.legend(loc=\"upper right\", fontsize=14)\n",
" plt.xlabel(\"Training set size\", fontsize=14)\n",
" plt.ylabel(\"RMSE\", fontsize=14)\n",
"\n",
"lin_reg = LinearRegression()\n",
"plot_learning_curves(lin_reg, X, y)\n",
"plt.axis([0, 80, 0, 3])\n",
"save_fig(\"underfitting_learning_curves_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.pipeline import Pipeline\n",
"\n",
"polynomial_regression = Pipeline((\n",
" (\"poly_features\", PolynomialFeatures(degree=10, include_bias=False)),\n",
" (\"sgd_reg\", LinearRegression()),\n",
" ))\n",
"\n",
"plot_learning_curves(polynomial_regression, X, y)\n",
"plt.axis([0, 80, 0, 3])\n",
"save_fig(\"learning_curves_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Regularized models"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.linear_model import Ridge\n",
"\n",
"rnd.seed(42)\n",
"m = 20\n",
"X = 3 * rnd.rand(m, 1)\n",
"y = 1 + 0.5 * X + rnd.randn(m, 1) / 1.5\n",
"X_new = np.linspace(0, 3, 100).reshape(100, 1)\n",
"\n",
"def plot_model(model_class, polynomial, alphas, **model_kargs):\n",
" for alpha, style in zip(alphas, (\"b-\", \"g--\", \"r:\")):\n",
" model = model_class(alpha, **model_kargs) if alpha > 0 else LinearRegression()\n",
" if polynomial:\n",
" model = Pipeline((\n",
" (\"poly_features\", PolynomialFeatures(degree=10, include_bias=False)),\n",
" (\"std_scaler\", StandardScaler()),\n",
" (\"regul_reg\", model),\n",
" ))\n",
" model.fit(X, y)\n",
" y_new_regul = model.predict(X_new)\n",
" lw = 2 if alpha > 0 else 1\n",
" plt.plot(X_new, y_new_regul, style, linewidth=lw, label=r\"$\\alpha = {}$\".format(alpha))\n",
" plt.plot(X, y, \"b.\", linewidth=3)\n",
" plt.legend(loc=\"upper left\", fontsize=15)\n",
" plt.xlabel(\"$x_1$\", fontsize=18)\n",
" plt.axis([0, 3, 0, 4])\n",
"\n",
"plt.figure(figsize=(8,4))\n",
"plt.subplot(121)\n",
"plot_model(Ridge, polynomial=False, alphas=(0, 10, 100))\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.subplot(122)\n",
"plot_model(Ridge, polynomial=True, alphas=(0, 10**-5, 1))\n",
"\n",
"save_fig(\"ridge_regression_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.linear_model import Ridge\n",
"ridge_reg = Ridge(alpha=1, solver=\"cholesky\")\n",
"ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"sgd_reg = SGDRegressor(penalty=\"l2\", random_state=42)\n",
"sgd_reg.fit(X, y.ravel())\n",
"sgd_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"ridge_reg = Ridge(alpha=1, solver=\"sag\")\n",
"ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.linear_model import Lasso\n",
"\n",
"plt.figure(figsize=(8,4))\n",
"plt.subplot(121)\n",
"plot_model(Lasso, polynomial=False, alphas=(0, 0.1, 1))\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.subplot(122)\n",
"plot_model(Lasso, polynomial=True, alphas=(0, 10**-7, 1), tol=1)\n",
"\n",
"save_fig(\"lasso_regression_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.linear_model import Lasso\n",
"lasso_reg = Lasso(alpha=0.1)\n",
"lasso_reg.fit(X, y)\n",
"lasso_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.linear_model import ElasticNet\n",
"elastic_net = ElasticNet(alpha=0.1, l1_ratio=0.5)\n",
"elastic_net.fit(X, y)\n",
"elastic_net.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true,
"scrolled": true
},
"outputs": [],
"source": [
"rnd.seed(42)\n",
"m = 100\n",
"X = 6 * rnd.rand(m, 1) - 3\n",
"y = 2 + X + 0.5 * X**2 + rnd.randn(m, 1)\n",
"\n",
"X_train, X_val, y_train, y_val = train_test_split(X[:50], y[:50].ravel(), test_size=0.5, random_state=10)\n",
"\n",
"poly_scaler = Pipeline((\n",
" (\"poly_features\", PolynomialFeatures(degree=90, include_bias=False)),\n",
" (\"std_scaler\", StandardScaler()),\n",
" ))\n",
"\n",
"X_train_poly_scaled = poly_scaler.fit_transform(X_train)\n",
"X_val_poly_scaled = poly_scaler.transform(X_val)\n",
"\n",
"sgd_reg = SGDRegressor(n_iter=1,\n",
" penalty=None,\n",
" eta0=0.0005,\n",
" warm_start=True,\n",
" learning_rate=\"constant\",\n",
" random_state=42)\n",
"\n",
"n_epochs = 500\n",
"train_errors, val_errors = [], []\n",
"for epoch in range(n_epochs):\n",
" sgd_reg.fit(X_train_poly_scaled, y_train)\n",
" y_train_predict = sgd_reg.predict(X_train_poly_scaled)\n",
" y_val_predict = sgd_reg.predict(X_val_poly_scaled)\n",
" train_errors.append(mean_squared_error(y_train_predict, y_train))\n",
" val_errors.append(mean_squared_error(y_val_predict, y_val))\n",
"\n",
"best_epoch = np.argmin(val_errors)\n",
"best_val_rmse = np.sqrt(val_errors[best_epoch])\n",
"\n",
"plt.annotate('Best model',\n",
" xy=(best_epoch, best_val_rmse),\n",
" xytext=(best_epoch, best_val_rmse + 1),\n",
" ha=\"center\",\n",
" arrowprops=dict(facecolor='black', shrink=0.05),\n",
" fontsize=16,\n",
" )\n",
"\n",
"best_val_rmse -= 0.03 # just to make the graph look better\n",
"plt.plot([0, n_epochs], [best_val_rmse, best_val_rmse], \"k:\", linewidth=2)\n",
"plt.plot(np.sqrt(val_errors), \"b-\", linewidth=3, label=\"Validation set\")\n",
"plt.plot(np.sqrt(train_errors), \"r--\", linewidth=2, label=\"Training set\")\n",
"plt.legend(loc=\"upper right\", fontsize=14)\n",
"plt.xlabel(\"Epoch\", fontsize=14)\n",
"plt.ylabel(\"RMSE\", fontsize=14)\n",
"save_fig(\"early_stopping_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.base import clone\n",
"sgd_reg = SGDRegressor(n_iter=1, warm_start=True, penalty=None,\n",
" learning_rate=\"constant\", eta0=0.0005,\n",
" random_state=42)\n",
"\n",
"minimum_val_error = float(\"inf\")\n",
"best_epoch = None\n",
"best_model = None\n",
"for epoch in range(1000):\n",
" sgd_reg.fit(X_train_poly_scaled, y_train) # continues where it left off\n",
" y_val_predict = sgd_reg.predict(X_val_poly_scaled)\n",
" val_error = mean_squared_error(y_val_predict, y_val)\n",
" if val_error < minimum_val_error:\n",
" minimum_val_error = val_error\n",
" best_epoch = epoch\n",
" best_model = clone(sgd_reg)\n",
"\n",
"best_epoch, best_model"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"t1a, t1b, t2a, t2b = -1, 3, -1.5, 1.5\n",
"\n",
"# ignoring bias term\n",
"t1s = np.linspace(t1a, t1b, 500)\n",
"t2s = np.linspace(t2a, t2b, 500)\n",
"t1, t2 = np.meshgrid(t1s, t2s)\n",
"T = np.c_[t1.ravel(), t2.ravel()]\n",
"Xr = np.array([[-1, 1], [-0.3, -1], [1, 0.1]])\n",
"yr = 2 * Xr[:, :1] + 0.5 * Xr[:, 1:]\n",
"\n",
"J = (1/len(Xr) * np.sum((T.dot(Xr.T) - yr.T)**2, axis=1)).reshape(t1.shape)\n",
"\n",
"N1 = np.linalg.norm(T, ord=1, axis=1).reshape(t1.shape)\n",
"N2 = np.linalg.norm(T, ord=2, axis=1).reshape(t1.shape)\n",
"\n",
"t_min_idx = np.unravel_index(np.argmin(J), J.shape)\n",
"t1_min, t2_min = t1[t_min_idx], t2[t_min_idx]\n",
"\n",
"t_init = np.array([[0.25], [-1]])"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def bgd_path(theta, X, y, l1, l2, core = 1, eta = 0.1, n_iterations = 50):\n",
" path = [theta]\n",
" for iteration in range(n_iterations):\n",
" gradients = core * 2/len(X) * X.T.dot(X.dot(theta) - y) + l1 * np.sign(theta) + 2 * l2 * theta\n",
"\n",
" theta = theta - eta * gradients\n",
" path.append(theta)\n",
" return np.array(path)\n",
"\n",
"plt.figure(figsize=(12, 8))\n",
"for i, N, l1, l2, title in ((0, N1, 0.5, 0, \"Lasso\"), (1, N2, 0, 0.1, \"Ridge\")):\n",
" JR = J + l1 * N1 + l2 * N2**2\n",
" \n",
" tr_min_idx = np.unravel_index(np.argmin(JR), JR.shape)\n",
" t1r_min, t2r_min = t1[tr_min_idx], t2[tr_min_idx]\n",
"\n",
" levelsJ=(np.exp(np.linspace(0, 1, 20)) - 1) * (np.max(J) - np.min(J)) + np.min(J)\n",
" levelsJR=(np.exp(np.linspace(0, 1, 20)) - 1) * (np.max(JR) - np.min(JR)) + np.min(JR)\n",
" levelsN=np.linspace(0, np.max(N), 10)\n",
" \n",
" path_J = bgd_path(t_init, Xr, yr, l1=0, l2=0)\n",
" path_JR = bgd_path(t_init, Xr, yr, l1, l2)\n",
" path_N = bgd_path(t_init, Xr, yr, np.sign(l1)/3, np.sign(l2), core=0)\n",
"\n",
" plt.subplot(221 + i * 2)\n",
" plt.grid(True)\n",
" plt.axhline(y=0, color='k')\n",
" plt.axvline(x=0, color='k')\n",
" plt.contourf(t1, t2, J, levels=levelsJ, alpha=0.9)\n",
" plt.contour(t1, t2, N, levels=levelsN)\n",
" plt.plot(path_J[:, 0], path_J[:, 1], \"w-o\")\n",
" plt.plot(path_N[:, 0], path_N[:, 1], \"y-^\")\n",
" plt.plot(t1_min, t2_min, \"rs\")\n",
" plt.title(r\"$\\ell_{}$ penalty\".format(i + 1), fontsize=16)\n",
" plt.axis([t1a, t1b, t2a, t2b])\n",
"\n",
" plt.subplot(222 + i * 2)\n",
" plt.grid(True)\n",
" plt.axhline(y=0, color='k')\n",
" plt.axvline(x=0, color='k')\n",
" plt.contourf(t1, t2, JR, levels=levelsJR, alpha=0.9)\n",
" plt.plot(path_JR[:, 0], path_JR[:, 1], \"w-o\")\n",
" plt.plot(t1r_min, t2r_min, \"rs\")\n",
" plt.title(title, fontsize=16)\n",
" plt.axis([t1a, t1b, t2a, t2b])\n",
"\n",
"for subplot in (221, 223):\n",
" plt.subplot(subplot)\n",
" plt.ylabel(r\"$\\theta_2$\", fontsize=20, rotation=0)\n",
"\n",
"for subplot in (223, 224):\n",
" plt.subplot(subplot)\n",
" plt.xlabel(r\"$\\theta_1$\", fontsize=20)\n",
"\n",
"save_fig(\"lasso_vs_ridge_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Logistic regression"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"t = np.linspace(-10, 10, 100)\n",
"sig = 1 / (1 + np.exp(-t))\n",
"plt.figure(figsize=(9, 3))\n",
"plt.plot([-10, 10], [0, 0], \"k-\")\n",
"plt.plot([-10, 10], [0.5, 0.5], \"k:\")\n",
"plt.plot([-10, 10], [1, 1], \"k:\")\n",
"plt.plot([0, 0], [-1.1, 1.1], \"k-\")\n",
"plt.plot(t, sig, \"b-\", linewidth=2, label=r\"$\\sigma(t) = \\frac{1}{1 + e^{-t}}$\")\n",
"plt.xlabel(\"t\")\n",
"plt.legend(loc=\"upper left\", fontsize=20)\n",
"plt.axis([-10, 10, -0.1, 1.1])\n",
"save_fig(\"logistic_function_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn import datasets\n",
"iris = datasets.load_iris()\n",
"list(iris.keys())"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"print(iris.DESCR)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"X = iris[\"data\"][:, 3:] # petal width\n",
"y = (iris[\"target\"] == 2).astype(np.int) # 1 if Iris-Virginica, else 0\n",
"\n",
"log_reg = LogisticRegression()\n",
"log_reg.fit(X, y)\n",
"\n",
"X_new = np.linspace(0, 3, 1000).reshape(-1, 1)\n",
"y_proba = log_reg.predict_proba(X_new)\n",
"decision_boundary = X_new[y_proba[:, 1] >= 0.5][0]\n",
"\n",
"plt.figure(figsize=(8, 3))\n",
"plt.plot(X[y==0], y[y==0], \"bs\")\n",
"plt.plot(X[y==1], y[y==1], \"g^\")\n",
"plt.plot([decision_boundary, decision_boundary], [-1, 2], \"k:\", linewidth=2)\n",
"plt.plot(X_new, y_proba[:, 1], \"g-\", linewidth=2, label=\"Iris-Virginica\")\n",
"plt.plot(X_new, y_proba[:, 0], \"b--\", linewidth=2, label=\"Not Iris-Virginica\")\n",
"plt.text(decision_boundary+0.02, 0.15, \"Decision boundary\", fontsize=14, color=\"k\", ha=\"center\")\n",
"plt.arrow(decision_boundary, 0.08, -0.3, 0, head_width=0.05, head_length=0.1, fc='b', ec='b')\n",
"plt.arrow(decision_boundary, 0.92, 0.3, 0, head_width=0.05, head_length=0.1, fc='g', ec='g')\n",
"plt.xlabel(\"Petal width (cm)\", fontsize=14)\n",
"plt.ylabel(\"Probability\", fontsize=14)\n",
"plt.legend(loc=\"center left\", fontsize=14)\n",
"plt.axis([0, 3, -0.02, 1.02])\n",
"save_fig(\"logistic_regression_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"decision_boundary"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"log_reg.predict([[1.7], [1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n",
"y = (iris[\"target\"] == 2).astype(np.int)\n",
"\n",
"log_reg = LogisticRegression(C=10**10)\n",
"log_reg.fit(X, y)\n",
"\n",
"x0, x1 = np.meshgrid(\n",
" np.linspace(2.9, 7, 500).reshape(-1, 1),\n",
" np.linspace(0.8, 2.7, 200).reshape(-1, 1),\n",
" )\n",
"X_new = np.c_[x0.ravel(), x1.ravel()]\n",
"\n",
"y_proba = log_reg.predict_proba(X_new)\n",
"\n",
"plt.figure(figsize=(10, 4))\n",
"plt.plot(X[y==0, 0], X[y==0, 1], \"bs\")\n",
"plt.plot(X[y==1, 0], X[y==1, 1], \"g^\")\n",
"\n",
"zz = y_proba[:, 1].reshape(x0.shape)\n",
"contour = plt.contour(x0, x1, zz, cmap=plt.cm.brg)\n",
"\n",
"\n",
"left_right = np.array([2.9, 7])\n",
"boundary = -(log_reg.coef_[0][0] * left_right + log_reg.intercept_[0]) / log_reg.coef_[0][1]\n",
"\n",
"plt.clabel(contour, inline=1, fontsize=12)\n",
"plt.plot(left_right, boundary, \"k--\", linewidth=3)\n",
"plt.text(3.5, 1.5, \"Not Iris-Virginica\", fontsize=14, color=\"b\", ha=\"center\")\n",
"plt.text(6.5, 2.3, \"Iris-Virginica\", fontsize=14, color=\"g\", ha=\"center\")\n",
"plt.xlabel(\"Petal length\", fontsize=14)\n",
"plt.ylabel(\"Petal width\", fontsize=14)\n",
"plt.axis([2.9, 7, 0.8, 2.7])\n",
"save_fig(\"logistic_regression_contour_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n",
"y = iris[\"target\"]\n",
"\n",
"softmax_reg = LogisticRegression(multi_class=\"multinomial\", solver=\"lbfgs\", C=10)\n",
"softmax_reg.fit(X, y)\n",
"\n",
"x0, x1 = np.meshgrid(\n",
" np.linspace(0, 8, 500).reshape(-1, 1),\n",
" np.linspace(0, 3.5, 200).reshape(-1, 1),\n",
" )\n",
"X_new = np.c_[x0.ravel(), x1.ravel()]\n",
"\n",
"\n",
"y_proba = softmax_reg.predict_proba(X_new)\n",
"y_predict = softmax_reg.predict(X_new)\n",
"\n",
"zz1 = y_proba[:, 1].reshape(x0.shape)\n",
"zz = y_predict.reshape(x0.shape)\n",
"\n",
"plt.figure(figsize=(10, 4))\n",
"plt.plot(X[y==2, 0], X[y==2, 1], \"g^\", label=\"Iris-Virginica\")\n",
"plt.plot(X[y==1, 0], X[y==1, 1], \"bs\", label=\"Iris-Versicolor\")\n",
"plt.plot(X[y==0, 0], X[y==0, 1], \"yo\", label=\"Iris-Setosa\")\n",
"\n",
"from matplotlib.colors import ListedColormap\n",
"custom_cmap = ListedColormap(['#fafab0','#9898ff','#a0faa0'])\n",
"\n",
"plt.contourf(x0, x1, zz, cmap=custom_cmap, linewidth=5)\n",
"contour = plt.contour(x0, x1, zz1, cmap=plt.cm.brg)\n",
"plt.clabel(contour, inline=1, fontsize=12)\n",
"plt.xlabel(\"Petal length\", fontsize=14)\n",
"plt.ylabel(\"Petal width\", fontsize=14)\n",
"plt.legend(loc=\"center left\", fontsize=14)\n",
"plt.axis([0, 7, 0, 3.5])\n",
"save_fig(\"softmax_regression_contour_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"softmax_reg.predict([[5, 2]])"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"softmax_reg.predict_proba([[5, 2]])"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Exercise solutions"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"**Coming soon**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2+"
},
"nav_menu": {},
"toc": {
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 6,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 0
}