{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "**Training Linear Models**" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from __future__ import division, print_function, unicode_literals\n", "\n", "import numpy as np\n", "import numpy.random as rnd\n", "rnd.seed(42) # to make this notebook's output stable across runs\n", "\n", "from sklearn.linear_model import LinearRegression\n", "LinearRegression().fit([[5]], [3])\n", "\n", "%matplotlib inline\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "plt.rcParams['axes.labelsize'] = 14\n", "plt.rcParams['xtick.labelsize'] = 12\n", "plt.rcParams['ytick.labelsize'] = 12\n", "\n", "PROJECT_ROOT_DIR = \"/Users/ageron/dev/py/ml/handson-ml\"\n", "CHAPTER_ID = \"training_linear_models\"\n", "\n", "def save_fig(fig_id, tight_layout=True):\n", " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", " print(\"Saving figure\", fig_id)\n", " if tight_layout:\n", " plt.tight_layout()\n", " plt.savefig(path, format='png', dpi=300)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Linear regression using the Normal Equation" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "X = 2 * rnd.rand(100, 1)\n", "y = 4 + 3 * X + rnd.randn(100, 1)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.plot(X, y, \"b.\")\n", "plt.xlabel(\"$x_1$\", fontsize=18)\n", "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n", "plt.axis([0, 2, 0, 15])\n", "save_fig(\"generated_data\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import numpy.linalg as LA\n", "\n", "Xb = np.c_[np.ones((100, 1)), X] # add x0 = 1 to each instance\n", "theta_best = LA.inv(Xb.T.dot(Xb)).dot(Xb.T).dot(y)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "theta_best" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "X_new = np.array([[0], [2]])\n", "X_newb = np.c_[np.ones((2, 1)), X_new] # add x0 = 1 to each instance\n", "y_predict = X_newb.dot(theta_best)\n", "y_predict" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.plot(X_new, y_predict, \"r-\", linewidth=2, label=\"Predictions\")\n", "plt.plot(X, y, \"b.\")\n", "plt.xlabel(\"$x_1$\", fontsize=18)\n", "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n", "plt.legend(loc=\"upper left\", fontsize=14)\n", "plt.axis([0, 2, 0, 15])\n", "save_fig(\"linear_model_predictions\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.linear_model import LinearRegression\n", "lin_reg = LinearRegression()\n", "lin_reg.fit(X, y)\n", "lin_reg.intercept_, lin_reg.coef_" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "lin_reg.predict(X_new)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Linear regression using batch gradient descent" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "theta_path_bgd = []\n", "\n", "def plot_gradient_descent(theta, eta, theta_path=None):\n", " m = len(Xb)\n", " plt.plot(X, y, \"b.\")\n", " n_iterations = 1000\n", " for iteration in range(n_iterations):\n", " if iteration < 10:\n", " y_predict = X_newb.dot(theta)\n", " style = \"b-\" if iteration > 0 else \"r--\"\n", " plt.plot(X_new, y_predict, style)\n", " gradients = 2/m * Xb.T.dot(Xb.dot(theta) - y)\n", " theta = theta - eta * gradients\n", " if theta_path is not None:\n", " theta_path.append(theta)\n", " plt.xlabel(\"$x_1$\", fontsize=18)\n", " plt.axis([0, 2, 0, 15])\n", " plt.title(r\"$\\eta = {}$\".format(eta), fontsize=16)\n", "\n", "rnd.seed(42)\n", "theta = rnd.randn(2,1) # random initialization\n", "\n", "plt.figure(figsize=(10,4))\n", "plt.subplot(131); plot_gradient_descent(theta, eta=0.02)\n", "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n", "plt.subplot(132); plot_gradient_descent(theta, eta=0.1, theta_path=theta_path_bgd)\n", "plt.subplot(133); plot_gradient_descent(theta, eta=0.5)\n", "\n", "save_fig(\"gradient_descent_plot\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Stochastic Gradient Descent" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [ "theta_path_sgd = []\n", "\n", "n_iterations = 50\n", "t0, t1 = 5, 50 # learning schedule hyperparameters\n", "\n", "rnd.seed(42)\n", "theta = rnd.randn(2,1) # random initialization\n", "\n", "def learning_schedule(t):\n", " return t0 / (t + t1)\n", "\n", "m = len(Xb)\n", "\n", "for epoch in range(n_iterations):\n", " shuffled_indices = rnd.permutation(m)\n", " Xb_shuffled = Xb[shuffled_indices]\n", " y_shuffled = y[shuffled_indices]\n", " for i in range(m):\n", " if epoch == 0 and i < 20:\n", " y_predict = X_newb.dot(theta)\n", " style = \"b-\" if i > 0 else \"r--\"\n", " plt.plot(X_new, y_predict, style)\n", " xi = Xb_shuffled[i:i+1]\n", " yi = y_shuffled[i:i+1]\n", " gradients = 2 * xi.T.dot(xi.dot(theta) - yi)\n", " eta = learning_schedule(epoch * m + i)\n", " theta = theta - eta * gradients\n", " theta_path_sgd.append(theta)\n", "\n", "plt.plot(X, y, \"b.\")\n", "plt.xlabel(\"$x_1$\", fontsize=18)\n", "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n", "plt.axis([0, 2, 0, 15])\n", "save_fig(\"sgd_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [], "source": [ "theta" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.linear_model import SGDRegressor\n", "sgd_reg = SGDRegressor(n_iter=50, penalty=None, eta0=0.1)\n", "sgd_reg.fit(X, y.ravel())" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [], "source": [ "sgd_reg.intercept_, sgd_reg.coef_" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Mini-batch gradient descent" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": true }, "outputs": [], "source": [ "theta_path_mgd = []\n", "\n", "n_iterations = 50\n", "minibatch_size = 20\n", "\n", "rnd.seed(42)\n", "theta = rnd.randn(2,1) # random initialization\n", "\n", "t0, t1 = 10, 1000\n", "def learning_schedule(t):\n", " return t0 / (t + t1)\n", "\n", "t = 0\n", "for epoch in range(n_iterations):\n", " shuffled_indices = rnd.permutation(m)\n", " Xb_shuffled = Xb[shuffled_indices]\n", " y_shuffled = y[shuffled_indices]\n", " for i in range(0, m, minibatch_size):\n", " t += 1\n", " xi = Xb_shuffled[i:i+minibatch_size]\n", " yi = y_shuffled[i:i+minibatch_size]\n", " gradients = 2 * xi.T.dot(xi.dot(theta) - yi)\n", " eta = learning_schedule(t)\n", " theta = theta - eta * gradients\n", " theta_path_mgd.append(theta)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [], "source": [ "theta" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [], "source": [ "theta_path_bgd = np.array(theta_path_bgd)\n", "theta_path_sgd = np.array(theta_path_sgd)\n", "theta_path_mgd = np.array(theta_path_mgd)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.figure(figsize=(7,4))\n", "plt.plot(theta_path_sgd[:, 0], theta_path_sgd[:, 1], \"r-s\", linewidth=1, label=\"Stochastic\")\n", "plt.plot(theta_path_mgd[:, 0], theta_path_mgd[:, 1], \"g-+\", linewidth=2, label=\"Mini-batch\")\n", "plt.plot(theta_path_bgd[:, 0], theta_path_bgd[:, 1], \"b-o\", linewidth=3, label=\"Batch\")\n", "plt.legend(loc=\"upper left\", fontsize=16)\n", "plt.xlabel(r\"$\\theta_0$\", fontsize=20)\n", "plt.ylabel(r\"$\\theta_1$ \", fontsize=20, rotation=0)\n", "plt.axis([2.5, 4.5, 2.3, 3.9])\n", "save_fig(\"gradient_descent_paths_plot\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Polynomial regression" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np\n", "import numpy.random as rnd\n", "\n", "rnd.seed(42)\n", "m = 100\n", "X = 6 * rnd.rand(m, 1) - 3\n", "y = 2 + X + 0.5 * X**2 + rnd.randn(m, 1)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.plot(X, y, \"b.\")\n", "plt.xlabel(\"$x_1$\", fontsize=18)\n", "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n", "plt.axis([-3, 3, 0, 10])\n", "save_fig(\"quadratic_data_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.preprocessing import PolynomialFeatures\n", "poly_features = PolynomialFeatures(degree=2, include_bias=False)\n", "X_poly = poly_features.fit_transform(X)\n", "X[0]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [], "source": [ "X_poly[0]" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [], "source": [ "lin_reg = LinearRegression()\n", "lin_reg.fit(X_poly, y)\n", "lin_reg.intercept_, lin_reg.coef_" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [], "source": [ "X_new=np.linspace(-3, 3, 100).reshape(100, 1)\n", "X_new_poly = poly_features.transform(X_new)\n", "y_new = lin_reg.predict(X_new_poly)\n", "plt.plot(X, y, \"b.\")\n", "plt.plot(X_new, y_new, \"r-\", linewidth=2, label=\"Predictions\")\n", "plt.xlabel(\"$x_1$\", fontsize=18)\n", "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n", "plt.legend(loc=\"upper left\", fontsize=14)\n", "plt.axis([-3, 3, 0, 10])\n", "save_fig(\"quadratic_predictions_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.preprocessing import StandardScaler\n", "from sklearn.pipeline import Pipeline\n", "\n", "for style, width, degree in ((\"g-\", 1, 300), (\"b--\", 2, 2), (\"r-+\", 2, 1)):\n", " polybig_features = PolynomialFeatures(degree=degree, include_bias=False)\n", " std_scaler = StandardScaler()\n", " lin_reg = LinearRegression()\n", " polynomial_regression = Pipeline((\n", " (\"poly_features\", polybig_features),\n", " (\"std_scaler\", std_scaler),\n", " (\"lin_reg\", lin_reg),\n", " ))\n", " polynomial_regression.fit(X, y)\n", " y_newbig = polynomial_regression.predict(X_new)\n", " plt.plot(X_new, y_newbig, style, label=str(degree), linewidth=width)\n", "\n", "plt.plot(X, y, \"b.\", linewidth=3)\n", "plt.legend(loc=\"upper left\")\n", "plt.xlabel(\"$x_1$\", fontsize=18)\n", "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n", "plt.axis([-3, 3, 0, 10])\n", "save_fig(\"high_degree_polynomials_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.metrics import mean_squared_error\n", "from sklearn.cross_validation import train_test_split\n", "\n", "def plot_learning_curves(model, X, y):\n", " X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=10)\n", " train_errors, val_errors = [], []\n", " for m in range(1, len(X_train)):\n", " model.fit(X_train[:m], y_train[:m])\n", " y_train_predict = model.predict(X_train[:m])\n", " y_val_predict = model.predict(X_val)\n", " train_errors.append(mean_squared_error(y_train_predict, y_train[:m]))\n", " val_errors.append(mean_squared_error(y_val_predict, y_val))\n", "\n", " plt.plot(np.sqrt(train_errors), \"r-+\", linewidth=2, label=\"Training set\")\n", " plt.plot(np.sqrt(val_errors), \"b-\", linewidth=3, label=\"Validation set\")\n", " plt.legend(loc=\"upper right\", fontsize=14)\n", " plt.xlabel(\"Training set size\", fontsize=14)\n", " plt.ylabel(\"RMSE\", fontsize=14)\n", "\n", "lin_reg = LinearRegression()\n", "plot_learning_curves(lin_reg, X, y)\n", "plt.axis([0, 80, 0, 3])\n", "save_fig(\"underfitting_learning_curves_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.pipeline import Pipeline\n", "\n", "polynomial_regression = Pipeline((\n", " (\"poly_features\", PolynomialFeatures(degree=10, include_bias=False)),\n", " (\"sgd_reg\", LinearRegression()),\n", " ))\n", "\n", "plot_learning_curves(polynomial_regression, X, y)\n", "plt.axis([0, 80, 0, 3])\n", "save_fig(\"learning_curves_plot\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Regularized models" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.linear_model import Ridge\n", "\n", "rnd.seed(42)\n", "m = 20\n", "X = 3 * rnd.rand(m, 1)\n", "y = 1 + 0.5 * X + rnd.randn(m, 1) / 1.5\n", "X_new = np.linspace(0, 3, 100).reshape(100, 1)\n", "\n", "def plot_model(model_class, polynomial, alphas, **model_kargs):\n", " for alpha, style in zip(alphas, (\"b-\", \"g--\", \"r:\")):\n", " model = model_class(alpha, **model_kargs) if alpha > 0 else LinearRegression()\n", " if polynomial:\n", " model = Pipeline((\n", " (\"poly_features\", PolynomialFeatures(degree=10, include_bias=False)),\n", " (\"std_scaler\", StandardScaler()),\n", " (\"regul_reg\", model),\n", " ))\n", " model.fit(X, y)\n", " y_new_regul = model.predict(X_new)\n", " lw = 2 if alpha > 0 else 1\n", " plt.plot(X_new, y_new_regul, style, linewidth=lw, label=r\"$\\alpha = {}$\".format(alpha))\n", " plt.plot(X, y, \"b.\", linewidth=3)\n", " plt.legend(loc=\"upper left\", fontsize=15)\n", " plt.xlabel(\"$x_1$\", fontsize=18)\n", " plt.axis([0, 3, 0, 4])\n", "\n", "plt.figure(figsize=(8,4))\n", "plt.subplot(121)\n", "plot_model(Ridge, polynomial=False, alphas=(0, 10, 100))\n", "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n", "plt.subplot(122)\n", "plot_model(Ridge, polynomial=True, alphas=(0, 10**-5, 1))\n", "\n", "save_fig(\"ridge_regression_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.linear_model import Ridge\n", "ridge_reg = Ridge(alpha=1, solver=\"cholesky\")\n", "ridge_reg.fit(X, y)\n", "ridge_reg.predict([[1.5]])" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [], "source": [ "sgd_reg = SGDRegressor(penalty=\"l2\", random_state=42)\n", "sgd_reg.fit(X, y.ravel())\n", "ridge_reg.predict([[1.5]])" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ridge_reg = Ridge(alpha=1, solver=\"sag\")\n", "ridge_reg.fit(X, y)\n", "ridge_reg.predict([[1.5]])" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.linear_model import Lasso\n", "\n", "plt.figure(figsize=(8,4))\n", "plt.subplot(121)\n", "plot_model(Lasso, polynomial=False, alphas=(0, 0.1, 1))\n", "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n", "plt.subplot(122)\n", "plot_model(Lasso, polynomial=True, alphas=(0, 10**-7, 1), tol=1)\n", "\n", "save_fig(\"lasso_regression_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.linear_model import Lasso\n", "lasso_reg = Lasso(alpha=0.1)\n", "lasso_reg.fit(X, y)\n", "lasso_reg.predict([[1.5]])" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.linear_model import ElasticNet\n", "elastic_net = ElasticNet(alpha=0.1, l1_ratio=0.5)\n", "elastic_net.fit(X, y)\n", "elastic_net.predict([[1.5]])" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "rnd.seed(42)\n", "m = 100\n", "X = 6 * rnd.rand(m, 1) - 3\n", "y = 2 + X + 0.5 * X**2 + rnd.randn(m, 1)\n", "\n", "X_train, X_val, y_train, y_val = train_test_split(X[:50], y[:50].ravel(), test_size=0.5, random_state=10)\n", "\n", "poly_scaler = Pipeline((\n", " (\"poly_features\", PolynomialFeatures(degree=90, include_bias=False)),\n", " (\"std_scaler\", StandardScaler()),\n", " ))\n", "\n", "X_train_poly_scaled = poly_scaler.fit_transform(X_train)\n", "X_val_poly_scaled = poly_scaler.transform(X_val)\n", "\n", "sgd_reg = SGDRegressor(n_iter=1,\n", " penalty=None,\n", " eta0=0.0005,\n", " warm_start=True,\n", " learning_rate=\"constant\",\n", " random_state=42)\n", "\n", "n_epochs = 500\n", "train_errors, val_errors = [], []\n", "for epoch in range(n_epochs):\n", " sgd_reg.fit(X_train_poly_scaled, y_train)\n", " y_train_predict = sgd_reg.predict(X_train_poly_scaled)\n", " y_val_predict = sgd_reg.predict(X_val_poly_scaled)\n", " train_errors.append(mean_squared_error(y_train_predict, y_train))\n", " val_errors.append(mean_squared_error(y_val_predict, y_val))\n", "\n", "best_epoch = np.argmin(val_errors)\n", "best_val_rmse = np.sqrt(val_errors[best_epoch])\n", "\n", "plt.annotate('Best model',\n", " xy=(best_epoch, best_val_rmse),\n", " xytext=(best_epoch, best_val_rmse + 1),\n", " ha=\"center\",\n", " arrowprops=dict(facecolor='black', shrink=0.05),\n", " fontsize=16,\n", " )\n", "\n", "best_val_rmse -= 0.03 # just to make the graph look better\n", "plt.plot([0, n_epochs], [best_val_rmse, best_val_rmse], \"k:\", linewidth=2)\n", "plt.plot(np.sqrt(val_errors), \"b-\", linewidth=3, label=\"Validation set\")\n", "plt.plot(np.sqrt(train_errors), \"r--\", linewidth=2, label=\"Training set\")\n", "plt.legend(loc=\"upper right\", fontsize=14)\n", "plt.xlabel(\"Epoch\", fontsize=14)\n", "plt.ylabel(\"RMSE\", fontsize=14)\n", "save_fig(\"early_stopping_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.base import clone\n", "sgd_reg = SGDRegressor(n_iter=1, warm_start=True, penalty=None,\n", " learning_rate=\"constant\", eta0=0.0005,\n", " random_state=42)\n", "\n", "minimum_val_error = float(\"inf\")\n", "best_epoch = None\n", "best_model = None\n", "for epoch in range(1000):\n", " sgd_reg.fit(X_train_poly_scaled, y_train) # continues where it left off\n", " y_val_predict = sgd_reg.predict(X_val_poly_scaled)\n", " val_error = mean_squared_error(y_val_predict, y_val)\n", " if val_error < minimum_val_error:\n", " minimum_val_error = val_error\n", " best_epoch = epoch\n", " best_model = clone(sgd_reg)\n", "\n", "best_epoch, best_model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Logistic regression" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [], "source": [ "t = np.linspace(-10, 10, 100)\n", "sig = 1 / (1 + np.exp(-t))\n", "plt.figure(figsize=(9, 3))\n", "plt.plot([-10, 10], [0, 0], \"k-\")\n", "plt.plot([-10, 10], [0.5, 0.5], \"k:\")\n", "plt.plot([-10, 10], [1, 1], \"k:\")\n", "plt.plot([0, 0], [-1.1, 1.1], \"k-\")\n", "plt.plot(t, sig, \"b-\", linewidth=2, label=r\"$\\sigma(t) = \\frac{1}{1 + e^{-t}}$\")\n", "plt.xlabel(\"t\")\n", "plt.legend(loc=\"upper left\", fontsize=20)\n", "plt.axis([-10, 10, -0.1, 1.1])\n", "save_fig(\"logistic_function_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn import datasets\n", "iris = datasets.load_iris()\n", "list(iris.keys())" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(iris.DESCR)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression\n", "\n", "X = iris[\"data\"][:, 3:] # petal width\n", "y = (iris[\"target\"] == 2).astype(np.int) # 1 if Iris-Virginica, else 0\n", "\n", "log_reg = LogisticRegression()\n", "log_reg.fit(X, y)\n", "\n", "X_new = np.linspace(0, 3, 1000).reshape(-1, 1)\n", "y_proba = log_reg.predict_proba(X_new)\n", "decision_boundary = X_new[y_proba[:, 1] >= 0.5][0]\n", "\n", "plt.figure(figsize=(8, 3))\n", "plt.plot(X[y==0], y[y==0], \"bs\")\n", "plt.plot(X[y==1], y[y==1], \"g^\")\n", "plt.plot([decision_boundary, decision_boundary], [-1, 2], \"k:\", linewidth=2)\n", "plt.plot(X_new, y_proba[:, 1], \"g-\", linewidth=2, label=\"Iris-Virginica\")\n", "plt.plot(X_new, y_proba[:, 0], \"b--\", linewidth=2, label=\"Not Iris-Virginica\")\n", "plt.text(decision_boundary+0.02, 0.15, \"Decision boundary\", fontsize=14, color=\"k\", ha=\"center\")\n", "plt.arrow(decision_boundary, 0.08, -0.3, 0, head_width=0.05, head_length=0.1, fc='b', ec='b')\n", "plt.arrow(decision_boundary, 0.92, 0.3, 0, head_width=0.05, head_length=0.1, fc='g', ec='g')\n", "plt.xlabel(\"Petal width (cm)\", fontsize=14)\n", "plt.ylabel(\"Probability\", fontsize=14)\n", "plt.legend(loc=\"center left\", fontsize=14)\n", "plt.axis([0, 3, -0.02, 1.02])\n", "save_fig(\"logistic_regression_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [], "source": [ "decision_boundary" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [], "source": [ "log_reg.predict([[1.7], [1.5]])" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression\n", "\n", "X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n", "y = (iris[\"target\"] == 2).astype(np.int)\n", "\n", "log_reg = LogisticRegression(C=10**10)\n", "log_reg.fit(X, y)\n", "\n", "x0, x1 = np.meshgrid(\n", " np.linspace(2.9, 7, 500).reshape(-1, 1),\n", " np.linspace(0.8, 2.7, 200).reshape(-1, 1),\n", " )\n", "X_new = np.c_[x0.ravel(), x1.ravel()]\n", "\n", "y_proba = log_reg.predict_proba(X_new)\n", "\n", "plt.figure(figsize=(10, 4))\n", "plt.plot(X[y==0, 0], X[y==0, 1], \"bs\")\n", "plt.plot(X[y==1, 0], X[y==1, 1], \"g^\")\n", "\n", "zz = y_proba[:, 1].reshape(x0.shape)\n", "contour = plt.contour(x0, x1, zz, cmap=plt.cm.brg)\n", "\n", "\n", "left_right = np.array([2.9, 7])\n", "boundary = -(log_reg.coef_[0][0] * left_right + log_reg.intercept_[0]) / log_reg.coef_[0][1]\n", "\n", "plt.clabel(contour, inline=1, fontsize=12)\n", "plt.plot(left_right, boundary, \"k--\", linewidth=3)\n", "plt.text(3.5, 1.5, \"Not Iris-Virginica\", fontsize=14, color=\"b\", ha=\"center\")\n", "plt.text(6.5, 2.3, \"Iris-Virginica\", fontsize=14, color=\"g\", ha=\"center\")\n", "plt.xlabel(\"Petal length\", fontsize=14)\n", "plt.ylabel(\"Petal width\", fontsize=14)\n", "plt.axis([2.9, 7, 0.8, 2.7])\n", "save_fig(\"logistic_regression_contour_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression\n", "\n", "X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n", "y = iris[\"target\"]\n", "\n", "softmax_reg = LogisticRegression(multi_class=\"multinomial\", solver=\"lbfgs\", C=10)\n", "softmax_reg.fit(X, y)\n", "\n", "x0, x1 = np.meshgrid(\n", " np.linspace(0, 8, 500).reshape(-1, 1),\n", " np.linspace(0, 3.5, 200).reshape(-1, 1),\n", " )\n", "X_new = np.c_[x0.ravel(), x1.ravel()]\n", "\n", "\n", "y_proba = softmax_reg.predict_proba(X_new)\n", "y_predict = softmax_reg.predict(X_new)\n", "\n", "zz1 = y_proba[:, 1].reshape(x0.shape)\n", "zz = y_predict.reshape(x0.shape)\n", "\n", "plt.figure(figsize=(10, 4))\n", "plt.plot(X[y==2, 0], X[y==2, 1], \"g^\", label=\"Iris-Virginica\")\n", "plt.plot(X[y==1, 0], X[y==1, 1], \"bs\", label=\"Iris-Versicolour\")\n", "plt.plot(X[y==0, 0], X[y==0, 1], \"yo\", label=\"Iris-Setosa\")\n", "\n", "from matplotlib.colors import ListedColormap\n", "custom_cmap = ListedColormap(['#fafab0','#9898ff','#a0faa0'])\n", "\n", "plt.contourf(x0, x1, zz, cmap=custom_cmap, linewidth=5)\n", "contour = plt.contour(x0, x1, zz1, cmap=plt.cm.brg)\n", "plt.clabel(contour, inline=1, fontsize=12)\n", "plt.xlabel(\"Petal length\", fontsize=14)\n", "plt.ylabel(\"Petal width\", fontsize=14)\n", "plt.legend(loc=\"center left\", fontsize=14)\n", "plt.axis([0, 7, 0, 3.5])\n", "save_fig(\"softmax_regression_contour_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [], "source": [ "softmax_reg.predict([[5, 2]])" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [], "source": [ "softmax_reg.predict_proba([[5, 2]])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" }, "toc": { "toc_cell": false, "toc_number_sections": true, "toc_threshold": 6, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 0 }