handson-ml/training_linear_models.ipynb

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Training Linear Models**"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from __future__ import division, print_function, unicode_literals\n",
"\n",
"import os\n",
"\n",
"import numpy as np\n",
"import numpy.random as rnd\n",
"rnd.seed(42) # to make this notebook's output stable across runs\n",
"\n",
"%matplotlib inline\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"plt.rcParams['axes.labelsize'] = 14\n",
"plt.rcParams['xtick.labelsize'] = 12\n",
"plt.rcParams['ytick.labelsize'] = 12\n",
"\n",
"PROJECT_ROOT_DIR = \"/Users/ageron/dev/py/ml/handson-ml\"\n",
"CHAPTER_ID = \"training_linear_models\"\n",
"\n",
"def save_fig(fig_id, tight_layout=True):\n",
" path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n",
" print(\"Saving figure\", fig_id)\n",
" if tight_layout:\n",
" plt.tight_layout()\n",
" plt.savefig(path, format='png', dpi=300)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Linear regression using the Normal Equation"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X = 2 * rnd.rand(100, 1)\n",
"y = 4 + 3 * X + rnd.randn(100, 1)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"plt.plot(X, y, \"b.\")\n",
"plt.xlabel(\"$x_1$\", fontsize=18)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.axis([0, 2, 0, 15])\n",
"save_fig(\"generated_data\")\n",
"plt.show()"
]
},
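{
"cell_type": "markdown",
"metadata": {},
"source": [
"The Normal Equation gives the closed-form least-squares solution:\n",
"\n",
"$\\hat{\\boldsymbol{\\theta}} = (\\mathbf{X}^T \\mathbf{X})^{-1} \\mathbf{X}^T \\mathbf{y}$\n",
"\n",
"The next cell implements it directly, after adding the bias term $x_0 = 1$ to each instance."
]
},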
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy.linalg as LA\n",
"\n",
"Xb = np.c_[np.ones((100, 1)), X] # add x0 = 1 to each instance\n",
"theta_best = LA.inv(Xb.T.dot(Xb)).dot(Xb.T).dot(y)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"theta_best"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_new = np.array([[0], [2]])\n",
"X_newb = np.c_[np.ones((2, 1)), X_new] # add x0 = 1 to each instance\n",
"y_predict = X_newb.dot(theta_best)\n",
"y_predict"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"plt.plot(X_new, y_predict, \"r-\", linewidth=2, label=\"Predictions\")\n",
"plt.plot(X, y, \"b.\")\n",
"plt.xlabel(\"$x_1$\", fontsize=18)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.legend(loc=\"upper left\", fontsize=14)\n",
"plt.axis([0, 2, 0, 15])\n",
"save_fig(\"linear_model_predictions\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"lin_reg = LinearRegression()\n",
"lin_reg.fit(X, y)\n",
"lin_reg.intercept_, lin_reg.coef_"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"lin_reg.predict(X_new)"
]
},
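{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check (an extra cell, not in the original notebook), the same least-squares solution can be obtained with SVD-based solvers, which avoid explicitly inverting $\\mathbf{X}^T \\mathbf{X}$ and are numerically more stable:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Two equivalent, more robust ways to solve the least-squares problem\n",
"theta_lstsq, residuals, rank, sv = LA.lstsq(Xb, y, rcond=None)  # SVD-based solver\n",
"theta_pinv = LA.pinv(Xb).dot(y)  # Moore-Penrose pseudoinverse\n",
"theta_lstsq, theta_pinv"
]
},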
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Linear regression using batch gradient descent"
]
},
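{
"cell_type": "markdown",
"metadata": {},
"source": [
"Batch gradient descent computes the gradient of the MSE cost over the full training set at every step:\n",
"\n",
"$\\nabla_{\\boldsymbol{\\theta}} \\mathrm{MSE}(\\boldsymbol{\\theta}) = \\frac{2}{m} \\mathbf{X}^T (\\mathbf{X} \\boldsymbol{\\theta} - \\mathbf{y})$\n",
"\n",
"then updates $\\boldsymbol{\\theta} \\leftarrow \\boldsymbol{\\theta} - \\eta \\, \\nabla_{\\boldsymbol{\\theta}} \\mathrm{MSE}(\\boldsymbol{\\theta})$, where $\\eta$ is the learning rate."
]
},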
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"theta_path_bgd = []\n",
"\n",
"def plot_gradient_descent(theta, eta, theta_path=None):\n",
" m = len(Xb)\n",
" plt.plot(X, y, \"b.\")\n",
" n_iterations = 1000\n",
" for iteration in range(n_iterations):\n",
" if iteration < 10:\n",
" y_predict = X_newb.dot(theta)\n",
" style = \"b-\" if iteration > 0 else \"r--\"\n",
" plt.plot(X_new, y_predict, style)\n",
" gradients = 2/m * Xb.T.dot(Xb.dot(theta) - y)\n",
" theta = theta - eta * gradients\n",
" if theta_path is not None:\n",
" theta_path.append(theta)\n",
" plt.xlabel(\"$x_1$\", fontsize=18)\n",
" plt.axis([0, 2, 0, 15])\n",
" plt.title(r\"$\\eta = {}$\".format(eta), fontsize=16)\n",
"\n",
"rnd.seed(42)\n",
"theta = rnd.randn(2,1) # random initialization\n",
"\n",
"plt.figure(figsize=(10,4))\n",
"plt.subplot(131); plot_gradient_descent(theta, eta=0.02)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.subplot(132); plot_gradient_descent(theta, eta=0.1, theta_path=theta_path_bgd)\n",
"plt.subplot(133); plot_gradient_descent(theta, eta=0.5)\n",
"\n",
"save_fig(\"gradient_descent_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Stochastic Gradient Descent"
]
},
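{
"cell_type": "markdown",
"metadata": {},
"source": [
"Stochastic gradient descent updates $\\boldsymbol{\\theta}$ using one randomly picked instance at a time. The learning rate is gradually reduced with the schedule $\\eta(t) = t_0 / (t + t_1)$, implemented by `learning_schedule()` below."
]
},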
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"theta_path_sgd = []\n",
"\n",
"n_iterations = 50\n",
"t0, t1 = 5, 50 # learning schedule hyperparameters\n",
"\n",
"rnd.seed(42)\n",
"theta = rnd.randn(2,1) # random initialization\n",
"\n",
"def learning_schedule(t):\n",
" return t0 / (t + t1)\n",
"\n",
"m = len(Xb)\n",
"\n",
"for epoch in range(n_iterations):\n",
" shuffled_indices = rnd.permutation(m)\n",
" Xb_shuffled = Xb[shuffled_indices]\n",
" y_shuffled = y[shuffled_indices]\n",
" for i in range(m):\n",
" if epoch == 0 and i < 20:\n",
" y_predict = X_newb.dot(theta)\n",
" style = \"b-\" if i > 0 else \"r--\"\n",
" plt.plot(X_new, y_predict, style)\n",
" xi = Xb_shuffled[i:i+1]\n",
" yi = y_shuffled[i:i+1]\n",
" gradients = 2 * xi.T.dot(xi.dot(theta) - yi)\n",
" eta = learning_schedule(epoch * m + i)\n",
" theta = theta - eta * gradients\n",
" theta_path_sgd.append(theta)\n",
"\n",
"plt.plot(X, y, \"b.\")\n",
"plt.xlabel(\"$x_1$\", fontsize=18)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.axis([0, 2, 0, 15])\n",
"save_fig(\"sgd_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"theta"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.linear_model import SGDRegressor\n",
"sgd_reg = SGDRegressor(n_iter=50, penalty=None, eta0=0.1)\n",
"sgd_reg.fit(X, y.ravel())"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"sgd_reg.intercept_, sgd_reg.coef_"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Mini-batch gradient descent"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"theta_path_mgd = []\n",
"\n",
"n_iterations = 50\n",
"minibatch_size = 20\n",
"\n",
"rnd.seed(42)\n",
"theta = rnd.randn(2,1) # random initialization\n",
"\n",
"t0, t1 = 10, 1000\n",
"def learning_schedule(t):\n",
" return t0 / (t + t1)\n",
"\n",
"t = 0\n",
"for epoch in range(n_iterations):\n",
" shuffled_indices = rnd.permutation(m)\n",
" Xb_shuffled = Xb[shuffled_indices]\n",
" y_shuffled = y[shuffled_indices]\n",
" for i in range(0, m, minibatch_size):\n",
" t += 1\n",
" xi = Xb_shuffled[i:i+minibatch_size]\n",
" yi = y_shuffled[i:i+minibatch_size]\n",
" gradients = 2 * xi.T.dot(xi.dot(theta) - yi)\n",
" eta = learning_schedule(t)\n",
" theta = theta - eta * gradients\n",
" theta_path_mgd.append(theta)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"theta"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"theta_path_bgd = np.array(theta_path_bgd)\n",
"theta_path_sgd = np.array(theta_path_sgd)\n",
"theta_path_mgd = np.array(theta_path_mgd)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"plt.figure(figsize=(7,4))\n",
"plt.plot(theta_path_sgd[:, 0], theta_path_sgd[:, 1], \"r-s\", linewidth=1, label=\"Stochastic\")\n",
"plt.plot(theta_path_mgd[:, 0], theta_path_mgd[:, 1], \"g-+\", linewidth=2, label=\"Mini-batch\")\n",
"plt.plot(theta_path_bgd[:, 0], theta_path_bgd[:, 1], \"b-o\", linewidth=3, label=\"Batch\")\n",
"plt.legend(loc=\"upper left\", fontsize=16)\n",
"plt.xlabel(r\"$\\theta_0$\", fontsize=20)\n",
"plt.ylabel(r\"$\\theta_1$ \", fontsize=20, rotation=0)\n",
"plt.axis([2.5, 4.5, 2.3, 3.9])\n",
"save_fig(\"gradient_descent_paths_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Polynomial regression"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import numpy.random as rnd\n",
"\n",
"rnd.seed(42)\n",
"m = 100\n",
"X = 6 * rnd.rand(m, 1) - 3\n",
"y = 2 + X + 0.5 * X**2 + rnd.randn(m, 1)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"plt.plot(X, y, \"b.\")\n",
"plt.xlabel(\"$x_1$\", fontsize=18)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.axis([-3, 3, 0, 10])\n",
"save_fig(\"quadratic_data_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.preprocessing import PolynomialFeatures\n",
"poly_features = PolynomialFeatures(degree=2, include_bias=False)\n",
"X_poly = poly_features.fit_transform(X)\n",
"X[0]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_poly[0]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"lin_reg = LinearRegression()\n",
"lin_reg.fit(X_poly, y)\n",
"lin_reg.intercept_, lin_reg.coef_"
]
},
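{
"cell_type": "markdown",
"metadata": {},
"source": [
"The fitted model has the form $\\hat{y} = \\theta_0 + \\theta_1 x_1 + \\theta_2 x_1^2$, and the estimated parameters should land close to the true generating function $y = 2 + x_1 + 0.5 x_1^2$ plus Gaussian noise."
]
},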
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_new=np.linspace(-3, 3, 100).reshape(100, 1)\n",
"X_new_poly = poly_features.transform(X_new)\n",
"y_new = lin_reg.predict(X_new_poly)\n",
"plt.plot(X, y, \"b.\")\n",
"plt.plot(X_new, y_new, \"r-\", linewidth=2, label=\"Predictions\")\n",
"plt.xlabel(\"$x_1$\", fontsize=18)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.legend(loc=\"upper left\", fontsize=14)\n",
"plt.axis([-3, 3, 0, 10])\n",
"save_fig(\"quadratic_predictions_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.pipeline import Pipeline\n",
"\n",
"for style, width, degree in ((\"g-\", 1, 300), (\"b--\", 2, 2), (\"r-+\", 2, 1)):\n",
" polybig_features = PolynomialFeatures(degree=degree, include_bias=False)\n",
" std_scaler = StandardScaler()\n",
" lin_reg = LinearRegression()\n",
" polynomial_regression = Pipeline((\n",
" (\"poly_features\", polybig_features),\n",
" (\"std_scaler\", std_scaler),\n",
" (\"lin_reg\", lin_reg),\n",
" ))\n",
" polynomial_regression.fit(X, y)\n",
" y_newbig = polynomial_regression.predict(X_new)\n",
" plt.plot(X_new, y_newbig, style, label=str(degree), linewidth=width)\n",
"\n",
"plt.plot(X, y, \"b.\", linewidth=3)\n",
"plt.legend(loc=\"upper left\")\n",
"plt.xlabel(\"$x_1$\", fontsize=18)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.axis([-3, 3, 0, 10])\n",
"save_fig(\"high_degree_polynomials_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.cross_validation import train_test_split\n",
"\n",
"def plot_learning_curves(model, X, y):\n",
" X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=10)\n",
" train_errors, val_errors = [], []\n",
" for m in range(1, len(X_train)):\n",
" model.fit(X_train[:m], y_train[:m])\n",
" y_train_predict = model.predict(X_train[:m])\n",
" y_val_predict = model.predict(X_val)\n",
" train_errors.append(mean_squared_error(y_train_predict, y_train[:m]))\n",
" val_errors.append(mean_squared_error(y_val_predict, y_val))\n",
"\n",
" plt.plot(np.sqrt(train_errors), \"r-+\", linewidth=2, label=\"Training set\")\n",
" plt.plot(np.sqrt(val_errors), \"b-\", linewidth=3, label=\"Validation set\")\n",
" plt.legend(loc=\"upper right\", fontsize=14)\n",
" plt.xlabel(\"Training set size\", fontsize=14)\n",
" plt.ylabel(\"RMSE\", fontsize=14)\n",
"\n",
"lin_reg = LinearRegression()\n",
"plot_learning_curves(lin_reg, X, y)\n",
"plt.axis([0, 80, 0, 3])\n",
"save_fig(\"underfitting_learning_curves_plot\")\n",
"plt.show()"
]
},
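{
"cell_type": "markdown",
"metadata": {},
"source": [
"Both curves plateau at a fairly high RMSE and end up close to each other: the hallmark of underfitting. Adding more training data will not help; the model itself is too simple for this quadratic dataset."
]
},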
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.pipeline import Pipeline\n",
"\n",
"polynomial_regression = Pipeline((\n",
" (\"poly_features\", PolynomialFeatures(degree=10, include_bias=False)),\n",
" (\"sgd_reg\", LinearRegression()),\n",
" ))\n",
"\n",
"plot_learning_curves(polynomial_regression, X, y)\n",
"plt.axis([0, 80, 0, 3])\n",
"save_fig(\"learning_curves_plot\")\n",
"plt.show()"
]
},
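{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here the training error is much lower than the validation error and a clear gap separates the two curves: the degree-10 model is overfitting the training data."
]
},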
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Regularized models"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.linear_model import Ridge\n",
"\n",
"rnd.seed(42)\n",
"m = 20\n",
"X = 3 * rnd.rand(m, 1)\n",
"y = 1 + 0.5 * X + rnd.randn(m, 1) / 1.5\n",
"X_new = np.linspace(0, 3, 100).reshape(100, 1)\n",
"\n",
"def plot_model(model_class, polynomial, alphas, **model_kargs):\n",
" for alpha, style in zip(alphas, (\"b-\", \"g--\", \"r:\")):\n",
" model = model_class(alpha, **model_kargs) if alpha > 0 else LinearRegression()\n",
" if polynomial:\n",
" model = Pipeline((\n",
" (\"poly_features\", PolynomialFeatures(degree=10, include_bias=False)),\n",
" (\"std_scaler\", StandardScaler()),\n",
" (\"regul_reg\", model),\n",
" ))\n",
" model.fit(X, y)\n",
" y_new_regul = model.predict(X_new)\n",
" lw = 2 if alpha > 0 else 1\n",
" plt.plot(X_new, y_new_regul, style, linewidth=lw, label=r\"$\\alpha = {}$\".format(alpha))\n",
" plt.plot(X, y, \"b.\", linewidth=3)\n",
" plt.legend(loc=\"upper left\", fontsize=15)\n",
" plt.xlabel(\"$x_1$\", fontsize=18)\n",
" plt.axis([0, 3, 0, 4])\n",
"\n",
"plt.figure(figsize=(8,4))\n",
"plt.subplot(121)\n",
"plot_model(Ridge, polynomial=False, alphas=(0, 10, 100))\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.subplot(122)\n",
"plot_model(Ridge, polynomial=True, alphas=(0, 10**-5, 1))\n",
"\n",
"save_fig(\"ridge_regression_plot\")\n",
"plt.show()"
]
},
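{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ridge regression adds an $\\ell_2$ penalty to the MSE cost:\n",
"\n",
"$J(\\boldsymbol{\\theta}) = \\mathrm{MSE}(\\boldsymbol{\\theta}) + \\alpha \\frac{1}{2} \\sum_{i=1}^{n} \\theta_i^2$\n",
"\n",
"Larger $\\alpha$ shrinks the weights more, producing flatter, more heavily regularized models, as the plots above show."
]
},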
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.linear_model import Ridge\n",
"ridge_reg = Ridge(alpha=1, solver=\"cholesky\")\n",
"ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])"
]
},
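{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ridge regression also has a closed-form solution, a variant of the Normal Equation (a quick sketch for comparison, not in the original notebook; $\\mathbf{A}$ is the identity matrix with a 0 in the top-left cell so the bias term is not regularized):\n",
"\n",
"$\\hat{\\boldsymbol{\\theta}} = (\\mathbf{X}^T \\mathbf{X} + \\alpha \\mathbf{A})^{-1} \\mathbf{X}^T \\mathbf{y}$"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Closed-form Ridge solution (a sketch; alpha=1 to match the cell above)\n",
"Xb_ridge = np.c_[np.ones((m, 1)), X]  # add the bias column x0 = 1\n",
"A = np.eye(2)\n",
"A[0, 0] = 0  # leave the bias term unregularized, as scikit-learn does\n",
"theta_ridge = LA.inv(Xb_ridge.T.dot(Xb_ridge) + 1.0 * A).dot(Xb_ridge.T).dot(y)\n",
"theta_ridge"
]
},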
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"sgd_reg = SGDRegressor(penalty=\"l2\", random_state=42)\n",
"sgd_reg.fit(X, y.ravel())\n",
"ridge_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ridge_reg = Ridge(alpha=1, solver=\"sag\")\n",
"ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.linear_model import Lasso\n",
"\n",
"plt.figure(figsize=(8,4))\n",
"plt.subplot(121)\n",
"plot_model(Lasso, polynomial=False, alphas=(0, 0.1, 1))\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.subplot(122)\n",
"plot_model(Lasso, polynomial=True, alphas=(0, 10**-7, 1), tol=1)\n",
"\n",
"save_fig(\"lasso_regression_plot\")\n",
"plt.show()"
]
},
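{
"cell_type": "markdown",
"metadata": {},
"source": [
"Lasso regression uses an $\\ell_1$ penalty instead:\n",
"\n",
"$J(\\boldsymbol{\\theta}) = \\mathrm{MSE}(\\boldsymbol{\\theta}) + \\alpha \\sum_{i=1}^{n} |\\theta_i|$\n",
"\n",
"The $\\ell_1$ penalty tends to drive the least important weights all the way to zero, yielding sparse models."
]
},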
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.linear_model import Lasso\n",
"lasso_reg = Lasso(alpha=0.1)\n",
"lasso_reg.fit(X, y)\n",
"lasso_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.linear_model import ElasticNet\n",
"elastic_net = ElasticNet(alpha=0.1, l1_ratio=0.5)\n",
"elastic_net.fit(X, y)\n",
"elastic_net.predict([[1.5]])"
]
},
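{
"cell_type": "markdown",
"metadata": {},
"source": [
"Elastic Net blends both penalties, with the mix controlled by `l1_ratio` ($r$):\n",
"\n",
"$J(\\boldsymbol{\\theta}) = \\mathrm{MSE}(\\boldsymbol{\\theta}) + r \\alpha \\sum_{i=1}^{n} |\\theta_i| + \\frac{1 - r}{2} \\alpha \\sum_{i=1}^{n} \\theta_i^2$"
]
},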
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"rnd.seed(42)\n",
"m = 100\n",
"X = 6 * rnd.rand(m, 1) - 3\n",
"y = 2 + X + 0.5 * X**2 + rnd.randn(m, 1)\n",
"\n",
"X_train, X_val, y_train, y_val = train_test_split(X[:50], y[:50].ravel(), test_size=0.5, random_state=10)\n",
"\n",
"poly_scaler = Pipeline((\n",
" (\"poly_features\", PolynomialFeatures(degree=90, include_bias=False)),\n",
" (\"std_scaler\", StandardScaler()),\n",
" ))\n",
"\n",
"X_train_poly_scaled = poly_scaler.fit_transform(X_train)\n",
"X_val_poly_scaled = poly_scaler.transform(X_val)\n",
"\n",
"sgd_reg = SGDRegressor(n_iter=1,\n",
" penalty=None,\n",
" eta0=0.0005,\n",
" warm_start=True,\n",
" learning_rate=\"constant\",\n",
" random_state=42)\n",
"\n",
"n_epochs = 500\n",
"train_errors, val_errors = [], []\n",
"for epoch in range(n_epochs):\n",
" sgd_reg.fit(X_train_poly_scaled, y_train)\n",
" y_train_predict = sgd_reg.predict(X_train_poly_scaled)\n",
" y_val_predict = sgd_reg.predict(X_val_poly_scaled)\n",
" train_errors.append(mean_squared_error(y_train_predict, y_train))\n",
" val_errors.append(mean_squared_error(y_val_predict, y_val))\n",
"\n",
"best_epoch = np.argmin(val_errors)\n",
"best_val_rmse = np.sqrt(val_errors[best_epoch])\n",
"\n",
"plt.annotate('Best model',\n",
" xy=(best_epoch, best_val_rmse),\n",
" xytext=(best_epoch, best_val_rmse + 1),\n",
" ha=\"center\",\n",
" arrowprops=dict(facecolor='black', shrink=0.05),\n",
" fontsize=16,\n",
" )\n",
"\n",
"best_val_rmse -= 0.03 # just to make the graph look better\n",
"plt.plot([0, n_epochs], [best_val_rmse, best_val_rmse], \"k:\", linewidth=2)\n",
"plt.plot(np.sqrt(val_errors), \"b-\", linewidth=3, label=\"Validation set\")\n",
"plt.plot(np.sqrt(train_errors), \"r--\", linewidth=2, label=\"Training set\")\n",
"plt.legend(loc=\"upper right\", fontsize=14)\n",
"plt.xlabel(\"Epoch\", fontsize=14)\n",
"plt.ylabel(\"RMSE\", fontsize=14)\n",
"save_fig(\"early_stopping_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.base import clone\n",
"sgd_reg = SGDRegressor(n_iter=1, warm_start=True, penalty=None,\n",
" learning_rate=\"constant\", eta0=0.0005,\n",
" random_state=42)\n",
"\n",
"minimum_val_error = float(\"inf\")\n",
"best_epoch = None\n",
"best_model = None\n",
"for epoch in range(1000):\n",
" sgd_reg.fit(X_train_poly_scaled, y_train) # continues where it left off\n",
" y_val_predict = sgd_reg.predict(X_val_poly_scaled)\n",
" val_error = mean_squared_error(y_val_predict, y_val)\n",
" if val_error < minimum_val_error:\n",
" minimum_val_error = val_error\n",
" best_epoch = epoch\n",
" best_model = clone(sgd_reg)\n",
"\n",
"best_epoch, best_model"
]
},
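{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a sanity check (an extra cell, not in the original), the saved `best_model` should score at least as well on the validation set as the final model:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# RMSE of the early-stopped model on the validation set\n",
"y_val_predict = best_model.predict(X_val_poly_scaled)\n",
"np.sqrt(mean_squared_error(y_val, y_val_predict))"
]
},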
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Logistic regression"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"t = np.linspace(-10, 10, 100)\n",
"sig = 1 / (1 + np.exp(-t))\n",
"plt.figure(figsize=(9, 3))\n",
"plt.plot([-10, 10], [0, 0], \"k-\")\n",
"plt.plot([-10, 10], [0.5, 0.5], \"k:\")\n",
"plt.plot([-10, 10], [1, 1], \"k:\")\n",
"plt.plot([0, 0], [-1.1, 1.1], \"k-\")\n",
"plt.plot(t, sig, \"b-\", linewidth=2, label=r\"$\\sigma(t) = \\frac{1}{1 + e^{-t}}$\")\n",
"plt.xlabel(\"t\")\n",
"plt.legend(loc=\"upper left\", fontsize=20)\n",
"plt.axis([-10, 10, -0.1, 1.1])\n",
"save_fig(\"logistic_function_plot\")\n",
"plt.show()"
]
},
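{
"cell_type": "markdown",
"metadata": {},
"source": [
"A Logistic Regression model estimates the probability $\\hat{p} = \\sigma(\\boldsymbol{\\theta}^T \\mathbf{x})$ and predicts class 1 when $\\hat{p} \\geq 0.5$, i.e. when the score $\\boldsymbol{\\theta}^T \\mathbf{x}$ is positive."
]
},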
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn import datasets\n",
"iris = datasets.load_iris()\n",
"list(iris.keys())"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(iris.DESCR)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"X = iris[\"data\"][:, 3:] # petal width\n",
"y = (iris[\"target\"] == 2).astype(np.int) # 1 if Iris-Virginica, else 0\n",
"\n",
"log_reg = LogisticRegression()\n",
"log_reg.fit(X, y)\n",
"\n",
"X_new = np.linspace(0, 3, 1000).reshape(-1, 1)\n",
"y_proba = log_reg.predict_proba(X_new)\n",
"decision_boundary = X_new[y_proba[:, 1] >= 0.5][0]\n",
"\n",
"plt.figure(figsize=(8, 3))\n",
"plt.plot(X[y==0], y[y==0], \"bs\")\n",
"plt.plot(X[y==1], y[y==1], \"g^\")\n",
"plt.plot([decision_boundary, decision_boundary], [-1, 2], \"k:\", linewidth=2)\n",
"plt.plot(X_new, y_proba[:, 1], \"g-\", linewidth=2, label=\"Iris-Virginica\")\n",
"plt.plot(X_new, y_proba[:, 0], \"b--\", linewidth=2, label=\"Not Iris-Virginica\")\n",
"plt.text(decision_boundary+0.02, 0.15, \"Decision boundary\", fontsize=14, color=\"k\", ha=\"center\")\n",
"plt.arrow(decision_boundary, 0.08, -0.3, 0, head_width=0.05, head_length=0.1, fc='b', ec='b')\n",
"plt.arrow(decision_boundary, 0.92, 0.3, 0, head_width=0.05, head_length=0.1, fc='g', ec='g')\n",
"plt.xlabel(\"Petal width (cm)\", fontsize=14)\n",
"plt.ylabel(\"Probability\", fontsize=14)\n",
"plt.legend(loc=\"center left\", fontsize=14)\n",
"plt.axis([0, 3, -0.02, 1.02])\n",
"save_fig(\"logistic_regression_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"decision_boundary"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"log_reg.predict([[1.7], [1.5]])"
]
},
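{
"cell_type": "markdown",
"metadata": {},
"source": [
"The corresponding estimated probabilities (an extra illustrative cell) show how confident the model is on each side of the decision boundary computed above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"log_reg.predict_proba([[1.7], [1.5]])"
]
},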
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n",
"y = (iris[\"target\"] == 2).astype(np.int)\n",
"\n",
"log_reg = LogisticRegression(C=10**10)\n",
"log_reg.fit(X, y)\n",
"\n",
"x0, x1 = np.meshgrid(\n",
" np.linspace(2.9, 7, 500).reshape(-1, 1),\n",
" np.linspace(0.8, 2.7, 200).reshape(-1, 1),\n",
" )\n",
"X_new = np.c_[x0.ravel(), x1.ravel()]\n",
"\n",
"y_proba = log_reg.predict_proba(X_new)\n",
"\n",
"plt.figure(figsize=(10, 4))\n",
"plt.plot(X[y==0, 0], X[y==0, 1], \"bs\")\n",
"plt.plot(X[y==1, 0], X[y==1, 1], \"g^\")\n",
"\n",
"zz = y_proba[:, 1].reshape(x0.shape)\n",
"contour = plt.contour(x0, x1, zz, cmap=plt.cm.brg)\n",
"\n",
"\n",
"left_right = np.array([2.9, 7])\n",
"boundary = -(log_reg.coef_[0][0] * left_right + log_reg.intercept_[0]) / log_reg.coef_[0][1]\n",
"\n",
"plt.clabel(contour, inline=1, fontsize=12)\n",
"plt.plot(left_right, boundary, \"k--\", linewidth=3)\n",
"plt.text(3.5, 1.5, \"Not Iris-Virginica\", fontsize=14, color=\"b\", ha=\"center\")\n",
"plt.text(6.5, 2.3, \"Iris-Virginica\", fontsize=14, color=\"g\", ha=\"center\")\n",
"plt.xlabel(\"Petal length\", fontsize=14)\n",
"plt.ylabel(\"Petal width\", fontsize=14)\n",
"plt.axis([2.9, 7, 0.8, 2.7])\n",
"save_fig(\"logistic_regression_contour_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n",
"y = iris[\"target\"]\n",
"\n",
"softmax_reg = LogisticRegression(multi_class=\"multinomial\", solver=\"lbfgs\", C=10)\n",
"softmax_reg.fit(X, y)\n",
"\n",
"x0, x1 = np.meshgrid(\n",
" np.linspace(0, 8, 500).reshape(-1, 1),\n",
" np.linspace(0, 3.5, 200).reshape(-1, 1),\n",
" )\n",
"X_new = np.c_[x0.ravel(), x1.ravel()]\n",
"\n",
"\n",
"y_proba = softmax_reg.predict_proba(X_new)\n",
"y_predict = softmax_reg.predict(X_new)\n",
"\n",
"zz1 = y_proba[:, 1].reshape(x0.shape)\n",
"zz = y_predict.reshape(x0.shape)\n",
"\n",
"plt.figure(figsize=(10, 4))\n",
"plt.plot(X[y==2, 0], X[y==2, 1], \"g^\", label=\"Iris-Virginica\")\n",
"plt.plot(X[y==1, 0], X[y==1, 1], \"bs\", label=\"Iris-Versicolour\")\n",
"plt.plot(X[y==0, 0], X[y==0, 1], \"yo\", label=\"Iris-Setosa\")\n",
"\n",
"from matplotlib.colors import ListedColormap\n",
"custom_cmap = ListedColormap(['#fafab0','#9898ff','#a0faa0'])\n",
"\n",
"plt.contourf(x0, x1, zz, cmap=custom_cmap, linewidth=5)\n",
"contour = plt.contour(x0, x1, zz1, cmap=plt.cm.brg)\n",
"plt.clabel(contour, inline=1, fontsize=12)\n",
"plt.xlabel(\"Petal length\", fontsize=14)\n",
"plt.ylabel(\"Petal width\", fontsize=14)\n",
"plt.legend(loc=\"center left\", fontsize=14)\n",
"plt.axis([0, 7, 0, 3.5])\n",
"save_fig(\"softmax_regression_contour_plot\")\n",
"plt.show()"
]
},
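{
"cell_type": "markdown",
"metadata": {},
"source": [
"Softmax Regression turns the per-class scores $s_k(\\mathbf{x})$ into probabilities:\n",
"\n",
"$\\hat{p}_k = \\frac{\\exp(s_k(\\mathbf{x}))}{\\sum_{j=1}^{K} \\exp(s_j(\\mathbf{x}))}$\n",
"\n",
"As a quick check (an extra cell, not in the original): applying the softmax by hand to the model's decision scores should reproduce `predict_proba`, computed two cells below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Recompute the class probabilities manually from the decision scores\n",
"scores = softmax_reg.decision_function([[5, 2]])  # one score per class\n",
"exps = np.exp(scores)\n",
"exps / exps.sum(axis=1, keepdims=True)  # should match softmax_reg.predict_proba([[5, 2]])"
]
},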
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"softmax_reg.predict([[5, 2]])"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"softmax_reg.predict_proba([[5, 2]])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
},
"toc": {
"toc_cell": false,
"toc_number_sections": true,
"toc_threshold": 6,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 0
}