diff --git a/.gitignore b/.gitignore index a5684d8..89c7162 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,10 @@ -.ipynb_checkpoints -.DS_Store -my_* -images/**/*.png +*.bak +*.ckpt *.pyc +.DS_Store +.ipynb_checkpoints +checkpoint +logs/* +tf_logs/* +images/**/*.png +my_* diff --git a/fundamentals.ipynb b/01_the_machine_learning_landscape.ipynb similarity index 99% rename from fundamentals.ipynb rename to 01_the_machine_learning_landscape.ipynb index df6d6dc..9647fa6 100644 --- a/fundamentals.ipynb +++ b/01_the_machine_learning_landscape.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Chapter 1 – Fundamentals of Machine Learning**\n", + "**Chapter 1 – The Machine Learning landscape**\n", "\n", "_This is the code used to generate some of the figures in chapter 1._" ] diff --git a/end_to_end_project.ipynb b/02_end_to_end_machine_learning_project.ipynb similarity index 99% rename from end_to_end_project.ipynb rename to 02_end_to_end_machine_learning_project.ipynb index 875a1be..25278be 100644 --- a/end_to_end_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Chapter 2 – End to end Machine Learning project**\n", + "**Chapter 2 – End-to-end Machine Learning project**\n", "\n", "*Welcome to Machine Learning Housing Corp.! Your task is to predict median house values in Californian districts, given a number of features from these districts.*\n", "\n", diff --git a/training_linear_models.ipynb b/04_training_linear_models.ipynb similarity index 100% rename from training_linear_models.ipynb rename to 04_training_linear_models.ipynb diff --git a/05_support_vector_machines.ipynb b/05_support_vector_machines.ipynb new file mode 100644 index 0000000..42937b0 --- /dev/null +++ b/05_support_vector_machines.ipynb @@ -0,0 +1,1248 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 5 – Support Vector Machines**\n", + "\n", + "_This notebook contains all the sample code and solutions to the exercises in chapter 5._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"svm\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + {
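**[Editor's note]** The `save_fig()` helper in the setup cell above writes to `images/svm/` but never creates that directory, so `plt.savefig()` fails on a fresh checkout. A minimal sketch of a guard one could add to the setup cell (the `IMAGES_PATH` name is hypothetical; the `os` calls are standard library):

```python
import os

# Mirror the cell's PROJECT_ROOT_DIR and CHAPTER_ID, and create the output
# directory once so plt.savefig() has somewhere to write.
IMAGES_PATH = os.path.join(".", "images", "svm")
if not os.path.isdir(IMAGES_PATH):  # os.makedirs' exist_ok needs python 3.2+, so test first
    os.makedirs(IMAGES_PATH)
```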
"cell_type": "markdown", + "metadata": {}, + "source": [ + "# Large margin classification" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn import datasets\n", + "\n", + "iris = datasets.load_iris()\n", + "X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n", + "y = iris[\"target\"]\n", + "\n", + "setosa_or_versicolour = (y == 0) | (y == 1)\n", + "X = X[setosa_or_versicolour]\n", + "y = y[setosa_or_versicolour]\n", + "\n", + "# SVM Classifier model\n", + "svm_clf = SVC(kernel=\"linear\", C=float(\"inf\"))\n", + "svm_clf.fit(X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Bad models\n", + "x0 = np.linspace(0, 5.5, 200)\n", + "pred_1 = 5*x0 - 20\n", + "pred_2 = x0 - 1.8\n", + "pred_3 = 0.1 * x0 + 0.5\n", + "\n", + "def plot_svc_decision_boundary(svm_clf, xmin, xmax):\n", + " w = svm_clf.coef_[0]\n", + " b = svm_clf.intercept_[0]\n", + "\n", + " # At the decision boundary, w0*x0 + w1*x1 + b = 0\n", + " # => x1 = -w0/w1 * x0 - b/w1\n", + " x0 = np.linspace(xmin, xmax, 200)\n", + " decision_boundary = -w[0]/w[1] * x0 - b/w[1]\n", + "\n", + " margin = 1/w[1]\n", + " gutter_up = decision_boundary + margin\n", + " gutter_down = decision_boundary - margin\n", + "\n", + " svs = svm_clf.support_vectors_\n", + " plt.scatter(svs[:, 0], svs[:, 1], s=180, facecolors='#FFAAAA')\n", + " plt.plot(x0, decision_boundary, \"k-\", linewidth=2)\n", + " plt.plot(x0, gutter_up, \"k--\", linewidth=2)\n", + " plt.plot(x0, gutter_down, \"k--\", linewidth=2)\n", + "\n", + "plt.figure(figsize=(12,2.7))\n", + "\n", + "plt.subplot(121)\n", + "plt.plot(x0, pred_1, \"g--\", linewidth=2)\n", + "plt.plot(x0, pred_2, \"m-\", linewidth=2)\n", + "plt.plot(x0, pred_3, \"r-\", linewidth=2)\n", + "plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"bs\", label=\"Iris-Versicolour\")\n", + "plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"yo\", label=\"Iris-Setosa\")\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.ylabel(\"Petal width\", fontsize=14)\n", + "plt.legend(loc=\"upper left\", fontsize=14)\n", + "plt.axis([0, 5.5, 0, 2])\n", + "\n", + "plt.subplot(122)\n", + "plot_svc_decision_boundary(svm_clf, 0, 5.5)\n", + "plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"bs\")\n", + "plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"yo\")\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.axis([0, 5.5, 0, 2])\n", + "\n", + "save_fig(\"large_margin_classification_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sensitivity to feature scales" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "Xs = np.array([[1, 50], [5, 20], [3, 80], [5, 60]]).astype(np.float64)\n", + "ys = np.array([0, 0, 1, 1])\n", + "svm_clf = SVC(kernel=\"linear\", C=100)\n", + "svm_clf.fit(Xs, ys)\n", + "\n", + "plt.figure(figsize=(12,3.2))\n", + "plt.subplot(121)\n", + "plt.plot(Xs[:, 0][ys==1], Xs[:, 1][ys==1], \"bo\")\n", + "plt.plot(Xs[:, 0][ys==0], Xs[:, 1][ys==0], \"ms\")\n", + "plot_svc_decision_boundary(svm_clf, 0, 6)\n", + "plt.xlabel(\"$x_0$\", fontsize=20)\n", + "plt.ylabel(\"$x_1$ \", fontsize=20, rotation=0)\n", + "plt.title(\"Unscaled\", fontsize=16)\n", + "plt.axis([0, 6, 0, 90])\n", + "\n", + "from sklearn.preprocessing import StandardScaler\n", + "scaler = StandardScaler()\n", + "X_scaled = 
scaler.fit_transform(Xs)\n", + "svm_clf.fit(X_scaled, ys)\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(X_scaled[:, 0][ys==1], X_scaled[:, 1][ys==1], \"bo\")\n", + "plt.plot(X_scaled[:, 0][ys==0], X_scaled[:, 1][ys==0], \"ms\")\n", + "plot_svc_decision_boundary(svm_clf, -2, 2)\n", + "plt.xlabel(\"$x_0$\", fontsize=20)\n", + "plt.title(\"Scaled\", fontsize=16)\n", + "plt.axis([-2, 2, -2, 2])\n", + "\n", + "save_fig(\"sensitivity_to_feature_scales_plot\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sensitivity to outliers" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_outliers = np.array([[3.4, 1.3], [3.2, 0.8]])\n", + "y_outliers = np.array([0, 0])\n", + "Xo1 = np.concatenate([X, X_outliers[:1]], axis=0)\n", + "yo1 = np.concatenate([y, y_outliers[:1]], axis=0)\n", + "Xo2 = np.concatenate([X, X_outliers[1:]], axis=0)\n", + "yo2 = np.concatenate([y, y_outliers[1:]], axis=0)\n", + "\n", + "svm_clf2 = SVC(kernel=\"linear\", C=10**9)#float(\"inf\"))\n", + "svm_clf2.fit(Xo2, yo2)\n", + "\n", + "plt.figure(figsize=(12,2.7))\n", + "\n", + "plt.subplot(121)\n", + "plt.plot(Xo1[:, 0][yo1==1], Xo1[:, 1][yo1==1], \"bs\")\n", + "plt.plot(Xo1[:, 0][yo1==0], Xo1[:, 1][yo1==0], \"yo\")\n", + "plt.text(0.3, 1.0, \"Impossible!\", fontsize=24, color=\"red\")\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.ylabel(\"Petal width\", fontsize=14)\n", + "plt.annotate(\"Outlier\",\n", + " xy=(X_outliers[0][0], X_outliers[0][1]),\n", + " xytext=(2.5, 1.7),\n", + " ha=\"center\",\n", + " arrowprops=dict(facecolor='black', shrink=0.1),\n", + " fontsize=16,\n", + " )\n", + "plt.axis([0, 5.5, 0, 2])\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(Xo2[:, 0][yo2==1], Xo2[:, 1][yo2==1], \"bs\")\n", + "plt.plot(Xo2[:, 0][yo2==0], Xo2[:, 1][yo2==0], \"yo\")\n", + "plot_svc_decision_boundary(svm_clf2, 0, 5.5)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.annotate(\"Outlier\",\n", + " xy=(X_outliers[1][0], X_outliers[1][1]),\n", + " xytext=(3.2, 0.08),\n", + " ha=\"center\",\n", + " arrowprops=dict(facecolor='black', shrink=0.1),\n", + " fontsize=16,\n", + " )\n", + "plt.axis([0, 5.5, 0, 2])\n", + "\n", + "save_fig(\"sensitivity_to_outliers_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Large margin *vs* margin violations" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn import datasets\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.svm import LinearSVC\n", + "\n", + "iris = datasets.load_iris()\n", + "X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n", + "y = (iris[\"target\"] == 2).astype(np.float64) # Iris-Virginica\n", + "\n", + "scaler = StandardScaler()\n", + "svm_clf1 = LinearSVC(C=100, loss=\"hinge\")\n", + "svm_clf2 = LinearSVC(C=1, loss=\"hinge\")\n", + "\n", + "scaled_svm_clf1 = Pipeline((\n", + " (\"scaler\", scaler),\n", + " (\"linear_svc\", svm_clf1),\n", + " ))\n", + "scaled_svm_clf2 = Pipeline((\n", + " (\"scaler\", scaler),\n", + " (\"linear_svc\", svm_clf2),\n", + " ))\n", + "\n", + "scaled_svm_clf1.fit(X, y)\n", + "scaled_svm_clf2.fit(X, y)\n", + "\n", + "scaled_svm_clf2.predict([[5.5, 1.7]])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + 
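**[Editor's note]** The "Convert to unscaled parameters" source that follows relies on a small piece of algebra worth spelling out. With $\boldsymbol{\mu}$ = `scaler.mean_`, $\boldsymbol{\sigma}$ = `scaler.scale_`, and divisions taken elementwise, a linear model fit on standardized inputs satisfies

$$
\mathbf{w}_s^T \frac{\mathbf{x} - \boldsymbol{\mu}}{\boldsymbol{\sigma}} + b_s
= \left(\frac{\mathbf{w}_s}{\boldsymbol{\sigma}}\right)^T \mathbf{x}
+ \left(b_s - \mathbf{w}_s^T \frac{\boldsymbol{\mu}}{\boldsymbol{\sigma}}\right),
$$

so the unscaled weights are $\mathbf{w}_s / \boldsymbol{\sigma}$ (the `w1` and `w2` lines), and the unscaled bias is the scaled model's decision function evaluated at $-\boldsymbol{\mu}/\boldsymbol{\sigma}$, the image of $\mathbf{x} = \mathbf{0}$ (the `b1` and `b2` lines).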
"source": [ + "# Convert to unscaled parameters\n", + "b1 = svm_clf1.decision_function([-scaler.mean_ / scaler.scale_])\n", + "b2 = svm_clf2.decision_function([-scaler.mean_ / scaler.scale_])\n", + "w1 = svm_clf1.coef_[0] / scaler.scale_\n", + "w2 = svm_clf2.coef_[0] / scaler.scale_\n", + "svm_clf1.intercept_ = np.array([b1])\n", + "svm_clf2.intercept_ = np.array([b2])\n", + "svm_clf1.coef_ = np.array([w1])\n", + "svm_clf2.coef_ = np.array([w2])\n", + "\n", + "# Find support vectors (LinearSVC does not do this automatically)\n", + "t = y * 2 - 1\n", + "support_vectors_idx1 = (t * (X.dot(w1) + b1) < 1).ravel()\n", + "support_vectors_idx2 = (t * (X.dot(w2) + b2) < 1).ravel()\n", + "svm_clf1.support_vectors_ = X[support_vectors_idx1]\n", + "svm_clf2.support_vectors_ = X[support_vectors_idx2]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(12,3.2))\n", + "plt.subplot(121)\n", + "plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"g^\", label=\"Iris-Virginica\")\n", + "plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"bs\", label=\"Iris-Versicolour\")\n", + "plot_svc_decision_boundary(svm_clf1, 4, 6)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.ylabel(\"Petal width\", fontsize=14)\n", + "plt.legend(loc=\"upper left\", fontsize=14)\n", + "plt.title(\"$C = {}$\".format(svm_clf1.C), fontsize=16)\n", + "plt.axis([4, 6, 0.8, 2.8])\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"g^\")\n", + "plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"bs\")\n", + "plot_svc_decision_boundary(svm_clf2, 4, 6)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.title(\"$C = {}$\".format(svm_clf2.C), fontsize=16)\n", + "plt.axis([4, 6, 0.8, 2.8])\n", + "\n", + "save_fig(\"regularization_plot\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Non-linear classification" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X1D = np.linspace(-4, 4, 9).reshape(-1, 1)\n", + "X2D = np.c_[X1D, X1D**2]\n", + "y = np.array([0, 0, 1, 1, 1, 1, 1, 0, 0])\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "\n", + "plt.subplot(121)\n", + "plt.grid(True, which='both')\n", + "plt.axhline(y=0, color='k')\n", + "plt.plot(X1D[:, 0][y==0], np.zeros(4), \"bs\")\n", + "plt.plot(X1D[:, 0][y==1], np.zeros(5), \"g^\")\n", + "plt.gca().get_yaxis().set_ticks([])\n", + "plt.xlabel(r\"$x_1$\", fontsize=20)\n", + "plt.axis([-4.5, 4.5, -0.2, 0.2])\n", + "\n", + "plt.subplot(122)\n", + "plt.grid(True, which='both')\n", + "plt.axhline(y=0, color='k')\n", + "plt.axvline(x=0, color='k')\n", + "plt.plot(X2D[:, 0][y==0], X2D[:, 1][y==0], \"bs\")\n", + "plt.plot(X2D[:, 0][y==1], X2D[:, 1][y==1], \"g^\")\n", + "plt.xlabel(r\"$x_1$\", fontsize=20)\n", + "plt.ylabel(r\"$x_2$\", fontsize=20, rotation=0)\n", + "plt.gca().get_yaxis().set_ticks([0, 4, 8, 12, 16])\n", + "plt.plot([-4.5, 4.5], [6.5, 6.5], \"r--\", linewidth=3)\n", + "plt.axis([-4.5, 4.5, -1, 17])\n", + "\n", + "plt.subplots_adjust(right=1)\n", + "\n", + "save_fig(\"higher_dimensions_plot\", tight_layout=False)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import make_moons\n", + "X, y = make_moons(n_samples=100, noise=0.15, random_state=42)\n", + "\n", + "def plot_dataset(X, y, axes):\n", + " 
plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"bs\")\n", + " plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"g^\")\n", + " plt.axis(axes)\n", + " plt.grid(True, which='both')\n", + " plt.xlabel(r\"$x_1$\", fontsize=20)\n", + " plt.ylabel(r\"$x_2$\", fontsize=20, rotation=0)\n", + "\n", + "plot_dataset(X, y, [-1.5, 2.5, -1, 1.5])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "\n", + "polynomial_svm_clf = Pipeline((\n", + " (\"poly_features\", PolynomialFeatures(degree=3)),\n", + " (\"scaler\", StandardScaler()),\n", + " (\"svm_clf\", LinearSVC(C=10, loss=\"hinge\"))\n", + " ))\n", + "\n", + "polynomial_svm_clf.fit(X, y)\n", + "\n", + "def plot_predictions(clf, axes):\n", + " x0s = np.linspace(axes[0], axes[1], 100)\n", + " x1s = np.linspace(axes[2], axes[3], 100)\n", + " x0, x1 = np.meshgrid(x0s, x1s)\n", + " X = np.c_[x0.ravel(), x1.ravel()]\n", + " y_pred = clf.predict(X).reshape(x0.shape)\n", + " y_decision = clf.decision_function(X).reshape(x0.shape)\n", + " plt.contourf(x0, x1, y_pred, cmap=plt.cm.brg, alpha=0.2)\n", + " plt.contourf(x0, x1, y_decision, cmap=plt.cm.brg, alpha=0.1)\n", + "\n", + "plot_predictions(polynomial_svm_clf, [-1.5, 2.5, -1, 1.5])\n", + "plot_dataset(X, y, [-1.5, 2.5, -1, 1.5])\n", + "\n", + "save_fig(\"moons_polynomial_svc_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.svm import SVC\n", + "poly_kernel_svm_clf = Pipeline((\n", + " (\"scaler\", StandardScaler()),\n", + " (\"svm_clf\", SVC(kernel=\"poly\", degree=3, coef0=1, C=5))\n", + " ))\n", + "poly100_kernel_svm_clf = Pipeline((\n", + " (\"scaler\", StandardScaler()),\n", + " (\"svm_clf\", SVC(kernel=\"poly\", degree=10, coef0=100, C=5))\n", + " ))\n", + "\n", + "poly_kernel_svm_clf.fit(X, y)\n", + "poly100_kernel_svm_clf.fit(X, y)\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "\n", + "plt.subplot(121)\n", + "plot_predictions(poly_kernel_svm_clf, [-1.5, 2.5, -1, 1.5])\n", + "plot_dataset(X, y, [-1.5, 2.5, -1, 1.5])\n", + "plt.title(r\"$d=3, r=1, C=5$\", fontsize=18)\n", + "\n", + "plt.subplot(122)\n", + "plot_predictions(poly100_kernel_svm_clf, [-1.5, 2.5, -1, 1.5])\n", + "plot_dataset(X, y, [-1.5, 2.5, -1, 1.5])\n", + "plt.title(r\"$d=10, r=100, C=5$\", fontsize=18)\n", + "\n", + "save_fig(\"moons_kernelized_polynomial_svc_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "def gaussian_rbf(x, landmark, gamma):\n", + " return np.exp(-gamma * np.linalg.norm(x - landmark, axis=1)**2)\n", + "\n", + "gamma = 0.3\n", + "\n", + "x1s = np.linspace(-4.5, 4.5, 200).reshape(-1, 1)\n", + "x2s = gaussian_rbf(x1s, -2, gamma)\n", + "x3s = gaussian_rbf(x1s, 1, gamma)\n", + "\n", + "XK = np.c_[gaussian_rbf(X1D, -2, gamma), gaussian_rbf(X1D, 1, gamma)]\n", + "yk = np.array([0, 0, 1, 1, 1, 1, 1, 0, 0])\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "\n", + "plt.subplot(121)\n", + "plt.grid(True, which='both')\n", + "plt.axhline(y=0, color='k')\n", + "plt.scatter(x=[-2, 1], y=[0, 0], s=150, alpha=0.5, c=\"red\")\n", + "plt.plot(X1D[:, 0][yk==0], np.zeros(4), \"bs\")\n", + "plt.plot(X1D[:, 0][yk==1], np.zeros(5), \"g^\")\n", + "plt.plot(x1s, x2s, \"g--\")\n", + 
"plt.plot(x1s, x3s, \"b:\")\n", + "plt.gca().get_yaxis().set_ticks([0, 0.25, 0.5, 0.75, 1])\n", + "plt.xlabel(r\"$x_1$\", fontsize=20)\n", + "plt.ylabel(r\"Similarity\", fontsize=14)\n", + "plt.annotate(r'$\\mathbf{x}$',\n", + " xy=(X1D[3, 0], 0),\n", + " xytext=(-0.5, 0.20),\n", + " ha=\"center\",\n", + " arrowprops=dict(facecolor='black', shrink=0.1),\n", + " fontsize=18,\n", + " )\n", + "plt.text(-2, 0.9, \"$x_2$\", ha=\"center\", fontsize=20)\n", + "plt.text(1, 0.9, \"$x_3$\", ha=\"center\", fontsize=20)\n", + "plt.axis([-4.5, 4.5, -0.1, 1.1])\n", + "\n", + "plt.subplot(122)\n", + "plt.grid(True, which='both')\n", + "plt.axhline(y=0, color='k')\n", + "plt.axvline(x=0, color='k')\n", + "plt.plot(XK[:, 0][yk==0], XK[:, 1][yk==0], \"bs\")\n", + "plt.plot(XK[:, 0][yk==1], XK[:, 1][yk==1], \"g^\")\n", + "plt.xlabel(r\"$x_2$\", fontsize=20)\n", + "plt.ylabel(r\"$x_3$ \", fontsize=20, rotation=0)\n", + "plt.annotate(r'$\\phi\\left(\\mathbf{x}\\right)$',\n", + " xy=(XK[3, 0], XK[3, 1]),\n", + " xytext=(0.65, 0.50),\n", + " ha=\"center\",\n", + " arrowprops=dict(facecolor='black', shrink=0.1),\n", + " fontsize=18,\n", + " )\n", + "plt.plot([-0.1, 1.1], [0.57, -0.1], \"r--\", linewidth=3)\n", + "plt.axis([-0.1, 1.1, -0.1, 1.1])\n", + " \n", + "plt.subplots_adjust(right=1)\n", + "\n", + "save_fig(\"kernel_method_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "x1_example = X1D[3, 0]\n", + "for landmark in (-2, 1):\n", + " k = gaussian_rbf(np.array([[x1_example]]), np.array([[landmark]]), gamma)\n", + " print(\"Phi({}, {}) = {}\".format(x1_example, landmark, k))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rbf_kernel_svm_clf = Pipeline((\n", + " (\"scaler\", StandardScaler()),\n", + " (\"svm_clf\", SVC(kernel=\"rbf\", gamma=5, C=0.001))\n", + " ))\n", + "rbf_kernel_svm_clf.fit(X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "from sklearn.svm import SVC\n", + "\n", + "gamma1, gamma2 = 0.1, 5\n", + "C1, C2 = 0.001, 1000\n", + "hyperparams = (gamma1, C1), (gamma1, C2), (gamma2, C1), (gamma2, C2)\n", + "\n", + "svm_clfs = []\n", + "for gamma, C in hyperparams:\n", + " rbf_kernel_svm_clf = Pipeline((\n", + " (\"scaler\", StandardScaler()),\n", + " (\"svm_clf\", SVC(kernel=\"rbf\", gamma=gamma, C=C))\n", + " ))\n", + " rbf_kernel_svm_clf.fit(X, y)\n", + " svm_clfs.append(rbf_kernel_svm_clf)\n", + "\n", + "plt.figure(figsize=(11, 7))\n", + "\n", + "for i, svm_clf in enumerate(svm_clfs):\n", + " plt.subplot(221 + i)\n", + " plot_predictions(svm_clf, [-1.5, 2.5, -1, 1.5])\n", + " plot_dataset(X, y, [-1.5, 2.5, -1, 1.5])\n", + " gamma, C = hyperparams[i]\n", + " plt.title(r\"$\\gamma = {}, C = {}$\".format(gamma, C), fontsize=16)\n", + "\n", + "save_fig(\"moons_rbf_svc_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Regression\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.svm import LinearSVR\n", + "\n", + "rnd.seed(42)\n", + "m = 50\n", + "X = 2 * rnd.rand(m, 1)\n", + "y = (4 + 3 * X + rnd.randn(m, 1)).ravel()\n", + "\n", + "svm_reg1 = LinearSVR(epsilon=1.5)\n", + "svm_reg2 = LinearSVR(epsilon=0.5)\n", + "svm_reg1.fit(X, y)\n", + 
"svm_reg2.fit(X, y)\n", + "\n", + "def find_support_vectors(svm_reg, X, y):\n", + " y_pred = svm_reg.predict(X)\n", + " off_margin = (np.abs(y - y_pred) >= svm_reg.epsilon)\n", + " return np.argwhere(off_margin)\n", + "\n", + "svm_reg1.support_ = find_support_vectors(svm_reg1, X, y)\n", + "svm_reg2.support_ = find_support_vectors(svm_reg2, X, y)\n", + "\n", + "eps_x1 = 1\n", + "eps_y_pred = svm_reg1.predict([[eps_x1]])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def plot_svm_regression(svm_reg, X, y, axes):\n", + " x1s = np.linspace(axes[0], axes[1], 100).reshape(100, 1)\n", + " y_pred = svm_reg.predict(x1s)\n", + " plt.plot(x1s, y_pred, \"k-\", linewidth=2, label=r\"$\\hat{y}$\")\n", + " plt.plot(x1s, y_pred + svm_reg.epsilon, \"k--\")\n", + " plt.plot(x1s, y_pred - svm_reg.epsilon, \"k--\")\n", + " plt.scatter(X[svm_reg.support_], y[svm_reg.support_], s=180, facecolors='#FFAAAA')\n", + " plt.plot(X, y, \"bo\")\n", + " plt.xlabel(r\"$x_1$\", fontsize=18)\n", + " plt.legend(loc=\"upper left\", fontsize=18)\n", + " plt.axis(axes)\n", + "\n", + "plt.figure(figsize=(9, 4))\n", + "plt.subplot(121)\n", + "plot_svm_regression(svm_reg1, X, y, [0, 2, 3, 11])\n", + "plt.title(r\"$\\epsilon = {}$\".format(svm_reg1.epsilon), fontsize=18)\n", + "plt.ylabel(r\"$y$\", fontsize=18, rotation=0)\n", + "#plt.plot([eps_x1, eps_x1], [eps_y_pred, eps_y_pred - svm_reg1.epsilon], \"k-\", linewidth=2)\n", + "plt.annotate(\n", + " '', xy=(eps_x1, eps_y_pred), xycoords='data',\n", + " xytext=(eps_x1, eps_y_pred - svm_reg1.epsilon),\n", + " textcoords='data', arrowprops={'arrowstyle': '<->', 'linewidth': 1.5}\n", + " )\n", + "plt.text(0.91, 5.6, r\"$\\epsilon$\", fontsize=20)\n", + "plt.subplot(122)\n", + "plot_svm_regression(svm_reg2, X, y, [0, 2, 3, 11])\n", + "plt.title(r\"$\\epsilon = {}$\".format(svm_reg2.epsilon), fontsize=18)\n", + "save_fig(\"svm_regression_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.svm import SVR\n", + "\n", + "rnd.seed(42)\n", + "m = 100\n", + "X = 2 * rnd.rand(m, 1) - 1\n", + "y = (0.2 + 0.1 * X + 0.5 * X**2 + rnd.randn(m, 1)/10).ravel()\n", + "\n", + "svm_poly_reg1 = SVR(kernel=\"poly\", degree=2, C=100, epsilon=0.1)\n", + "svm_poly_reg2 = SVR(kernel=\"poly\", degree=2, C=0.01, epsilon=0.1)\n", + "svm_poly_reg1.fit(X, y)\n", + "svm_poly_reg2.fit(X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(9, 4))\n", + "plt.subplot(121)\n", + "plot_svm_regression(svm_poly_reg1, X, y, [-1, 1, 0, 1])\n", + "plt.title(r\"$degree={}, C={}, \\epsilon = {}$\".format(svm_poly_reg1.degree, svm_poly_reg1.C, svm_poly_reg1.epsilon), fontsize=18)\n", + "plt.ylabel(r\"$y$\", fontsize=18, rotation=0)\n", + "plt.subplot(122)\n", + "plot_svm_regression(svm_poly_reg2, X, y, [-1, 1, 0, 1])\n", + "plt.title(r\"$degree={}, C={}, \\epsilon = {}$\".format(svm_poly_reg2.degree, svm_poly_reg2.C, svm_poly_reg2.epsilon), fontsize=18)\n", + "save_fig(\"svm_with_polynomial_kernel_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Under the hood" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "iris = datasets.load_iris()\n", + "X = iris[\"data\"][:, (2, 
3)] # petal length, petal width\n", + "y = (iris[\"target\"] == 2).astype(np.float64) # Iris-Virginica" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from mpl_toolkits.mplot3d import Axes3D\n", + "\n", + "def plot_3D_decision_function(ax, w, b, x1_lim=[4, 6], x2_lim=[0.8, 2.8]):\n", + " x1_in_bounds = (X[:, 0] > x1_lim[0]) & (X[:, 0] < x1_lim[1])\n", + " X_crop = X[x1_in_bounds]\n", + " y_crop = y[x1_in_bounds]\n", + " x1s = np.linspace(x1_lim[0], x1_lim[1], 20)\n", + " x2s = np.linspace(x2_lim[0], x2_lim[1], 20)\n", + " x1, x2 = np.meshgrid(x1s, x2s)\n", + " xs = np.c_[x1.ravel(), x2.ravel()]\n", + " df = (xs.dot(w) + b).reshape(x1.shape)\n", + " m = 1 / np.linalg.norm(w)\n", + " boundary_x2s = -x1s*(w[0]/w[1])-b/w[1]\n", + " margin_x2s_1 = -x1s*(w[0]/w[1])-(b-1)/w[1]\n", + " margin_x2s_2 = -x1s*(w[0]/w[1])-(b+1)/w[1]\n", + " ax.plot_surface(x1s, x2, 0, color=\"b\", alpha=0.2, cstride=100, rstride=100)\n", + " ax.plot(x1s, boundary_x2s, 0, \"k-\", linewidth=2, label=r\"$h=0$\")\n", + " ax.plot(x1s, margin_x2s_1, 0, \"k--\", linewidth=2, label=r\"$h=\\pm 1$\")\n", + " ax.plot(x1s, margin_x2s_2, 0, \"k--\", linewidth=2)\n", + " ax.plot(X_crop[:, 0][y_crop==1], X_crop[:, 1][y_crop==1], 0, \"g^\")\n", + " ax.plot_wireframe(x1, x2, df, alpha=0.3, color=\"k\")\n", + " ax.plot(X_crop[:, 0][y_crop==0], X_crop[:, 1][y_crop==0], 0, \"bs\")\n", + " ax.axis(x1_lim + x2_lim)\n", + " ax.text(4.5, 2.5, 3.8, \"Decision function $h$\", fontsize=15)\n", + " ax.set_xlabel(r\"Petal length\", fontsize=15)\n", + " ax.set_ylabel(r\"Petal width\", fontsize=15)\n", + " ax.set_zlabel(r\"$h = \\mathbf{w}^t \\cdot \\mathbf{x} + b$\", fontsize=18)\n", + " ax.legend(loc=\"upper left\", fontsize=16)\n", + "\n", + "fig = plt.figure(figsize=(11, 6))\n", + "ax1 = fig.add_subplot(111, projection='3d')\n", + "plot_3D_decision_function(ax1, w=svm_clf2.coef_[0], b=svm_clf2.intercept_[0])\n", + "\n", + "save_fig(\"iris_3D_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Small weight vector results in a large margin" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def plot_2D_decision_function(w, b, ylabel=True, x1_lim=[-3, 3]):\n", + " x1 = np.linspace(x1_lim[0], x1_lim[1], 200)\n", + " y = w * x1 + b\n", + " m = 1 / w\n", + "\n", + " plt.plot(x1, y)\n", + " plt.plot(x1_lim, [1, 1], \"k:\")\n", + " plt.plot(x1_lim, [-1, -1], \"k:\")\n", + " plt.axhline(y=0, color='k')\n", + " plt.axvline(x=0, color='k')\n", + " plt.plot([m, m], [0, 1], \"k--\")\n", + " plt.plot([-m, -m], [0, -1], \"k--\")\n", + " plt.plot([-m, m], [0, 0], \"k-o\", linewidth=3)\n", + " plt.axis(x1_lim + [-2, 2])\n", + " plt.xlabel(r\"$x_1$\", fontsize=16)\n", + " if ylabel:\n", + " plt.ylabel(r\"$w_1 x_1$ \", rotation=0, fontsize=16)\n", + " plt.title(r\"$w_1 = {}$\".format(w), fontsize=16)\n", + "\n", + "plt.figure(figsize=(12, 3.2))\n", + "plt.subplot(121)\n", + "plot_2D_decision_function(1, 0)\n", + "plt.subplot(122)\n", + "plot_2D_decision_function(0.5, 0, ylabel=False)\n", + "save_fig(\"small_w_large_margin_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn import datasets\n", + "\n", + "iris = datasets.load_iris()\n", + "X = iris[\"data\"][:, (2, 3)] # petal length, 
petal width\n", + "y = (iris[\"target\"] == 2).astype(np.float64) # Iris-Virginica\n", + "\n", + "svm_clf = SVC(kernel=\"linear\", C=1)\n", + "svm_clf.fit(X, y)\n", + "svm_clf.predict([[5.3, 1.3]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Hinge loss" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "t = np.linspace(-2, 4, 200)\n", + "h = np.where(1 - t < 0, 0, 1 - t) # max(0, 1-t)\n", + "\n", + "plt.figure(figsize=(5,2.8))\n", + "plt.plot(t, h, \"b-\", linewidth=2, label=\"$max(0, 1 - t)$\")\n", + "plt.grid(True, which='both')\n", + "plt.axhline(y=0, color='k')\n", + "plt.axvline(x=0, color='k')\n", + "plt.yticks(np.arange(-1, 2.5, 1))\n", + "plt.xlabel(\"$t$\", fontsize=16)\n", + "plt.axis([-2, 4, -1, 2.5])\n", + "plt.legend(loc=\"upper right\", fontsize=16)\n", + "save_fig(\"hinge_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extra material" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training time" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X, y = make_moons(n_samples=1000, noise=0.4)\n", + "plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"bs\")\n", + "plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"g^\")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import time\n", + "\n", + "tol = 0.1\n", + "tols = []\n", + "times = []\n", + "for i in range(10):\n", + " svm_clf = SVC(kernel=\"poly\", gamma=3, C=10, tol=tol, verbose=1)\n", + " t1 = time.time()\n", + " svm_clf.fit(X, y)\n", + " t2 = time.time()\n", + " times.append(t2-t1)\n", + " tols.append(tol)\n", + " print(i, tol, t2-t1)\n", + " tol /= 10\n", + "plt.semilogx(tols, times)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Identical linear classifiers" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.svm import SVC, LinearSVC\n", + "from sklearn.linear_model import SGDClassifier\n", + "from sklearn.datasets import make_moons\n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "X, y = make_moons(n_samples=100, noise=0.15, random_state=42)\n", + "\n", + "C = 5\n", + "alpha = 1 / (C * len(X))\n", + "\n", + "sgd_clf = SGDClassifier(loss=\"hinge\", learning_rate=\"constant\", eta0=0.001, alpha=alpha, n_iter=100000, random_state=42)\n", + "svm_clf = SVC(kernel=\"linear\", C=C)\n", + "lin_clf = LinearSVC(loss=\"hinge\", C=C)\n", + "\n", + "X_scaled = StandardScaler().fit_transform(X)\n", + "sgd_clf.fit(X_scaled, y)\n", + "svm_clf.fit(X_scaled, y)\n", + "lin_clf.fit(X_scaled, y)\n", + "\n", + "print(\"SGDClassifier(alpha={}): \".format(sgd_clf.alpha), sgd_clf.intercept_, sgd_clf.coef_)\n", + "print(\"SVC: \", svm_clf.intercept_, svm_clf.coef_)\n", + "print(\"LinearSVC: \", lin_clf.intercept_, lin_clf.coef_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Linear SVM classifier implementation using Batch Gradient Descent" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Training set\n", + "X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n", + "y = (iris[\"target\"] == 2).astype(np.float64).reshape(-1, 1) # Iris-Virginica" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false + }, + "outputs": [],
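**[Editor's note]** The `MyLinearSVC` class in the next cell runs batch gradient descent on the soft-margin cost. Writing $t^{(i)} = \pm 1$ and letting $SV$ be the current margin violators (the instances with $t^{(i)}(\mathbf{w}^T\mathbf{x}^{(i)} + b) < 1$), the cost computed as `J` and the (sub)gradients stored in `w_gradient_vector` and `b_derivative` are

$$
J(\mathbf{w}, b) = \tfrac{1}{2}\,\mathbf{w}^T\mathbf{w}
+ C \sum_{i \in SV} \left(1 - t^{(i)}\left(\mathbf{w}^T\mathbf{x}^{(i)} + b\right)\right),
\qquad
\nabla_{\mathbf{w}} J = \mathbf{w} - C \sum_{i \in SV} t^{(i)}\mathbf{x}^{(i)},
\qquad
\frac{\partial J}{\partial b} = -C \sum_{i \in SV} t^{(i)}.
$$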
+ "source": [ + "from sklearn.base import BaseEstimator\n", + "\n", + "class MyLinearSVC(BaseEstimator):\n", + " def __init__(self, C=1, eta0=1, eta_d=10000, n_epochs=1000, random_state=None):\n", + " self.C = C\n", + " self.eta0 = eta0\n", + " self.n_epochs = n_epochs\n", + " self.random_state = random_state\n", + " self.eta_d = eta_d\n", + "\n", + " def eta(self, epoch):\n", + " return self.eta0 / (epoch + self.eta_d)\n", + " \n", + " def fit(self, X, y):\n", + " # Random initialization\n", + " if self.random_state:\n", + " rnd.seed(self.random_state)\n", + " w = rnd.randn(X.shape[1], 1) # n feature weights\n", + " b = 0\n", + "\n", + " m = len(X)\n", + " t = y * 2 - 1 # -1 if y==0, +1 if y==1\n", + " X_t = X * t\n", + " self.Js=[]\n", + "\n", + " # Training\n", + " for epoch in range(self.n_epochs):\n", + " support_vectors_idx = (X_t.dot(w) + t * b < 1).ravel()\n", + " X_t_sv = X_t[support_vectors_idx]\n", + " t_sv = t[support_vectors_idx]\n", + "\n", + " J = 1/2 * np.sum(w * w) + self.C * (np.sum(1 - X_t_sv.dot(w)) - b * np.sum(t_sv))\n", + " self.Js.append(J)\n", + "\n", + " w_gradient_vector = w - self.C * np.sum(X_t_sv, axis=0).reshape(-1, 1)\n", + " b_derivative = -self.C * np.sum(t_sv)\n", + " \n", + " w = w - self.eta(epoch) * w_gradient_vector\n", + " b = b - self.eta(epoch) * b_derivative\n", + " \n", + "\n", + " self.intercept_ = np.array([b])\n", + " self.coef_ = np.array([w])\n", + " support_vectors_idx = (X_t.dot(w) + t * b < 1).ravel()\n", + " self.support_vectors_ = X[support_vectors_idx]\n", + " return self\n", + "\n", + " def decision_function(self, X):\n", + " return X.dot(self.coef_[0]) + self.intercept_[0]\n", + "\n", + " def predict(self, X):\n", + " return (self.decision_function(X) >= 0).astype(np.float64)\n", + "\n", + "C=2\n", + "svm_clf = MyLinearSVC(C=C, eta0 = 10, eta_d = 1000, n_epochs=60000, random_state=2)\n", + "svm_clf.fit(X, y)\n", + "svm_clf.predict(np.array([[5, 2], [4, 1]]))" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.plot(range(svm_clf.n_epochs), svm_clf.Js)\n", + "plt.axis([0, svm_clf.n_epochs, 0, 100])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(svm_clf.intercept_, svm_clf.coef_)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "svm_clf2 = SVC(kernel=\"linear\", C=C)\n", + "svm_clf2.fit(X, y.ravel())\n", + "print(svm_clf2.intercept_, svm_clf2.coef_)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "yr = y.ravel()\n", + "plt.figure(figsize=(12,3.2))\n", + "plt.subplot(121)\n", + "plt.plot(X[:, 0][yr==1], X[:, 1][yr==1], \"g^\", label=\"Iris-Virginica\")\n", + "plt.plot(X[:, 0][yr==0], X[:, 1][yr==0], \"bs\", label=\"Not Iris-Virginica\")\n", + "plot_svc_decision_boundary(svm_clf, 4, 6)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.ylabel(\"Petal width\", fontsize=14)\n", + "plt.title(\"MyLinearSVC\", fontsize=14)\n", + "plt.axis([4, 6, 0.8, 2.8])\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(X[:, 0][yr==1], X[:, 1][yr==1], \"g^\")\n", + "plt.plot(X[:, 0][yr==0], X[:, 1][yr==0], \"bs\")\n", + 
"plot_svc_decision_boundary(svm_clf2, 4, 6)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.title(\"SVC\", fontsize=14)\n", + "plt.axis([4, 6, 0.8, 2.8])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import SGDClassifier\n", + "\n", + "sgd_clf = SGDClassifier(loss=\"hinge\", alpha = 0.017, n_iter = 50, random_state=42)\n", + "sgd_clf.fit(X, y.ravel())\n", + "\n", + "m = len(X)\n", + "t = y * 2 - 1 # -1 if t==0, +1 if t==1\n", + "X_b = np.c_[np.ones((m, 1)), X] # Add bias input x0=1\n", + "X_b_t = X_b * t\n", + "sgd_theta = np.r_[sgd_clf.intercept_[0], sgd_clf.coef_[0]]\n", + "print(sgd_theta)\n", + "support_vectors_idx = (X_b_t.dot(sgd_theta) < 1).ravel()\n", + "sgd_clf.support_vectors_ = X[support_vectors_idx]\n", + "sgd_clf.C = C\n", + "\n", + "plt.figure(figsize=(5.5,3.2))\n", + "plt.plot(X[:, 0][yr==1], X[:, 1][yr==1], \"g^\")\n", + "plt.plot(X[:, 0][yr==0], X[:, 1][yr==0], \"bs\")\n", + "plot_svc_decision_boundary(sgd_clf, 4, 6)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.ylabel(\"Petal width\", fontsize=14)\n", + "plt.title(\"SGDClassifier\", fontsize=14)\n", + "plt.axis([4, 6, 0.8, 2.8])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": {}, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/06_decision_trees.ipynb b/06_decision_trees.ipynb new file mode 100644 index 0000000..8e417ab --- /dev/null +++ b/06_decision_trees.ipynb @@ -0,0 +1,506 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 6 – Decision Trees**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercices in chapter 6._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + 
"import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"decision_trees\"\n", + "\n", + "def image_path(fig_id):\n", + " return os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id)\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(image_path(fig_id) + \".png\", format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training and visualizing" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import load_iris\n", + "from sklearn.tree import DecisionTreeClassifier, export_graphviz\n", + "\n", + "iris = load_iris()\n", + "X = iris.data[:, 2:] # petal length and width\n", + "y = iris.target\n", + "\n", + "tree_clf = DecisionTreeClassifier(max_depth=2, random_state=42)\n", + "tree_clf.fit(X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "export_graphviz(\n", + " tree_clf,\n", + " out_file=image_path(\"iris_tree.dot\"),\n", + " feature_names=iris.feature_names[2:],\n", + " class_names=iris.target_names,\n", + " rounded=True,\n", + " filled=True\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from matplotlib.colors import ListedColormap\n", + "\n", + "def plot_decision_boundary(clf, X, y, axes=[0, 7.5, 0, 3], iris=True, legend=False, plot_training=True):\n", + " x1s = np.linspace(axes[0], axes[1], 100)\n", + " x2s = np.linspace(axes[2], axes[3], 100)\n", + " x1, x2 = np.meshgrid(x1s, x2s)\n", + " X_new = np.c_[x1.ravel(), x2.ravel()]\n", + " y_pred = clf.predict(X_new).reshape(x1.shape)\n", + " custom_cmap = ListedColormap(['#fafab0','#9898ff','#a0faa0'])\n", + " plt.contourf(x1, x2, y_pred, alpha=0.3, cmap=custom_cmap, linewidth=10)\n", + " if not iris:\n", + " custom_cmap2 = ListedColormap(['#7d7d58','#4c4c7f','#507d50'])\n", + " plt.contour(x1, x2, y_pred, cmap=custom_cmap2, alpha=0.8)\n", + " if plot_training:\n", + " plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"yo\", label=\"Iris-Setosa\")\n", + " plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"bs\", label=\"Iris-Versicolour\")\n", + " plt.plot(X[:, 0][y==2], X[:, 1][y==2], \"g^\", label=\"Iris-Virginica\")\n", + " plt.axis(axes)\n", + " if iris:\n", + " plt.xlabel(\"Petal length\", fontsize=14)\n", + " plt.ylabel(\"Petal width\", fontsize=14)\n", + " else:\n", + " plt.xlabel(r\"$x_1$\", fontsize=18)\n", + " plt.ylabel(r\"$x_2$\", fontsize=18, rotation=0)\n", + " if legend:\n", + " plt.legend(loc=\"lower right\", fontsize=14)\n", + "\n", + "plt.figure(figsize=(8, 4))\n", + "plot_decision_boundary(tree_clf, X, y)\n", + "plt.plot([2.45, 2.45], [0, 3], \"k-\", linewidth=2)\n", + "plt.plot([2.45, 7.5], [1.75, 1.75], \"k--\", linewidth=2)\n", + "plt.plot([4.95, 4.95], [0, 1.75], \"k:\", linewidth=2)\n", + "plt.plot([4.85, 4.85], [1.75, 3], \"k:\", linewidth=2)\n", + "plt.text(1.40, 1.0, \"Depth=0\", fontsize=15)\n", + "plt.text(3.2, 1.80, \"Depth=1\", fontsize=13)\n", + "plt.text(4.05, 0.5, \"(Depth=2)\", fontsize=11)\n", + "\n", + 
"save_fig(\"decision_tree_decision_boundaries_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predicting classes and class probabilities" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tree_clf.predict_proba([[5, 1.5]])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tree_clf.predict([[5, 1.5]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sensitivity to training set details" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X[(X[:, 1]==X[:, 1][y==1].max()) & (y==1)] # widest Iris-Versicolour flower" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "not_widest_versicolour = (X[:, 1]!=1.8) | (y==2)\n", + "X_tweaked = X[not_widest_versicolour]\n", + "y_tweaked = y[not_widest_versicolour]\n", + "\n", + "tree_clf_tweaked = DecisionTreeClassifier(max_depth=2, random_state=40)\n", + "tree_clf_tweaked.fit(X_tweaked, y_tweaked)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(8, 4))\n", + "plot_decision_boundary(tree_clf_tweaked, X_tweaked, y_tweaked, legend=False)\n", + "plt.plot([0, 7.5], [0.8, 0.8], \"k-\", linewidth=2)\n", + "plt.plot([0, 7.5], [1.75, 1.75], \"k--\", linewidth=2)\n", + "plt.text(1.0, 0.9, \"Depth=0\", fontsize=15)\n", + "plt.text(1.0, 1.80, \"Depth=1\", fontsize=13)\n", + "\n", + "save_fig(\"decision_tree_instability_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import make_moons\n", + "Xm, ym = make_moons(n_samples=100, noise=0.25, random_state=53)\n", + "\n", + "deep_tree_clf1 = DecisionTreeClassifier(random_state=42)\n", + "deep_tree_clf2 = DecisionTreeClassifier(min_samples_leaf=4, random_state=42)\n", + "deep_tree_clf1.fit(Xm, ym)\n", + "deep_tree_clf2.fit(Xm, ym)\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "plt.subplot(121)\n", + "plot_decision_boundary(deep_tree_clf1, Xm, ym, axes=[-1.5, 2.5, -1, 1.5], iris=False)\n", + "plt.title(\"No restrictions\", fontsize=16)\n", + "plt.subplot(122)\n", + "plot_decision_boundary(deep_tree_clf2, Xm, ym, axes=[-1.5, 2.5, -1, 1.5], iris=False)\n", + "plt.title(\"min_samples_leaf = {}\".format(deep_tree_clf2.min_samples_leaf), fontsize=14)\n", + "\n", + "save_fig(\"min_samples_leaf_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "angle = np.pi / 180 * 20\n", + "rotation_matrix = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])\n", + "Xr = X.dot(rotation_matrix)\n", + "\n", + "tree_clf_r = DecisionTreeClassifier(random_state=42)\n", + "tree_clf_r.fit(Xr, y)\n", + "\n", + "plt.figure(figsize=(8, 3))\n", + "plot_decision_boundary(tree_clf_r, Xr, y, axes=[0.5, 7.5, -1.0, 1], iris=False)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rnd.seed(6)\n", + "Xs = rnd.rand(100, 2) - 0.5\n", + "ys = (Xs[:, 0] > 
0).astype(np.float32) * 2\n", + "\n", + "angle = np.pi / 4\n", + "rotation_matrix = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])\n", + "Xsr = Xs.dot(rotation_matrix)\n", + "\n", + "tree_clf_s = DecisionTreeClassifier(random_state=42)\n", + "tree_clf_s.fit(Xs, ys)\n", + "tree_clf_sr = DecisionTreeClassifier(random_state=42)\n", + "tree_clf_sr.fit(Xsr, ys)\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "plt.subplot(121)\n", + "plot_decision_boundary(tree_clf_s, Xs, ys, axes=[-0.7, 0.7, -0.7, 0.7], iris=False)\n", + "plt.subplot(122)\n", + "plot_decision_boundary(tree_clf_sr, Xsr, ys, axes=[-0.7, 0.7, -0.7, 0.7], iris=False)\n", + "\n", + "save_fig(\"sensitivity_to_rotation_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Regression trees" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.tree import DecisionTreeRegressor\n", + "\n", + "# Quadratic training set + noise\n", + "rnd.seed(42)\n", + "m = 200\n", + "X = rnd.rand(m, 1)\n", + "y = 4 * (X - 0.5) ** 2\n", + "y = y + rnd.randn(m, 1) / 10\n", + "\n", + "tree_reg1 = DecisionTreeRegressor(random_state=42, max_depth=2)\n", + "tree_reg2 = DecisionTreeRegressor(random_state=42, max_depth=3)\n", + "tree_reg1.fit(X, y)\n", + "tree_reg2.fit(X, y)\n", + "\n", + "def plot_regression_predictions(tree_reg, X, y, axes=[0, 1, -0.2, 1], ylabel=\"$y$\"):\n", + " x1 = np.linspace(axes[0], axes[1], 500).reshape(-1, 1)\n", + " y_pred = tree_reg.predict(x1)\n", + " plt.axis(axes)\n", + " plt.xlabel(\"$x_1$\", fontsize=18)\n", + " if ylabel:\n", + " plt.ylabel(ylabel, fontsize=18, rotation=0)\n", + " plt.plot(X, y, \"b.\")\n", + " plt.plot(x1, y_pred, \"r.-\", linewidth=2, label=r\"$\\hat{y}$\")\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "plt.subplot(121)\n", + "plot_regression_predictions(tree_reg1, X, y)\n", + "for split, style in ((0.1973, \"k-\"), (0.0917, \"k--\"), (0.7718, \"k--\")):\n", + " plt.plot([split, split], [-0.2, 1], style, linewidth=2)\n", + "plt.text(0.21, 0.65, \"Depth=0\", fontsize=15)\n", + "plt.text(0.01, 0.2, \"Depth=1\", fontsize=13)\n", + "plt.text(0.65, 0.8, \"Depth=1\", fontsize=13)\n", + "plt.legend(loc=\"upper center\", fontsize=18)\n", + "plt.title(\"max_depth=2\", fontsize=14)\n", + "\n", + "plt.subplot(122)\n", + "plot_regression_predictions(tree_reg2, X, y, ylabel=None)\n", + "for split, style in ((0.1973, \"k-\"), (0.0917, \"k--\"), (0.7718, \"k--\")):\n", + " plt.plot([split, split], [-0.2, 1], style, linewidth=2)\n", + "for split in (0.0458, 0.1298, 0.2873, 0.9040):\n", + " plt.plot([split, split], [-0.2, 1], \"k:\", linewidth=1)\n", + "plt.text(0.3, 0.5, \"Depth=2\", fontsize=13)\n", + "plt.title(\"max_depth=3\", fontsize=14)\n", + "\n", + "save_fig(\"tree_regression_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "export_graphviz(\n", + " tree_reg1,\n", + " out_file=image_path(\"regression_tree.dot\"),\n", + " feature_names=[\"x1\"],\n", + " rounded=True,\n", + " filled=True\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tree_reg1 = DecisionTreeRegressor(random_state=42)\n", + "tree_reg2 = DecisionTreeRegressor(random_state=42, min_samples_leaf=10)\n", + "tree_reg1.fit(X, y)\n", + "tree_reg2.fit(X, y)\n", + "\n", + 
"x1 = np.linspace(0, 1, 500).reshape(-1, 1)\n", + "y_pred1 = tree_reg1.predict(x1)\n", + "y_pred2 = tree_reg2.predict(x1)\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "\n", + "plt.subplot(121)\n", + "plt.plot(X, y, \"b.\")\n", + "plt.plot(x1, y_pred1, \"r.-\", linewidth=2, label=r\"$\\hat{y}$\")\n", + "plt.axis([0, 1, -0.2, 1.1])\n", + "plt.xlabel(\"$x_1$\", fontsize=18)\n", + "plt.ylabel(\"$y$\", fontsize=18, rotation=0)\n", + "plt.legend(loc=\"upper center\", fontsize=18)\n", + "plt.title(\"No restrictions\", fontsize=14)\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(X, y, \"b.\")\n", + "plt.plot(x1, y_pred2, \"r.-\", linewidth=2, label=r\"$\\hat{y}$\")\n", + "plt.axis([0, 1, -0.2, 1.1])\n", + "plt.xlabel(\"$x_1$\", fontsize=18)\n", + "plt.title(\"min_samples_leaf={}\".format(tree_reg2.min_samples_leaf), fontsize=14)\n", + "\n", + "save_fig(\"tree_regression_regularization_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": { + "height": "309px", + "width": "468px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/07_ensemble_learning_and_random_forests.ipynb b/07_ensemble_learning_and_random_forests.ipynb new file mode 100644 index 0000000..0c716b8 --- /dev/null +++ b/07_ensemble_learning_and_random_forests.ipynb @@ -0,0 +1,788 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 7 – Ensemble Learning and Random Forests**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercices in chapter 7._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to 
save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"ensembles\"\n", + "\n", + "def image_path(fig_id):\n", + " return os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id)\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(image_path(fig_id) + \".png\", format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Voting classifiers" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "heads_proba = 0.51\n", + "coin_tosses = (rnd.rand(10000, 10) < heads_proba).astype(np.int32)\n", + "cumulative_heads_ratio = np.cumsum(coin_tosses, axis=0) / np.arange(1, 10001).reshape(-1, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(8,3.5))\n", + "plt.plot(cumulative_heads_ratio)\n", + "plt.plot([0, 10000], [0.51, 0.51], \"k--\", linewidth=2, label=\"51%\")\n", + "plt.plot([0, 10000], [0.5, 0.5], \"k-\", label=\"50%\")\n", + "plt.xlabel(\"Number of coin tosses\")\n", + "plt.ylabel(\"Heads ratio\")\n", + "plt.legend(loc=\"lower right\")\n", + "plt.axis([0, 10000, 0.42, 0.58])\n", + "save_fig(\"law_of_large_numbers_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.cross_validation import train_test_split\n", + "from sklearn.datasets import make_moons\n", + "\n", + "X, y = make_moons(n_samples=500, noise=0.30, random_state=42)\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)\n", + "\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.ensemble import VotingClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.svm import SVC\n", + "\n", + "log_clf = LogisticRegression(random_state=42)\n", + "rnd_clf = RandomForestClassifier(random_state=42)\n", + "svm_clf = SVC(probability=True, random_state=42)\n", + "\n", + "voting_clf = VotingClassifier(\n", + " estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],\n", + " voting='soft'\n", + " )\n", + "voting_clf.fit(X_train, y_train)\n", + "\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "for clf in (log_clf, rnd_clf, svm_clf, voting_clf):\n", + " clf.fit(X_train, y_train)\n", + " y_pred = clf.predict(X_test)\n", + " print(clf.__class__.__name__, accuracy_score(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bagging ensembles" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import make_moons\n", + "from sklearn.ensemble import BaggingClassifier\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "\n", + "bag_clf = BaggingClassifier(\n", + " DecisionTreeClassifier(random_state=42), n_estimators=500,\n", + " max_samples=100, bootstrap=True, n_jobs=-1, random_state=42\n", + " )\n", + "bag_clf.fit(X_train, y_train)\n", + "y_pred = bag_clf.predict(X_test)\n", + "print(accuracy_score(y_test, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tree_clf = 
DecisionTreeClassifier(random_state=42)\n", + "tree_clf.fit(X_train, y_train)\n", + "y_pred_tree = tree_clf.predict(X_test)\n", + "print(accuracy_score(y_test, y_pred_tree))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from matplotlib.colors import ListedColormap\n", + "\n", + "def plot_decision_boundary(clf, X, y, axes=[-1.5, 2.5, -1, 1.5], alpha=0.5, contour=True):\n", + " x1s = np.linspace(axes[0], axes[1], 100)\n", + " x2s = np.linspace(axes[2], axes[3], 100)\n", + " x1, x2 = np.meshgrid(x1s, x2s)\n", + " X_new = np.c_[x1.ravel(), x2.ravel()]\n", + " y_pred = clf.predict(X_new).reshape(x1.shape)\n", + " custom_cmap = ListedColormap(['#fafab0','#9898ff','#a0faa0'])\n", + " plt.contourf(x1, x2, y_pred, alpha=0.3, cmap=custom_cmap, linewidth=10)\n", + " if contour:\n", + " custom_cmap2 = ListedColormap(['#7d7d58','#4c4c7f','#507d50'])\n", + " plt.contour(x1, x2, y_pred, cmap=custom_cmap2, alpha=0.8)\n", + " plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"yo\", alpha=alpha)\n", + " plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"bs\", alpha=alpha)\n", + " plt.axis(axes)\n", + " plt.xlabel(r\"$x_1$\", fontsize=18)\n", + " plt.ylabel(r\"$x_2$\", fontsize=18, rotation=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(11,4))\n", + "plt.subplot(121)\n", + "plot_decision_boundary(tree_clf, X, y)\n", + "plt.title(\"Decision Tree\", fontsize=14)\n", + "plt.subplot(122)\n", + "plot_decision_boundary(bag_clf, X, y)\n", + "plt.title(\"Decision Trees with Bagging\", fontsize=14)\n", + "save_fig(\"decision_tree_without_and_with_bagging_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Random Forests" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "bag_clf = BaggingClassifier(\n", + " DecisionTreeClassifier(splitter=\"random\", max_leaf_nodes=16, random_state=42),\n", + " n_estimators=500, max_samples=1.0, bootstrap=True,\n", + " n_jobs=-1, random_state=42\n", + " )\n", + "bag_clf.fit(X_train, y_train)\n", + "y_pred = bag_clf.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1, random_state=42)\n", + "rnd_clf.fit(X_train, y_train)\n", + "\n", + "y_pred_rf = rnd_clf.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.sum(y_pred == y_pred_rf) / len(y_pred) # almost identical predictions" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import load_iris\n", + "iris = load_iris()\n", + "rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1, random_state=42)\n", + "rnd_clf.fit(iris[\"data\"], iris[\"target\"])\n", + "for name, importance in zip(iris[\"feature_names\"], rnd_clf.feature_importances_):\n", + " print(name, \"=\", importance)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rnd_clf.feature_importances_" + ] + }, + { + 
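**[Editor's note]** The out-of-bag evaluation a few cells below works because a bootstrap sample of size $m$ drawn with replacement leaves out roughly $(1 - 1/m)^m \approx e^{-1} \approx 37\%$ of the training instances, giving each tree a free validation set. A quick illustrative check (not part of the commit; variable names are arbitrary):

```python
import numpy as np

rng = np.random.RandomState(42)
m = 500
bootstrap_indices = rng.randint(0, m, m)  # draw m indices with replacement
oob_fraction = 1.0 - len(np.unique(bootstrap_indices)) / float(m)
print(oob_fraction)  # ~0.37, close to (1 - 1/m)**m
```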
"cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(6, 4))\n", + "\n", + "for i in range(15):\n", + " tree_clf = DecisionTreeClassifier(max_leaf_nodes=16, random_state=42+i)\n", + " indices_with_replacement = rnd.randint(0, len(X_train), len(X_train))\n", + " tree_clf.fit(X[indices_with_replacement], y[indices_with_replacement])\n", + " plot_decision_boundary(tree_clf, X, y, axes=[-1.5, 2.5, -1, 1.5], alpha=0.02, contour=False)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Out-of-Bag evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "bag_clf = BaggingClassifier(\n", + " DecisionTreeClassifier(random_state=42), n_estimators=500,\n", + " bootstrap=True, n_jobs=-1, oob_score=True, random_state=40\n", + ")\n", + "bag_clf.fit(X_train, y_train)\n", + "bag_clf.oob_score_" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "bag_clf.oob_decision_function_[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.metrics import accuracy_score\n", + "y_pred = bag_clf.predict(X_test)\n", + "accuracy_score(y_test, y_pred)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feature importance" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import fetch_mldata\n", + "mnist = fetch_mldata('MNIST original')\n", + "rnd_clf = RandomForestClassifier(random_state=42)\n", + "rnd_clf.fit(mnist[\"data\"], mnist[\"target\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def plot_digit(data):\n", + " image = data.reshape(28, 28)\n", + " plt.imshow(image, cmap = matplotlib.cm.hot,\n", + " interpolation=\"nearest\")\n", + " plt.axis(\"off\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plot_digit(rnd_clf.feature_importances_)\n", + "\n", + "cbar = plt.colorbar(ticks=[rnd_clf.feature_importances_.min(), rnd_clf.feature_importances_.max()])\n", + "cbar.ax.set_yticklabels(['Not important', 'Very important'])\n", + "\n", + "save_fig(\"mnist_feature_importance_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AdaBoost" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.ensemble import AdaBoostClassifier\n", + "\n", + "ada_clf = AdaBoostClassifier(\n", + " DecisionTreeClassifier(max_depth=2), n_estimators=200,\n", + " algorithm=\"SAMME.R\", learning_rate=0.5, random_state=42\n", + " )\n", + "ada_clf.fit(X_train, y_train)\n", + "plot_decision_boundary(ada_clf, X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "m = len(X_train)\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "for subplot, learning_rate in ((121, 1), (122, 0.5)):\n", + " sample_weights = np.ones(m)\n", + " for i in range(5):\n", + " plt.subplot(subplot)\n", + " svm_clf = 
SVC(kernel=\"rbf\", C=0.05)\n", + " svm_clf.fit(X_train, y_train, sample_weight=sample_weights)\n", + " y_pred = svm_clf.predict(X_train)\n", + " sample_weights[y_pred != y_train] *= (1 + learning_rate)\n", + " plot_decision_boundary(svm_clf, X, y, alpha=0.2)\n", + " plt.title(\"learning_rate = {}\".format(learning_rate - 1), fontsize=16)\n", + "\n", + "plt.subplot(121)\n", + "plt.text(-0.7, -0.65, \"1\", fontsize=14)\n", + "plt.text(-0.6, -0.10, \"2\", fontsize=14)\n", + "plt.text(-0.5, 0.10, \"3\", fontsize=14)\n", + "plt.text(-0.4, 0.55, \"4\", fontsize=14)\n", + "plt.text(-0.3, 0.90, \"5\", fontsize=14)\n", + "save_fig(\"boosting_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "list(m for m in dir(ada_clf) if not m.startswith(\"_\") and m.endswith(\"_\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Gradient Boosting" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.tree import DecisionTreeRegressor\n", + "\n", + "rnd.seed(42)\n", + "X = rnd.rand(100, 1) - 0.5\n", + "y = 3*X[:, 0]**2 + 0.05 * rnd.randn(100)\n", + "\n", + "tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=42)\n", + "tree_reg1.fit(X, y)\n", + "\n", + "y2 = y - tree_reg1.predict(X)\n", + "tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=42)\n", + "tree_reg2.fit(X, y2)\n", + "\n", + "y3 = y2 - tree_reg2.predict(X)\n", + "tree_reg3 = DecisionTreeRegressor(max_depth=2, random_state=42)\n", + "tree_reg3.fit(X, y3)\n", + "\n", + "X_new = np.array([[0.8]])\n", + "y_pred = sum(tree.predict(X_new) for tree in (tree_reg1, tree_reg2, tree_reg3))\n", + "print(y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def plot_predictions(regressors, X, y, axes, label=None, style=\"r-\", data_style=\"b.\", data_label=None):\n", + " x1 = np.linspace(axes[0], axes[1], 500)\n", + " y_pred = sum(regressor.predict(x1.reshape(-1, 1)) for regressor in regressors)\n", + " plt.plot(X[:, 0], y, data_style, label=data_label)\n", + " plt.plot(x1, y_pred, style, linewidth=2, label=label)\n", + " if label or data_label:\n", + " plt.legend(loc=\"upper center\", fontsize=16)\n", + " plt.axis(axes)\n", + "\n", + "plt.figure(figsize=(11,11))\n", + "\n", + "plt.subplot(321)\n", + "plot_predictions([tree_reg1], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label=\"$h_1(x_1)$\", style=\"g-\", data_label=\"Training set\")\n", + "plt.ylabel(\"$y$\", fontsize=16, rotation=0)\n", + "plt.title(\"Residuals and tree predictions\", fontsize=16)\n", + "\n", + "plt.subplot(322)\n", + "plot_predictions([tree_reg1], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label=\"$h(x_1) = h_1(x_1)$\", data_label=\"Training set\")\n", + "plt.ylabel(\"$y$\", fontsize=16, rotation=0)\n", + "plt.title(\"Ensemble predictions\", fontsize=16)\n", + "\n", + "plt.subplot(323)\n", + "plot_predictions([tree_reg2], X, y2, axes=[-0.5, 0.5, -0.5, 0.5], label=\"$h_2(x_1)$\", style=\"g-\", data_style=\"k+\", data_label=\"Residuals\")\n", + "plt.ylabel(\"$y - h_1(x_1)$\", fontsize=16)\n", + "\n", + "plt.subplot(324)\n", + "plot_predictions([tree_reg1, tree_reg2], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label=\"$h(x_1) = h_1(x_1) + h_2(x_1)$\")\n", + "plt.ylabel(\"$y$\", fontsize=16, rotation=0)\n", + "\n", + "plt.subplot(325)\n", + "plot_predictions([tree_reg3], X, y3, 
axes=[-0.5, 0.5, -0.5, 0.5], label=\"$h_3(x_1)$\", style=\"g-\", data_style=\"k+\")\n", + "plt.ylabel(\"$y - h_1(x_1) - h_2(x_1)$\", fontsize=16)\n", + "plt.xlabel(\"$x_1$\", fontsize=16)\n", + "\n", + "plt.subplot(326)\n", + "plot_predictions([tree_reg1, tree_reg2, tree_reg3], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label=\"$h(x_1) = h_1(x_1) + h_2(x_1) + h_3(x_1)$\")\n", + "plt.xlabel(\"$x_1$\", fontsize=16)\n", + "plt.ylabel(\"$y$\", fontsize=16, rotation=0)\n", + "\n", + "save_fig(\"gradient_boosting_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.ensemble import GradientBoostingRegressor\n", + "\n", + "gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=3, learning_rate=1.0, random_state=42)\n", + "gbrt.fit(X, y)\n", + "\n", + "gbrt_slow = GradientBoostingRegressor(max_depth=2, n_estimators=200, learning_rate=0.1, random_state=42)\n", + "gbrt_slow.fit(X, y)\n", + "\n", + "plt.figure(figsize=(11,4))\n", + "\n", + "plt.subplot(121)\n", + "plot_predictions([gbrt], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label=\"Ensemble predictions\")\n", + "plt.title(\"learning_rate={}, n_estimators={}\".format(gbrt.learning_rate, gbrt.n_estimators), fontsize=14)\n", + "\n", + "plt.subplot(122)\n", + "plot_predictions([gbrt_slow], X, y, axes=[-0.5, 0.5, -0.1, 0.8])\n", + "plt.title(\"learning_rate={}, n_estimators={}\".format(gbrt_slow.learning_rate, gbrt_slow.n_estimators), fontsize=14)\n", + "\n", + "save_fig(\"gbrt_learning_rate_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Gradient Boosting with Early stopping" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.cross_validation import train_test_split\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "X_train, X_val, y_train, y_val = train_test_split(X, y)\n", + "\n", + "gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120, learning_rate=0.1, random_state=42)\n", + "gbrt.fit(X_train, y_train)\n", + "\n", + "errors = [mean_squared_error(y_val, y_pred) for y_pred in gbrt.staged_predict(X_val)]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "best_n_estimators = np.argmin(errors) + 1\n", + "min_error = errors[best_n_estimators - 1]\n", + "\n", + "gbrt_best = GradientBoostingRegressor(max_depth=2, n_estimators=best_n_estimators, learning_rate=0.1, random_state=42)\n", + "gbrt_best.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(11, 4))\n", + "\n", + "plt.subplot(121)\n", + "plt.plot(errors, \"b.-\")\n", + "plt.plot([best_n_estimators, best_n_estimators], [0, min_error], \"k--\")\n", + "plt.plot([0, 120], [min_error, min_error], \"k--\")\n", + "plt.plot(best_n_estimators, min_error, \"ko\")\n", + "plt.text(best_n_estimators, min_error*1.2, \"Minimum\", ha=\"center\", fontsize=14)\n", + "plt.axis([0, 120, 0, 0.01])\n", + "plt.xlabel(\"Number of trees\")\n", + "plt.title(\"Validation error\", fontsize=14)\n", + "\n", + "plt.subplot(122)\n", + "plot_predictions([gbrt_best], X, y, axes=[-0.5, 0.5, -0.1, 0.8])\n", + "plt.title(\"Best model (55 trees)\", fontsize=14)\n", + "\n", + "save_fig(\"early_stopping_gbrt_plot\")\n", + "plt.show()" + ] + 
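A note on the indexing above: `staged_predict()` yields predictions after 1, 2, ... trees, so `errors[i]` is the validation MSE of an ensemble with `i + 1` trees, which is why `np.argmin(errors)` is shifted by one before being used as `n_estimators`. Because boosting builds its trees sequentially and deterministically here, a retrained model with the same hyperparameters reproduces the first stages, so the convention can be checked directly (a sketch reusing `X_train`, `y_train`, `X_val`, `y_val` and `errors` from the cells above):

```python
i = 10  # an arbitrary stage to verify
gbrt_check = GradientBoostingRegressor(max_depth=2, n_estimators=i + 1,
                                       learning_rate=0.1, random_state=42)
gbrt_check.fit(X_train, y_train)
# Both numbers should agree up to floating-point noise:
print(errors[i], mean_squared_error(y_val, gbrt_check.predict(X_val)))
```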
}, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=1, learning_rate=0.1, random_state=42, warm_start=True)\n", + "\n", + "min_val_error = float(\"inf\")\n", + "error_going_up = 0\n", + "for n_estimators in range(1, 120):\n", + "    gbrt.n_estimators = n_estimators\n", + "    gbrt.fit(X_train, y_train)\n", + "    y_pred = gbrt.predict(X_val)\n", + "    val_error = mean_squared_error(y_val, y_pred)\n", + "    if val_error < min_val_error:\n", + "        min_val_error = val_error\n", + "        error_going_up = 0\n", + "    else:\n", + "        error_going_up += 1\n", + "        if error_going_up == 5:\n", + "            break # early stopping" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(gbrt.n_estimators)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": { + "height": "252px", + "width": "333px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/08_dimensionality_reduction.ipynb b/08_dimensionality_reduction.ipynb new file mode 100644 index 0000000..fa135f9 --- /dev/null +++ b/08_dimensionality_reduction.ipynb @@ -0,0 +1,1343 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 8 – Dimensionality Reduction**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercises in chapter 8._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = 
\"dim_reduction\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Projection methods\n", + "Build 3D dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rnd.seed(4)\n", + "m = 60\n", + "w1, w2 = 0.1, 0.3\n", + "noise = 0.1\n", + "\n", + "angles = rnd.rand(m) * 3 * np.pi / 2 - 0.5\n", + "X = np.empty((m, 3))\n", + "X[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * rnd.randn(m) / 2\n", + "X[:, 1] = np.sin(angles) * 0.7 + noise * rnd.randn(m) / 2\n", + "X[:, 2] = X[:, 0] * w1 + X[:, 1] * w2 + noise * rnd.randn(m)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mean normalize the data:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X = X - X.mean(axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Apply PCA to reduce to 2D." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.decomposition import PCA\n", + "\n", + "pca = PCA(n_components = 2)\n", + "X2D = pca.fit_transform(X)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Recover the 3D points projected on the plane (PCA 2D subspace)." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "X2D_inv = pca.inverse_transform(X2D)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Utility class to draw 3D arrows (copied from http://stackoverflow.com/questions/11140163)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from matplotlib.patches import FancyArrowPatch\n", + "from mpl_toolkits.mplot3d import proj3d\n", + "\n", + "class Arrow3D(FancyArrowPatch):\n", + " def __init__(self, xs, ys, zs, *args, **kwargs):\n", + " FancyArrowPatch.__init__(self, (0,0), (0,0), *args, **kwargs)\n", + " self._verts3d = xs, ys, zs\n", + "\n", + " def draw(self, renderer):\n", + " xs3d, ys3d, zs3d = self._verts3d\n", + " xs, ys, zs = proj3d.proj_transform(xs3d, ys3d, zs3d, renderer.M)\n", + " self.set_positions((xs[0],ys[0]),(xs[1],ys[1]))\n", + " FancyArrowPatch.draw(self, renderer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Express the plane as a function of x and y." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "axes = [-1.8, 1.8, -1.3, 1.3, -1.0, 1.0]\n", + "\n", + "x1s = np.linspace(axes[0], axes[1], 10)\n", + "x2s = np.linspace(axes[2], axes[3], 10)\n", + "x1, x2 = np.meshgrid(x1s, x2s)\n", + "\n", + "C = pca.components_\n", + "R = C.T.dot(C)\n", + "z = (R[0, 2] * x1 + R[1, 2] * x2) / (1 - R[2, 2])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot the 3D dataset, the plane and the projections on that plane." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from mpl_toolkits.mplot3d import Axes3D\n", + "\n", + "fig = plt.figure(figsize=(6, 3.8))\n", + "ax = fig.add_subplot(111, projection='3d')\n", + "\n", + "X3D_above = X[X[:, 2] > X2D_inv[:, 2]]\n", + "X3D_below = X[X[:, 2] <= X2D_inv[:, 2]]\n", + "\n", + "ax.plot(X3D_below[:, 0], X3D_below[:, 1], X3D_below[:, 2], \"bo\", alpha=0.5)\n", + "\n", + "ax.plot_surface(x1, x2, z, alpha=0.2, color=\"k\")\n", + "np.linalg.norm(C, axis=0)\n", + "ax.add_artist(Arrow3D([0, C[0, 0]],[0, C[0, 1]],[0, C[0, 2]], mutation_scale=15, lw=1, arrowstyle=\"-|>\", color=\"k\"))\n", + "ax.add_artist(Arrow3D([0, C[1, 0]],[0, C[1, 1]],[0, C[1, 2]], mutation_scale=15, lw=1, arrowstyle=\"-|>\", color=\"k\"))\n", + "ax.plot([0], [0], [0], \"k.\")\n", + "\n", + "for i in range(m):\n", + " if X[i, 2] > X2D_inv[i, 2]:\n", + " ax.plot([X[i][0], X2D_inv[i][0]], [X[i][1], X2D_inv[i][1]], [X[i][2], X2D_inv[i][2]], \"k-\")\n", + " else:\n", + " ax.plot([X[i][0], X2D_inv[i][0]], [X[i][1], X2D_inv[i][1]], [X[i][2], X2D_inv[i][2]], \"k-\", color=\"#505050\")\n", + " \n", + "ax.plot(X2D_inv[:, 0], X2D_inv[:, 1], X2D_inv[:, 2], \"k+\")\n", + "ax.plot(X2D_inv[:, 0], X2D_inv[:, 1], X2D_inv[:, 2], \"k.\")\n", + "ax.plot(X3D_above[:, 0], X3D_above[:, 1], X3D_above[:, 2], \"bo\")\n", + "ax.set_xlabel(\"$x_1$\", fontsize=18)\n", + "ax.set_ylabel(\"$x_2$\", fontsize=18)\n", + "ax.set_zlabel(\"$x_3$\", fontsize=18)\n", + "ax.set_xlim(axes[0:2])\n", + "ax.set_ylim(axes[2:4])\n", + "ax.set_zlim(axes[4:6])\n", + "\n", + "save_fig(\"dataset_3d_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fig = plt.figure()\n", + "ax = fig.add_subplot(111, aspect='equal')\n", + "\n", + "ax.plot(X2D[:, 0], X2D[:, 1], \"k+\")\n", + "ax.plot(X2D[:, 0], X2D[:, 1], \"k.\")\n", + "ax.plot([0], [0], \"ko\")\n", + "ax.arrow(0, 0, 0, 1, head_width=0.05, length_includes_head=True, head_length=0.1, fc='k', ec='k')\n", + "ax.arrow(0, 0, 1, 0, head_width=0.05, length_includes_head=True, head_length=0.1, fc='k', ec='k')\n", + "ax.set_xlabel(\"$z_1$\", fontsize=18)\n", + "ax.set_ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + "ax.axis([-1.5, 1.3, -1.2, 1.2])\n", + "ax.grid(True)\n", + "save_fig(\"dataset_2d_plot\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "PCA using SVD decomposition" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "m, n = X.shape\n", + "\n", + "X_centered = X - X.mean(axis=0)\n", + "U, s, V = np.linalg.svd(X_centered)\n", + "c1 = V.T[:, 0]\n", + "c2 = V.T[:, 1]\n", + "\n", + "S = np.zeros(X.shape)\n", + "S[:n, :n] = np.diag(s)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.allclose(X, U.dot(S).dot(V))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "T = X.dot(V.T[:, :2])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.allclose(T, U.dot(S)[:, :2])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.decomposition import 
PCA\n", + "pca = PCA(n_components = 2)\n", + "X2D_p = pca.fit_transform(X)\n", + "np.allclose(X2D_p, T)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "X3D_recover = T.dot(V[:2, :])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.allclose(X3D_recover, pca.inverse_transform(X2D_p))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "V" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pca.components_" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "R = pca.components_.T.dot(pca.components_)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "S[:3]" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pca.explained_variance_ratio_" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "1 - pca.explained_variance_ratio_.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "S[0,0]**2/(S**2).sum(), S[1,1]**2/(S**2).sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.sqrt((T[:, 1]**2).sum())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Manifold learning\n", + "Swiss roll:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.datasets import make_swiss_roll\n", + "X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "axes = [-11.5, 14, -2, 23, -12, 15]\n", + "\n", + "fig = plt.figure(figsize=(6, 5))\n", + "ax = fig.add_subplot(111, projection='3d')\n", + "\n", + "ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=t, cmap=plt.cm.hot)\n", + "ax.view_init(10, -70)\n", + "ax.set_xlabel(\"$x_1$\", fontsize=18)\n", + "ax.set_ylabel(\"$x_2$\", fontsize=18)\n", + "ax.set_zlabel(\"$x_3$\", fontsize=18)\n", + "ax.set_xlim(axes[0:2])\n", + "ax.set_ylim(axes[2:4])\n", + "ax.set_zlim(axes[4:6])\n", + "\n", + "save_fig(\"swiss_roll_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(11, 4))\n", + "\n", + "plt.subplot(121)\n", + "plt.scatter(X[:, 0], X[:, 1], c=t, cmap=plt.cm.hot)\n", + "plt.axis(axes[:4])\n", + "plt.xlabel(\"$x_1$\", fontsize=18)\n", + "plt.ylabel(\"$x_2$\", fontsize=18, rotation=0)\n", + "plt.grid(True)\n", + "\n", + "plt.subplot(122)\n", + "plt.scatter(t, X[:, 1], c=t, cmap=plt.cm.hot)\n", + "plt.axis([4, 15, axes[2], axes[3]])\n", + "plt.xlabel(\"$z_1$\", fontsize=18)\n", + "plt.grid(True)\n", + "\n", + "save_fig(\"squished_swiss_roll_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": 
{ + "collapsed": false + }, + "outputs": [], + "source": [ + "from matplotlib import gridspec\n", + "\n", + "axes = [-11.5, 14, -2, 23, -12, 15]\n", + "\n", + "x2s = np.linspace(axes[2], axes[3], 10)\n", + "x3s = np.linspace(axes[4], axes[5], 10)\n", + "x2, x3 = np.meshgrid(x2s, x3s)\n", + "\n", + "fig = plt.figure(figsize=(6, 5))\n", + "ax = plt.subplot(111, projection='3d')\n", + "\n", + "positive_class = X[:, 0] > 5\n", + "X_pos = X[positive_class]\n", + "X_neg = X[~positive_class]\n", + "ax.view_init(10, -70)\n", + "ax.plot(X_neg[:, 0], X_neg[:, 1], X_neg[:, 2], \"y^\")\n", + "ax.plot_wireframe(5, x2, x3, alpha=0.5)\n", + "ax.plot(X_pos[:, 0], X_pos[:, 1], X_pos[:, 2], \"gs\")\n", + "ax.set_xlabel(\"$x_1$\", fontsize=18)\n", + "ax.set_ylabel(\"$x_2$\", fontsize=18)\n", + "ax.set_zlabel(\"$x_3$\", fontsize=18)\n", + "ax.set_xlim(axes[0:2])\n", + "ax.set_ylim(axes[2:4])\n", + "ax.set_zlim(axes[4:6])\n", + "\n", + "save_fig(\"manifold_decision_boundary_plot1\")\n", + "plt.show()\n", + "\n", + "fig = plt.figure(figsize=(5, 4))\n", + "ax = plt.subplot(111)\n", + "\n", + "plt.plot(t[positive_class], X[positive_class, 1], \"gs\")\n", + "plt.plot(t[~positive_class], X[~positive_class, 1], \"y^\")\n", + "plt.axis([4, 15, axes[2], axes[3]])\n", + "plt.xlabel(\"$z_1$\", fontsize=18)\n", + "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + "plt.grid(True)\n", + "\n", + "save_fig(\"manifold_decision_boundary_plot2\")\n", + "plt.show()\n", + "\n", + "fig = plt.figure(figsize=(6, 5))\n", + "ax = plt.subplot(111, projection='3d')\n", + "\n", + "positive_class = 2 * (t[:] - 4) > X[:, 1]\n", + "X_pos = X[positive_class]\n", + "X_neg = X[~positive_class]\n", + "ax.view_init(10, -70)\n", + "ax.plot(X_neg[:, 0], X_neg[:, 1], X_neg[:, 2], \"y^\")\n", + "ax.plot(X_pos[:, 0], X_pos[:, 1], X_pos[:, 2], \"gs\")\n", + "ax.set_xlabel(\"$x_1$\", fontsize=18)\n", + "ax.set_ylabel(\"$x_2$\", fontsize=18)\n", + "ax.set_zlabel(\"$x_3$\", fontsize=18)\n", + "ax.set_xlim(axes[0:2])\n", + "ax.set_ylim(axes[2:4])\n", + "ax.set_zlim(axes[4:6])\n", + "\n", + "save_fig(\"manifold_decision_boundary_plot3\")\n", + "plt.show()\n", + "\n", + "fig = plt.figure(figsize=(5, 4))\n", + "ax = plt.subplot(111)\n", + "\n", + "plt.plot(t[positive_class], X[positive_class, 1], \"gs\")\n", + "plt.plot(t[~positive_class], X[~positive_class, 1], \"y^\")\n", + "plt.plot([4, 15], [0, 22], \"b-\", linewidth=2)\n", + "plt.axis([4, 15, axes[2], axes[3]])\n", + "plt.xlabel(\"$z_1$\", fontsize=18)\n", + "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + "plt.grid(True)\n", + "\n", + "save_fig(\"manifold_decision_boundary_plot4\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PCA" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "angle = np.pi / 5\n", + "stretch = 5\n", + "m = 200\n", + "\n", + "rnd.seed(3)\n", + "X = rnd.randn(m, 2) / 10\n", + "X = X.dot(np.array([[stretch, 0],[0, 1]])) # stretch\n", + "X = X.dot([[np.cos(angle), np.sin(angle)], [-np.sin(angle), np.cos(angle)]]) # rotate\n", + "\n", + "u1 = np.array([np.cos(angle), np.sin(angle)])\n", + "u2 = np.array([np.cos(angle - 2 * np.pi/6), np.sin(angle - 2 * np.pi/6)])\n", + "u3 = np.array([np.cos(angle - np.pi/2), np.sin(angle - np.pi/2)])\n", + "\n", + "X_proj1 = X.dot(u1.reshape(-1, 1))\n", + "X_proj2 = X.dot(u2.reshape(-1, 1))\n", + "X_proj3 = X.dot(u3.reshape(-1, 1))\n", + "\n", + "plt.figure(figsize=(8,4))\n", + "plt.subplot2grid((3,2), (0, 0), 
rowspan=3)\n", + "plt.plot([-1.4, 1.4], [-1.4*u1[1]/u1[0], 1.4*u1[1]/u1[0]], \"k-\", linewidth=1)\n", + "plt.plot([-1.4, 1.4], [-1.4*u2[1]/u2[0], 1.4*u2[1]/u2[0]], \"k--\", linewidth=1)\n", + "plt.plot([-1.4, 1.4], [-1.4*u3[1]/u3[0], 1.4*u3[1]/u3[0]], \"k:\", linewidth=2)\n", + "plt.plot(X[:, 0], X[:, 1], \"bo\", alpha=0.5)\n", + "plt.axis([-1.4, 1.4, -1.4, 1.4])\n", + "plt.arrow(0, 0, u1[0], u1[1], head_width=0.1, linewidth=5, length_includes_head=True, head_length=0.1, fc='k', ec='k')\n", + "plt.arrow(0, 0, u3[0], u3[1], head_width=0.1, linewidth=5, length_includes_head=True, head_length=0.1, fc='k', ec='k')\n", + "plt.text(u1[0] + 0.1, u1[1] - 0.05, r\"$\\mathbf{c_1}$\", fontsize=22)\n", + "plt.text(u3[0] + 0.1, u3[1], r\"$\\mathbf{c_2}$\", fontsize=22)\n", + "plt.xlabel(\"$x_1$\", fontsize=18)\n", + "plt.ylabel(\"$x_2$\", fontsize=18, rotation=0)\n", + "plt.grid(True)\n", + "\n", + "plt.subplot2grid((3,2), (0, 1))\n", + "plt.plot([-2, 2], [0, 0], \"k-\", linewidth=1)\n", + "plt.plot(X_proj1[:, 0], np.zeros(m), \"bo\", alpha=0.3)\n", + "plt.gca().get_yaxis().set_ticks([])\n", + "plt.gca().get_xaxis().set_ticklabels([])\n", + "plt.axis([-2, 2, -1, 1])\n", + "plt.grid(True)\n", + "\n", + "plt.subplot2grid((3,2), (1, 1))\n", + "plt.plot([-2, 2], [0, 0], \"k--\", linewidth=1)\n", + "plt.plot(X_proj2[:, 0], np.zeros(m), \"bo\", alpha=0.3)\n", + "plt.gca().get_yaxis().set_ticks([])\n", + "plt.gca().get_xaxis().set_ticklabels([])\n", + "plt.axis([-2, 2, -1, 1])\n", + "plt.grid(True)\n", + "\n", + "plt.subplot2grid((3,2), (2, 1))\n", + "plt.plot([-2, 2], [0, 0], \"k:\", linewidth=2)\n", + "plt.plot(X_proj3[:, 0], np.zeros(m), \"bo\", alpha=0.3)\n", + "plt.gca().get_yaxis().set_ticks([])\n", + "plt.axis([-2, 2, -1, 1])\n", + "plt.xlabel(\"$z_1$\", fontsize=18)\n", + "plt.grid(True)\n", + "\n", + "save_fig(\"pca_best_projection\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MNIST compression" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.cross_validation import train_test_split\n", + "from sklearn.datasets import fetch_mldata\n", + "\n", + "mnist = fetch_mldata('MNIST original')\n", + "X = mnist[\"data\"]\n", + "y = mnist[\"target\"]\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X = X_train\n", + "\n", + "pca = PCA()\n", + "pca.fit(X)\n", + "d = np.argmax(np.cumsum(pca.explained_variance_ratio_) >= 0.95) + 1\n", + "d" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pca = PCA(n_components=0.95)\n", + "X_reduced = pca.fit_transform(X)\n", + "pca.n_components_" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.sum(pca.explained_variance_ratio_)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_mnist = X_train\n", + "\n", + "pca = PCA(n_components = 154)\n", + "X_mnist_reduced = pca.fit_transform(X_mnist)\n", + "X_mnist_recovered = pca.inverse_transform(X_mnist_reduced)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def 
plot_digits(instances, images_per_row=5, **options):\n", + " size = 28\n", + " images_per_row = min(len(instances), images_per_row)\n", + " images = [instance.reshape(size,size) for instance in instances]\n", + " n_rows = (len(instances) - 1) // images_per_row + 1\n", + " row_images = []\n", + " n_empty = n_rows * images_per_row - len(instances)\n", + " images.append(np.zeros((size, size * n_empty)))\n", + " for row in range(n_rows):\n", + " rimages = images[row * images_per_row : (row + 1) * images_per_row]\n", + " row_images.append(np.concatenate(rimages, axis=1))\n", + " image = np.concatenate(row_images, axis=0)\n", + " plt.imshow(image, cmap = matplotlib.cm.binary, **options)\n", + " plt.axis(\"off\")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(7, 4))\n", + "plt.subplot(121)\n", + "plot_digits(X_mnist[::2100])\n", + "plt.title(\"Original\", fontsize=16)\n", + "plt.subplot(122)\n", + "plot_digits(X_mnist_recovered[::2100])\n", + "plt.title(\"Compressed\", fontsize=16)\n", + "\n", + "save_fig(\"mnist_compression_plot\")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.decomposition import IncrementalPCA\n", + "\n", + "n_batches = 100\n", + "inc_pca = IncrementalPCA(n_components=154)\n", + "for X_batch in np.array_split(X_mnist, n_batches):\n", + " print(\".\", end=\"\")\n", + " inc_pca.partial_fit(X_batch)\n", + "\n", + "X_mnist_reduced_inc = inc_pca.transform(X_mnist)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "X_mnist_recovered_inc = inc_pca.inverse_transform(X_mnist_reduced_inc)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(7, 4))\n", + "plt.subplot(121)\n", + "plot_digits(X_mnist[::2100])\n", + "plt.subplot(122)\n", + "plot_digits(X_mnist_recovered_inc[::2100])\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.allclose(pca.mean_, inc_pca.mean_)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.allclose(X_mnist_reduced, X_mnist_reduced_inc)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "filename = \"my_mnist.data\"\n", + "\n", + "X_mm = np.memmap(filename, dtype='float32', mode='write', shape=X_mnist.shape)\n", + "X_mm[:] = X_mnist\n", + "del X_mm" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_mm = np.memmap(filename, dtype='float32', mode='readonly', shape=X_mnist.shape)\n", + "\n", + "batch_size = len(X_mnist) // n_batches\n", + "inc_pca = IncrementalPCA(n_components=154, batch_size=batch_size)\n", + "inc_pca.fit(X_mm)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.decomposition import RandomizedPCA\n", + "\n", + "rnd_pca = RandomizedPCA(n_components=154, random_state=42)\n", + "X_reduced = rnd_pca.fit_transform(X_mnist)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, 
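Do not be surprised that `np.allclose(X_mnist_reduced, X_mnist_reduced_inc)` above returns `False`: `IncrementalPCA` only approximates the batch solution, and principal components are in any case defined only up to sign, so individual projections can differ even when the learned subspaces agree. A sign-insensitive comparison is the reconstruction error (a sketch reusing the fitted `pca` and `inc_pca`; note that it temporarily materializes two arrays the size of the training set):

```python
mse_batch = np.mean((pca.inverse_transform(pca.transform(X_mnist)) - X_mnist) ** 2)
mse_inc = np.mean((inc_pca.inverse_transform(inc_pca.transform(X_mnist)) - X_mnist) ** 2)
print(mse_batch, mse_inc)  # close values indicate similar 154-dimensional subspaces
```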
+ "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import time\n", + "\n", + "for n_components in (2, 10, 154):\n", + " print(\"n_components =\", n_components)\n", + " regular_pca = PCA(n_components=n_components)\n", + " inc_pca = IncrementalPCA(n_components=154, batch_size=500)\n", + " rnd_pca = RandomizedPCA(n_components=154, random_state=42)\n", + "\n", + " for pca in (regular_pca, inc_pca, rnd_pca):\n", + " t1 = time.time()\n", + " pca.fit(X_mnist)\n", + " t2 = time.time()\n", + " print(pca.__class__.__name__, t2 - t1, \"seconds\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Kernel PCA" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.decomposition import KernelPCA\n", + "\n", + "X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)\n", + "\n", + "lin_pca = KernelPCA(n_components = 2, kernel=\"linear\", fit_inverse_transform=True)\n", + "rbf_pca = KernelPCA(n_components = 2, kernel=\"rbf\", gamma=0.0433, fit_inverse_transform=True)\n", + "sig_pca = KernelPCA(n_components = 2, kernel=\"sigmoid\", gamma=0.001, coef0=1, fit_inverse_transform=True)\n", + "\n", + "y = t > 6.9\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "for subplot, pca, title in ((131, lin_pca, \"Linear kernel\"), (132, rbf_pca, \"RBF kernel, $\\gamma=0.04$\"), (133, sig_pca, \"Sigmoid kernel, $\\gamma=10^{-3}, r=1$\")):\n", + " X_reduced = pca.fit_transform(X)\n", + " if subplot == 132:\n", + " X_reduced_rbf = X_reduced\n", + " \n", + " plt.subplot(subplot)\n", + " #plt.plot(X_reduced[y, 0], X_reduced[y, 1], \"gs\")\n", + " #plt.plot(X_reduced[~y, 0], X_reduced[~y, 1], \"y^\")\n", + " plt.title(title, fontsize=14)\n", + " plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=t, cmap=plt.cm.hot)\n", + " plt.xlabel(\"$z_1$\", fontsize=18)\n", + " if subplot == 131:\n", + " plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + " plt.grid(True)\n", + "\n", + "save_fig(\"kernel_pca_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(6, 5))\n", + "\n", + "X_inverse = pca.inverse_transform(X_reduced_rbf)\n", + "\n", + "ax = plt.subplot(111, projection='3d')\n", + "ax.view_init(10, -70)\n", + "ax.scatter(X_inverse[:, 0], X_inverse[:, 1], X_inverse[:, 2], c=t, cmap=plt.cm.hot, marker=\"x\")\n", + "ax.set_xlabel(\"\")\n", + "ax.set_ylabel(\"\")\n", + "ax.set_zlabel(\"\")\n", + "ax.set_xticklabels([])\n", + "ax.set_yticklabels([])\n", + "ax.set_zticklabels([])\n", + "\n", + "save_fig(\"preimage_plot\", tight_layout=False)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_reduced = rbf_pca.fit_transform(X)\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "plt.subplot(132)\n", + "plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=t, cmap=plt.cm.hot, marker=\"x\")\n", + "plt.xlabel(\"$z_1$\", fontsize=18)\n", + "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + "plt.grid(True)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.pipeline import Pipeline\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.grid_search import GridSearchCV\n", + "\n", + "clf = Pipeline([\n", + " (\"kpca\", 
KernelPCA(n_components=2)),\n", + " (\"log_reg\", LogisticRegression())\n", + " ])\n", + "\n", + "param_grid = [\n", + " {\"kpca__gamma\": np.linspace(0.03, 0.05, 10), \"kpca__kernel\": [\"rbf\", \"sigmoid\"]}\n", + " ]\n", + "\n", + "grid_search = GridSearchCV(clf, param_grid, cv=3)\n", + "grid_search.fit(X, y)\n", + "grid_search.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rbf_pca = KernelPCA(n_components = 2, kernel=\"rbf\", gamma=0.0433,\n", + " fit_inverse_transform=True)\n", + "X_reduced = rbf_pca.fit_transform(X)\n", + "X_preimage = rbf_pca.inverse_transform(X_reduced)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.metrics import mean_squared_error\n", + "mean_squared_error(X, X_preimage)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "times_rpca = []\n", + "times_pca = []\n", + "sizes = [1000, 10000, 20000, 30000, 40000, 50000, 70000, 100000, 200000, 500000]\n", + "for n_samples in sizes:\n", + " X = rnd.randn(n_samples, 5)\n", + " pca = RandomizedPCA(n_components = 2, random_state=42)\n", + " t1 = time.time()\n", + " pca.fit(X)\n", + " t2 = time.time()\n", + " times_rpca.append(t2 - t1)\n", + " pca = PCA(n_components = 2)\n", + " t1 = time.time()\n", + " pca.fit(X)\n", + " t2 = time.time()\n", + " times_pca.append(t2 - t1)\n", + "\n", + "plt.plot(sizes, times_rpca, \"b-o\", label=\"RPCA\")\n", + "plt.plot(sizes, times_pca, \"r-s\", label=\"PCA\")\n", + "plt.xlabel(\"n_samples\")\n", + "plt.ylabel(\"Training time\")\n", + "plt.legend(loc=\"upper left\")\n", + "plt.title(\"PCA and Randomized PCA time complexity \")" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "times_rpca = []\n", + "times_pca = []\n", + "sizes = [1000, 2000, 3000, 4000, 5000, 6000]\n", + "for n_features in sizes:\n", + " X = rnd.randn(2000, n_features)\n", + " pca = RandomizedPCA(n_components = 2, random_state=42)\n", + " t1 = time.time()\n", + " pca.fit(X)\n", + " t2 = time.time()\n", + " times_rpca.append(t2 - t1)\n", + " pca = PCA(n_components = 2)\n", + " t1 = time.time()\n", + " pca.fit(X)\n", + " t2 = time.time()\n", + " times_pca.append(t2 - t1)\n", + "\n", + "plt.plot(sizes, times_rpca, \"b-o\", label=\"RPCA\")\n", + "plt.plot(sizes, times_pca, \"r-s\", label=\"PCA\")\n", + "plt.xlabel(\"n_features\")\n", + "plt.ylabel(\"Training time\")\n", + "plt.legend(loc=\"upper left\")\n", + "plt.title(\"PCA and Randomized PCA time complexity \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LLE" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.manifold import LocallyLinearEmbedding\n", + "\n", + "X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=41)\n", + "\n", + "lle = LocallyLinearEmbedding(n_neighbors=10, n_components=2, random_state=42)\n", + "X_reduced = lle.fit_transform(X)\n", + "\n", + "plt.title(\"Unrolled swiss roll using LLE\", fontsize=14)\n", + "plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=t, cmap=plt.cm.hot)\n", + "plt.xlabel(\"$z_1$\", fontsize=18)\n", + "plt.ylabel(\"$z_2$\", fontsize=18)\n", + "plt.axis([-0.065, 0.055, -0.1, 0.12])\n", + "plt.grid(True)\n", + "\n", + 
"save_fig(\"lle_unrolling_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MDS, Isomap and t-SNE" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.manifold import MDS\n", + "mds = MDS(n_components=2, random_state=42)\n", + "X_reduced_mds = mds.fit_transform(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.manifold import Isomap\n", + "isomap = Isomap(n_components=2)\n", + "X_reduced_isomap = isomap.fit_transform(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.manifold import TSNE\n", + "tsne = TSNE(n_components=2)\n", + "X_reduced_tsne = tsne.fit_transform(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", + "lda = LinearDiscriminantAnalysis(n_components=2)\n", + "X_mnist = mnist[\"data\"]\n", + "y_mnist = mnist[\"target\"]\n", + "lda.fit(X_mnist, y_mnist)\n", + "X_reduced_lda = lda.transform(X_mnist)" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "titles = [\"MDS\", \"Isomap\", \"t-SNE\"]\n", + "\n", + "plt.figure(figsize=(11,4))\n", + "\n", + "for subplot, title, X_reduced in zip((131, 132, 133), titles,\n", + " (X_reduced_mds, X_reduced_isomap, X_reduced_tsne)):\n", + " plt.subplot(subplot)\n", + " plt.title(title, fontsize=14)\n", + " plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=t, cmap=plt.cm.hot)\n", + " plt.xlabel(\"$z_1$\", fontsize=18)\n", + " if subplot == 131:\n", + " plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + " plt.grid(True)\n", + "\n", + "save_fig(\"other_dim_reduction_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": { + "height": "352px", + "width": "458px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/09_up_and_running_with_tensorflow.ipynb b/09_up_and_running_with_tensorflow.ipynb new file mode 100644 index 0000000..138ad7d --- /dev/null +++ b/09_up_and_running_with_tensorflow.ipynb @@ -0,0 +1,1709 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 9 – Up and running with TensorFlow**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample 
code and solutions to the exercises in chapter 9._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"tensorflow\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + "    path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + "    print(\"Saving figure\", fig_id)\n", + "    if tight_layout:\n", + "        plt.tight_layout()\n", + "    plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating and running a graph" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import tensorflow as tf" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "x = tf.Variable(3, name=\"x\")\n", + "y = tf.Variable(4, name=\"y\")\n", + "f = x*x*y + y + 2\n", + "\n", + "f" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "sess = tf.Session()\n", + "sess.run(x.initializer)\n", + "sess.run(y.initializer)\n", + "print(sess.run(f))\n", + "sess.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + "    x.initializer.run()\n", + "    y.initializer.run()\n", + "    result = f.eval()\n", + "\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "init = tf.initialize_all_variables()\n", + "\n", + "with tf.Session():\n", + "    init.run()\n", + "    result = f.eval()\n", + "\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "init = tf.initialize_all_variables()\n", + "\n", + "sess = tf.InteractiveSession()\n", + "init.run()\n", + "result = f.eval()\n", + "sess.close()\n", + "\n", + "result" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Managing graphs" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "x1 = tf.Variable(1)\n", + "x1.graph is tf.get_default_graph()" + ] + }, + { + "cell_type": "code", + 
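A practical reason for the `tf.reset_default_graph()` call at the top of most cells in this chapter: in a notebook, re-running a construction cell would otherwise keep adding duplicate nodes to the same default graph. A sketch of the effect (hypothetical, written against the same TF 0.x-era API as the rest of the chapter; the exact `"Variable"` op-type string is an assumption of that era):

```python
tf.reset_default_graph()
x = tf.Variable(3, name="x")
x = tf.Variable(3, name="x")  # simulates re-running the cell
print([op.name for op in tf.get_default_graph().get_operations()
       if op.type == "Variable"])  # both nodes remain, e.g. ['x', 'x_1']
```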
"execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "graph = tf.Graph()\n", + "with graph.as_default():\n", + " x2 = tf.Variable(2)\n", + "\n", + "x2.graph is tf.get_default_graph()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "x2.graph is graph" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "w = tf.constant(3)\n", + "x = w + 2\n", + "y = x + 5\n", + "z = x * 3\n", + "\n", + "with tf.Session() as sess:\n", + " print(y.eval()) # 10\n", + " print(z.eval()) # 15" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " y_val, z_val = sess.run([y, z])\n", + " print(y) # 10\n", + " print(z) # 15" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Linear Regression" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using the Normal Equation" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import fetch_california_housing\n", + "\n", + "housing = fetch_california_housing()\n", + "m, n = housing.data.shape\n", + "housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "X = tf.constant(housing_data_plus_bias, dtype=tf.float64, name=\"X\")\n", + "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float64, name=\"y\")\n", + "XT = tf.transpose(X)\n", + "theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)\n", + "\n", + "with tf.Session() as sess:\n", + " result = theta.eval()\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Compare with pure NumPy" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X = housing_data_plus_bias\n", + "y = housing.target.reshape(-1, 1)\n", + "theta_numpy = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)\n", + "\n", + "print(theta_numpy)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Compare with Scikit-Learn" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "lin_reg = LinearRegression()\n", + "lin_reg.fit(housing.data, housing.target.reshape(-1, 1))\n", + "\n", + "print(np.r_[lin_reg.intercept_.reshape(-1, 1), lin_reg.coef_.T])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using Batch Gradient Descent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Gradient Descent requires scaling the feature vectors first. We could do this using TF, but let's just use Scikit-Learn for now." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "scaler = StandardScaler()\n", + "scaled_housing_data = scaler.fit_transform(housing.data)\n", + "scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(scaled_housing_data_plus_bias.mean(axis=0))\n", + "print(scaled_housing_data_plus_bias.mean(axis=1))\n", + "print(scaled_housing_data_plus_bias.mean())\n", + "print(scaled_housing_data_plus_bias.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Manually computing the gradients" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", + "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "error = y_pred - y\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "gradients = 2/m * tf.matmul(tf.transpose(X), error)\n", + "training_op = tf.assign(theta, theta - learning_rate * gradients)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " if epoch % 100 == 0:\n", + " print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n", + " sess.run(training_op)\n", + " \n", + " best_theta = theta.eval()\n", + "\n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using autodiff\n", + "Same as above except for the `gradients = ...` line." 
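Since the MSE is $\frac{1}{m}\|X\theta - y\|^2$, its gradient with respect to $\theta$ is $\frac{2}{m}X^\top(X\theta - y)$; `tf.gradients(mse, [theta])[0]` derives the same expression symbolically from the graph. A quick numerical check (a sketch assuming `X`, `y`, `theta`, `error` and `mse` are defined as in the surrounding cells):

```python
manual_gradients = 2/m * tf.matmul(tf.transpose(X), error)
auto_gradients = tf.gradients(mse, [theta])[0]
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    diff = sess.run(tf.reduce_max(tf.abs(manual_gradients - auto_gradients)))
print(diff)  # ~0, up to float32 precision
```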
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", + "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "error = y_pred - y\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "gradients = tf.gradients(mse, [theta])[0]\n", + "training_op = tf.assign(theta, theta - learning_rate * gradients)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " if epoch % 100 == 0:\n", + " print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n", + " sess.run(training_op)\n", + " \n", + " best_theta = theta.eval()\n", + "\n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using a `GradientDescentOptimizer`" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", + "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "error = y_pred - y\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(mse)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " if epoch % 100 == 0:\n", + " print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n", + " sess.run(training_op)\n", + " \n", + " best_theta = theta.eval()\n", + "\n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using a momentum optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", + "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "error = y_pred - y\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.25)\n", + "training_op = optimizer.minimize(mse)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in 
range(n_epochs):\n", + "        sess.run(training_op)\n", + "    \n", + "    best_theta = theta.eval()\n", + "\n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Feeding data to the training algorithm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Placeholder nodes" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "A = tf.placeholder(tf.float32, shape=(None, 3))\n", + "B = A + 5\n", + "with tf.Session() as sess:\n", + " B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})\n", + " B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})\n", + "\n", + "print(B_val_1)\n", + "print(B_val_2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mini-batch Gradient Descent" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n + 1), name=\"X\")\n", + "y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "error = y_pred - y\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(mse)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def fetch_batch(epoch, batch_index, batch_size):\n", + " rnd.seed(epoch * n_batches + batch_index)\n", + " indices = rnd.randint(m, size=batch_size)\n", + " X_batch = scaled_housing_data_plus_bias[indices]\n", + " y_batch = housing.target.reshape(-1, 1)[indices]\n", + " return X_batch, y_batch\n", + "\n", + "n_epochs = 10\n", + "batch_size = 100\n", + "n_batches = int(np.ceil(m / batch_size))\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " for batch_index in range(n_batches):\n", + " X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + "\n", + " best_theta = theta.eval()\n", + " \n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Saving and restoring a model" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", + "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "error = y_pred - y\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(mse)\n", + "\n", + "# create the Saver node after all the variables, so it covers every one of them by default\n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " if epoch % 100 == 0:\n", + " print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n", + " save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n", + " sess.run(training_op)\n", + " \n", + " best_theta = theta.eval()\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")\n", + "\n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + },
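+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Restoring is just as simple (a minimal sketch, reusing the \`saver\` node and the \`my_model_final.ckpt\` file written above): instead of running \`init\`, ask the \`Saver\` to load the saved variable values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " saver.restore(sess, \"my_model_final.ckpt\") # loads theta, no need to run init\n", + " print(theta.eval())" + ] + },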
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualizing the graph\n", + "## inside Jupyter" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output, Image, display, HTML\n", + "\n", + "def strip_consts(graph_def, max_const_size=32):\n", + " \"\"\"Strip large constant values from graph_def.\"\"\"\n", + " strip_def = tf.GraphDef()\n", + " for n0 in graph_def.node:\n", + " n = strip_def.node.add() \n", + " n.MergeFrom(n0)\n", + " if n.op == 'Const':\n", + " tensor = n.attr['value'].tensor\n", + " size = len(tensor.tensor_content)\n", + " if size > max_const_size:\n", + " tensor.tensor_content = b\"<stripped %d bytes>\"%size\n", + " return strip_def\n", + "\n", + "def show_graph(graph_def, max_const_size=32):\n", + " \"\"\"Visualize TensorFlow graph.\"\"\"\n", + " if hasattr(graph_def, 'as_graph_def'):\n", + " graph_def = graph_def.as_graph_def()\n", + " strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n", + " code = \"\"\"\n", + " <script>\n", + " function load() {{\n", + " document.getElementById(\"{id}\").pbtxt = {data};\n", + " }}\n", + " </script>\n", + " <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n", + " <div style=\"height:600px\">\n", + " <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n", + " </div>\n", + " \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n", + "\n", + " iframe = \"\"\"\n", + " <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n", + " \"\"\".format(code.replace('\"', '&quot;'))\n", + " display(HTML(iframe))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "show_graph(tf.get_default_graph())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using TensorBoard" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from datetime import datetime\n", + "\n", + "now = datetime.utcnow().strftime(\"%Y%m%d%H%M%S\")\n", + "root_logdir = \"tf_logs\"\n", + "logdir = \"{}/run-{}/\".format(root_logdir, now)\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n + 1), name=\"X\")\n", + "y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "error = y_pred - y\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(mse)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "\n", + "mse_summary = tf.scalar_summary('MSE', mse)\n", + "summary_writer = tf.train.SummaryWriter(logdir, tf.get_default_graph())" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 10\n", + "batch_size = 100\n", + "n_batches = int(np.ceil(m / batch_size))\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " for batch_index in range(n_batches):\n", + " X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)\n", + " if batch_index % 10 == 0:\n", + " summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})\n", + " step = epoch * n_batches + batch_index\n", + " summary_writer.add_summary(summary_str, step)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + "\n", + " best_theta = theta.eval()\n", + "\n", + "summary_writer.flush()\n", + "summary_writer.close()\n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + },
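+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can now start TensorBoard from a shell to browse the MSE curves and the graph (assuming the \`tf_logs\` directory created above): run \`tensorboard --logdir tf_logs/\` and point your browser to http://localhost:6006." + ] + },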
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Name scopes" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "now = datetime.utcnow().strftime(\"%Y%m%d%H%M%S\")\n", + "root_logdir = \"tf_logs\"\n", + "logdir = \"{}/run-{}/\".format(root_logdir, now)\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n + 1), name=\"X\")\n", + "y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "with tf.name_scope('loss') as scope:\n", + " error = y_pred - y\n", + " mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(mse)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "\n", + "mse_summary = tf.scalar_summary('MSE', mse)\n", + "summary_writer = tf.train.SummaryWriter(logdir, tf.get_default_graph())" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 10\n", + "batch_size = 100\n", + "n_batches = int(np.ceil(m / batch_size))\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " for batch_index in range(n_batches):\n", + " X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)\n", + " if batch_index % 10 == 0:\n", + " summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})\n", + " step = epoch * n_batches + batch_index\n", + " summary_writer.add_summary(summary_str, step)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + "\n", + " best_theta = theta.eval()\n", + "\n", + "summary_writer.flush()\n", + "summary_writer.close()\n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(error.op.name)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(mse.op.name)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "a1 = tf.Variable(0, name=\"a\") # name == \"a\"\n", + "a2 = tf.Variable(0, name=\"a\") # name == \"a_1\"\n", + "\n", + "with tf.name_scope(\"param\"): # name == \"param\"\n", + " a3 = tf.Variable(0, name=\"a\") # name == \"param/a\"\n", + "\n", + "with tf.name_scope(\"param\"): # name == \"param_1\"\n", + " a4 = tf.Variable(0, name=\"a\") # name == \"param_1/a\"\n", + "\n", + "for node in (a1, a2, a3, a4):\n", + " print(node.op.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Modularity" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Some ugly, flat code:" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_features = 3\n", + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "\n", + "w1 = tf.Variable(tf.random_normal((n_features, 1)), name=\"weights1\")\n", + "w2 = tf.Variable(tf.random_normal((n_features, 1)), name=\"weights2\")\n", + "b1 = tf.Variable(0.0, name=\"bias1\")\n", + "b2 = tf.Variable(0.0, name=\"bias2\")\n", + "\n", + "linear1 = tf.add(tf.matmul(X, w1), b1, name=\"linear1\")\n", + "linear2 = tf.add(tf.matmul(X, w2), b2, name=\"linear2\")\n", + "\n", + "relu1 = tf.maximum(linear1, 0, name=\"relu1\")\n", + "relu2 = tf.maximum(linear1, 0, name=\"relu2\") # Oops, cut&paste error! 
Did you spot it?\n", + "\n", + "output = tf.add_n([relu1, relu2], name=\"output\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Much better, using a function to build the ReLUs:" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "def relu(X):\n", + " w_shape = int(X.get_shape()[1]), 1\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", + " b = tf.Variable(0.0, name=\"bias\")\n", + " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", + " return tf.maximum(linear, 0, name=\"relu\")\n", + "\n", + "n_features = 3\n", + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "relus = [relu(X) for i in range(5)]\n", + "output = tf.add_n(relus, name=\"output\")\n", + "summary_writer = tf.train.SummaryWriter(\"logs/relu1\", tf.get_default_graph())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Even better using name scopes:" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "def relu(X):\n", + " with tf.name_scope(\"relu\"):\n", + " w_shape = int(X.get_shape()[1]), 1\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", + " b = tf.Variable(0.0, name=\"bias\")\n", + " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", + " return tf.maximum(linear, 0, name=\"max\")\n", + "\n", + "n_features = 3\n", + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "relus = [relu(X) for i in range(5)]\n", + "output = tf.add_n(relus, name=\"output\")\n", + "\n", + "summary_writer = tf.train.SummaryWriter(\"logs/relu2\", tf.get_default_graph())" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "summary_writer.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sharing a `threshold` variable the classic way, by defining it outside of the `relu()` function then passing it as a parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "def relu(X, threshold):\n", + " with tf.name_scope(\"relu\"):\n", + " w_shape = int(X.get_shape()[1]), 1\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", + " b = tf.Variable(0.0, name=\"bias\")\n", + " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", + " return tf.maximum(linear, threshold, name=\"max\")\n", + "\n", + "threshold = tf.Variable(0.0, name=\"threshold\")\n", + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "relus = [relu(X, threshold) for i in range(5)]\n", + "output = tf.add_n(relus, name=\"output\")" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "def relu(X):\n", + " with tf.name_scope(\"relu\"):\n", + " if not hasattr(relu, \"threshold\"):\n", + " relu.threshold = tf.Variable(0.0, name=\"threshold\")\n", + " w_shape = int(X.get_shape()[1]), 1\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", + " b = tf.Variable(0.0, name=\"bias\")\n", + " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", + " return 
tf.maximum(linear, relu.threshold, name=\"max\")\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "relus = [relu(X) for i in range(5)]\n", + "output = tf.add_n(relus, name=\"output\")" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "def relu(X):\n", + " with tf.variable_scope(\"relu\", reuse=True):\n", + " threshold = tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n", + " w_shape = int(X.get_shape()[1]), 1\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", + " b = tf.Variable(0.0, name=\"bias\")\n", + " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", + " return tf.maximum(linear, threshold, name=\"max\")\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "with tf.variable_scope(\"relu\"):\n", + " threshold = tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n", + "relus = [relu(X) for i in range(5)]\n", + "output = tf.add_n(relus, name=\"output\")\n", + "\n", + "summary_writer = tf.train.SummaryWriter(\"logs/relu6\", tf.get_default_graph())\n", + "summary_writer.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "def relu(X):\n", + " with tf.variable_scope(\"relu\"):\n", + " threshold = tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n", + " w_shape = int(X.get_shape()[1]), 1\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", + " b = tf.Variable(0.0, name=\"bias\")\n", + " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", + " return tf.maximum(linear, threshold, name=\"max\")\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "with tf.variable_scope(\"\") as scope:\n", + " first_relu = relu(X) # create the shared variable\n", + " scope.reuse_variables() # then reuse it\n", + " relus = [first_relu] + [relu(X) for i in range(4)]\n", + "output = tf.add_n(relus, name=\"output\")\n", + "\n", + "summary_writer = tf.train.SummaryWriter(\"logs/relu8\", tf.get_default_graph())\n", + "summary_writer.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "with tf.variable_scope(\"param\"):\n", + " x = tf.get_variable(\"x\", shape=(), initializer=tf.constant_initializer(0.))\n", + " #x = tf.Variable(0., name=\"x\")\n", + "with tf.variable_scope(\"param\", reuse=True):\n", + " y = tf.get_variable(\"x\")\n", + "\n", + "with tf.variable_scope(\"\", reuse=True):\n", + " z = tf.get_variable(\"param/x\", shape=(), initializer=tf.constant_initializer(0.))\n", + "\n", + "print(x is y)\n", + "print(x.op.name)\n", + "print(y.op.name)\n", + "print(z.op.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extra material" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Strings" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "text = np.array(\"Do you want some café?\".split())\n", + "text_tensor = tf.constant(text)\n", + "\n", + "with tf.Session() as sess:\n", + " 
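# string tensors are returned as byte arrays, so the output shows b'...' items\n", + " 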
print(text_tensor.eval())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Distributed TensorFlow" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "server = tf.train.Server.create_local_server()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "x = tf.constant(2) + tf.constant(3)\n", + "with tf.Session(server.target) as sess:\n", + " print(sess.run(x))" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "server.target" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "class Const(object):\n", + " def __init__(self, value):\n", + " self.value = value\n", + " def evaluate(self, **variables):\n", + " return self.value\n", + " def __str__(self):\n", + " return str(self.value)\n", + "\n", + "class Var(object):\n", + " def __init__(self, name):\n", + " self.name = name\n", + " def evaluate(self, **variables):\n", + " return variables[self.name]\n", + " def __str__(self):\n", + " return self.name\n", + "\n", + "class BinaryOperator(object):\n", + " def __init__(self, a, b):\n", + " self.a = a\n", + " self.b = b\n", + "\n", + "class Add(BinaryOperator):\n", + " def evaluate(self, **variables):\n", + " return self.a.evaluate(**variables) + self.b.evaluate(**variables)\n", + " def __str__(self):\n", + " return \"{} + {}\".format(self.a, self.b)\n", + "\n", + "class Mul(BinaryOperator):\n", + " def evaluate(self, **variables):\n", + " return self.a.evaluate(**variables) * self.b.evaluate(**variables)\n", + " def __str__(self):\n", + " return \"({}) * ({})\".format(self.a, self.b)\n", + "\n", + "x = Var(\"x\")\n", + "y = Var(\"y\")\n", + "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n", + "print(\"f(x,y) =\", f)\n", + "print(\"f(3,4) =\", f.evaluate(x=3, y=4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Computing gradients\n", + "### Mathematical differentiation" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df_dx = Mul(Const(2), Mul(Var(\"x\"), Var(\"y\"))) # df/dx = 2xy\n", + "df_dy = Add(Mul(Var(\"x\"), Var(\"x\")), Const(1)) # df/dy = x² + 1\n", + "print(\"df/dx(3,4) =\", df_dx.evaluate(x=3, y=4))\n", + "print(\"df/dy(3,4) =\", df_dy.evaluate(x=3, y=4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Numerical differentiation" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def derivative(f, x, y, x_eps, y_eps):\n", + " return (f.evaluate(x = x + x_eps, y = y + y_eps) - f.evaluate(x = x, y = y)) / (x_eps + y_eps)\n", + "\n", + "df_dx_34 = derivative(f, x=3, y=4, x_eps=0.0001, y_eps=0)\n", + "df_dy_34 = derivative(f, x=3, y=4, x_eps=0, y_eps=0.0001)\n", + "print(\"df/dx(3,4) =\", df_dx_34)\n", + "print(\"df/dy(3,4) =\", df_dy_34)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def f(x, y):\n", + " return x**2*y + y + 2\n", + "\n", + "def derivative(f, x, y, x_eps, y_eps):\n", + " return (f(x + x_eps, y + y_eps) - f(x, y)) / (x_eps + y_eps)\n", + "\n", + 
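"# the exact partials at (3, 4) are df/dx = 2xy = 24 and df/dy = x² + 1 = 10\n", + 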
"df_dx = derivative(f, 3, 4, 0.00001, 0)\n", + "df_dy = derivative(f, 3, 4, 0, 0.00001)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(df_dx)\n", + "print(df_dy)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Symbolic differentiation" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "Const.derive = lambda self, var: Const(0)\n", + "Var.derive = lambda self, var: Const(1) if self.name==var else Const(0)\n", + "Add.derive = lambda self, var: Add(self.a.derive(var), self.b.derive(var))\n", + "Mul.derive = lambda self, var: Add(Mul(self.a, self.b.derive(var)), Mul(self.a.derive(var), self.b))\n", + "\n", + "x = Var(\"x\")\n", + "y = Var(\"y\")\n", + "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n", + "\n", + "df_dx = f.derive(\"x\") # 2xy\n", + "df_dy = f.derive(\"y\") # x² + 1\n", + "print(\"df/dx(3,4) =\", df_dx.evaluate(x=3, y=4))\n", + "print(\"df/dy(3,4) =\", df_dy.evaluate(x=3, y=4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Automatic differentiation (autodiff) – forward mode" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "class Const(object):\n", + " def __init__(self, value):\n", + " self.value = value\n", + " def evaluate(self, derive, **variables):\n", + " return self.value, 0\n", + " def __str__(self):\n", + " return str(self.value)\n", + "\n", + "class Var(object):\n", + " def __init__(self, name):\n", + " self.name = name\n", + " def evaluate(self, derive, **variables):\n", + " return variables[self.name], (1 if derive == self.name else 0)\n", + " def __str__(self):\n", + " return self.name\n", + "\n", + "class BinaryOperator(object):\n", + " def __init__(self, a, b):\n", + " self.a = a\n", + " self.b = b\n", + "\n", + "class Add(BinaryOperator):\n", + " def evaluate(self, derive, **variables):\n", + " a, da = self.a.evaluate(derive, **variables)\n", + " b, db = self.b.evaluate(derive, **variables)\n", + " return a + b, da + db\n", + " def __str__(self):\n", + " return \"{} + {}\".format(self.a, self.b)\n", + "\n", + "class Mul(BinaryOperator):\n", + " def evaluate(self, derive, **variables):\n", + " a, da = self.a.evaluate(derive, **variables)\n", + " b, db = self.b.evaluate(derive, **variables)\n", + " return a * b, a * db + da * b\n", + " def __str__(self):\n", + " return \"({}) * ({})\".format(self.a, self.b)\n", + "\n", + "x = Var(\"x\")\n", + "y = Var(\"y\")\n", + "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n", + "f34, df_dx_34 = f.evaluate(x=3, y=4, derive=\"x\")\n", + "f34, df_dy_34 = f.evaluate(x=3, y=4, derive=\"y\")\n", + "print(\"f(3,4) =\", f34)\n", + "print(\"df/dx(3,4) =\", df_dx_34)\n", + "print(\"df/dy(3,4) =\", df_dy_34)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Autodiff – Reverse mode" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "class Const(object):\n", + " def __init__(self, value):\n", + " self.derivative = 0\n", + " self.value = value\n", + " def evaluate(self, **variables):\n", + " return self.value\n", + " def backpropagate(self, derivative):\n", + " pass\n", + " def __str__(self):\n", + " return str(self.value)\n", + "\n", + "class 
Var(object):\n", + " def __init__(self, name):\n", + " self.name = name\n", + " def evaluate(self, **variables):\n", + " self.derivative = 0\n", + " self.value = variables[self.name]\n", + " return self.value\n", + " def backpropagate(self, derivative):\n", + " self.derivative += derivative\n", + " def __str__(self):\n", + " return self.name\n", + "\n", + "class BinaryOperator(object):\n", + " def __init__(self, a, b):\n", + " self.a = a\n", + " self.b = b\n", + "\n", + "class Add(BinaryOperator):\n", + " def evaluate(self, **variables):\n", + " self.derivative = 0\n", + " self.value = self.a.evaluate(**variables) + self.b.evaluate(**variables)\n", + " return self.value\n", + " def backpropagate(self, derivative):\n", + " self.derivative += derivative\n", + " self.a.backpropagate(derivative)\n", + " self.b.backpropagate(derivative)\n", + " def __str__(self):\n", + " return \"{} + {}\".format(self.a, self.b)\n", + "\n", + "class Mul(BinaryOperator):\n", + " def evaluate(self, **variables):\n", + " self.derivative = 0\n", + " self.value = self.a.evaluate(**variables) * self.b.evaluate(**variables)\n", + " return self.value\n", + " def backpropagate(self, derivative):\n", + " self.derivative += derivative\n", + " self.a.backpropagate(derivative * self.b.value)\n", + " self.b.backpropagate(derivative * self.a.value)\n", + " def __str__(self):\n", + " return \"({}) * ({})\".format(self.a, self.b)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "x = Var(\"x\")\n", + "y = Var(\"y\")\n", + "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n", + "f34 = f.evaluate(x=3, y=4)\n", + "f.backpropagate(1)\n", + "print(\"f(3,4) =\", f34)\n", + "print(\"df/dx(3,4) =\", x.derivative)\n", + "print(\"df/dy(3,4) =\", y.derivative)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Autodiff – reverse mode (using TensorFlow)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "x = tf.Variable(3., name=\"x\")\n", + "y = tf.Variable(4., name=\"y\")\n", + "f = x*x*y + y + 2\n", + "\n", + "gradients = tf.gradients(f, [x, y])\n", + "\n", + "init = tf.initialize_all_variables()\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " f_val, gradients_val = sess.run([f, gradients])\n", + "\n", + "f_val, gradients_val" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": { + "height": "603px", + "width": "616px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git 
a/10_introduction_to_artificial_neural_networks.ipynb b/10_introduction_to_artificial_neural_networks.ipynb new file mode 100644 index 0000000..2a512a5 --- /dev/null +++ b/10_introduction_to_artificial_neural_networks.ipynb @@ -0,0 +1,660 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 10 – Introduction to Artificial Neural Networks**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercises in chapter 10._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"ann\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Perceptrons" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.datasets import load_iris\n", + "iris = load_iris()\n", + "X = iris.data[:, (2, 3)] # petal length, petal width\n", + "y = (iris.target == 0).astype(np.int)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import Perceptron\n", + "\n", + "per_clf = Perceptron(random_state=42)\n", + "per_clf.fit(X, y)\n", + "\n", + "y_pred = per_clf.predict([[2, 0.5]])\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a = -per_clf.coef_[0][0] / per_clf.coef_[0][1]\n", + "b = -per_clf.intercept_ / per_clf.coef_[0][1]\n", + "\n", + "axes = [0, 5, 0, 2]\n", + "\n", + "x0, x1 = np.meshgrid(\n", + " np.linspace(axes[0], axes[1], 500).reshape(-1, 1),\n", + " np.linspace(axes[2], axes[3], 200).reshape(-1, 1),\n", + " )\n", + "X_new = np.c_[x0.ravel(), x1.ravel()]\n", + "y_predict = per_clf.predict(X_new)\n", + "zz = y_predict.reshape(x0.shape)\n", + "\n", + "plt.figure(figsize=(10, 4))\n", + "plt.plot(X[y==0, 0], X[y==0, 1], \"bs\", label=\"Not Iris-Setosa\")\n", + "plt.plot(X[y==1, 0], X[y==1, 1], \"yo\", label=\"Iris-Setosa\")\n", + "\n", + "plt.plot([axes[0], axes[1]], [a * axes[0] + b, a * axes[1] + b], \"k-\", 
linewidth=3)\n", + "from matplotlib.colors import ListedColormap\n", + "custom_cmap = ListedColormap(['#9898ff', '#fafab0'])\n", + "\n", + "plt.contourf(x0, x1, zz, cmap=custom_cmap, linewidth=5)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.ylabel(\"Petal width\", fontsize=14)\n", + "plt.legend(loc=\"lower right\", fontsize=14)\n", + "plt.axis(axes)\n", + "\n", + "save_fig(\"perceptron_iris_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Activation functions" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def logit(z):\n", + " return 1 / (1 + np.exp(-z))\n", + "\n", + "def relu(z):\n", + " return np.maximum(0, z)\n", + "\n", + "def derivative(f, z, eps=0.000001):\n", + " return (f(z + eps) - f(z - eps))/(2 * eps)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "z = np.linspace(-5, 5, 200)\n", + "\n", + "plt.figure(figsize=(11,4))\n", + "\n", + "plt.subplot(121)\n", + "plt.plot(z, np.sign(z), \"r-\", linewidth=2, label=\"Step\")\n", + "plt.plot(z, logit(z), \"g--\", linewidth=2, label=\"Logit\")\n", + "plt.plot(z, np.tanh(z), \"b-\", linewidth=2, label=\"Tanh\")\n", + "plt.plot(z, relu(z), \"m-.\", linewidth=2, label=\"ReLU\")\n", + "plt.grid(True)\n", + "plt.legend(loc=\"center right\", fontsize=14)\n", + "plt.title(\"Activation functions\", fontsize=14)\n", + "plt.axis([-5, 5, -1.2, 1.2])\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(z, derivative(np.sign, z), \"r-\", linewidth=2, label=\"Step\")\n", + "plt.plot(0, 0, \"ro\", markersize=5)\n", + "plt.plot(0, 0, \"rx\", markersize=10)\n", + "plt.plot(z, derivative(logit, z), \"g--\", linewidth=2, label=\"Logit\")\n", + "plt.plot(z, derivative(np.tanh, z), \"b-\", linewidth=2, label=\"Tanh\")\n", + "plt.plot(z, derivative(relu, z), \"m-.\", linewidth=2, label=\"ReLU\")\n", + "plt.grid(True)\n", + "#plt.legend(loc=\"center right\", fontsize=14)\n", + "plt.title(\"Derivatives\", fontsize=14)\n", + "plt.axis([-5, 5, -0.2, 1.2])\n", + "\n", + "save_fig(\"activation_functions_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def heaviside(z):\n", + " return (z >= 0).astype(z.dtype)\n", + "\n", + "def sigmoid(z):\n", + " return 1/(1+np.exp(-z))\n", + "\n", + "def mlp_xor(x1, x2, activation=heaviside):\n", + " return activation(-activation(x1 + x2 - 1.5) + activation(x1 + x2 - 0.5) - 0.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "x1s = np.linspace(-0.2, 1.2, 100)\n", + "x2s = np.linspace(-0.2, 1.2, 100)\n", + "x1, x2 = np.meshgrid(x1s, x2s)\n", + "\n", + "z1 = mlp_xor(x1, x2, activation=heaviside)\n", + "z2 = mlp_xor(x1, x2, activation=sigmoid)\n", + "\n", + "plt.figure(figsize=(10,4))\n", + "\n", + "plt.subplot(121)\n", + "plt.contourf(x1, x2, z1)\n", + "plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n", + "plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n", + "plt.title(\"Activation function: heaviside\", fontsize=14)\n", + "plt.grid(True)\n", + "\n", + "plt.subplot(122)\n", + "plt.contourf(x1, x2, z2)\n", + "plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n", + "plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n", + "plt.title(\"Activation function: sigmoid\", fontsize=14)\n", + "plt.grid(True)" + 
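"\n", + "# the inner steps compute AND (threshold 1.5) and OR (threshold 0.5) of the inputs;\n", + "# the output fires only when OR is on and AND is off, which is exactly XOR\n" + 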
] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# FNN for MNIST" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## using tf.learn" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from tensorflow.examples.tutorials.mnist import input_data\n", + "mnist = input_data.read_data_sets(\"/tmp/data/\")\n", + "X_train = mnist.train.images\n", + "X_test = mnist.test.images\n", + "y_train = mnist.train.labels.astype(\"int\")\n", + "y_test = mnist.test.labels.astype(\"int\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "\n", + "feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)\n", + "dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[300, 100], n_classes=10,\n", + " feature_columns=feature_columns)\n", + "dnn_clf.fit(x=X_train, y=y_train, batch_size=50, steps=40000)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.metrics import accuracy_score\n", + "\n", + "y_pred = dnn_clf.predict(X_test)\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.metrics import log_loss\n", + "\n", + "y_pred_proba = dnn_clf.predict_proba(X_test)\n", + "log_loss(y_test, y_pred_proba)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "dnn_clf.evaluate(X_test, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "## Using plain TensorFlow" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "\n", + "def neuron_layer(X, n_neurons, name, activation=None):\n", + " with tf.name_scope(name):\n", + " n_inputs = int(X.get_shape()[1])\n", + " stddev = 1 / np.sqrt(n_inputs)\n", + " init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)\n", + " W = tf.Variable(init, name=\"weights\")\n", + " b = tf.Variable(tf.zeros([n_neurons]), name=\"biases\")\n", + " Z = tf.matmul(X, W) + b\n", + " if activation==\"relu\":\n", + " return tf.nn.relu(Z)\n", + " else:\n", + " return Z" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 28*28 # MNIST\n", + "n_hidden1 = 300\n", + "n_hidden2 = 100\n", + "n_outputs = 10\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", + "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", + "\n", + "with tf.name_scope(\"dnn\"):\n", + " hidden1 = neuron_layer(X, n_hidden1, \"hidden1\", activation=\"relu\")\n", + " hidden2 = neuron_layer(hidden1, n_hidden2, \"hidden2\", activation=\"relu\")\n", + " logits = neuron_layer(hidden2, n_outputs, \"output\")\n", + "\n", + "with tf.name_scope(\"loss\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "\n", + "with tf.name_scope(\"train\"):\n", + " optimizer = 
tf.train.GradientDescentOptimizer(learning_rate)\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + "with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 20\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + "\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " saver.restore(sess, \"my_model_final.ckpt\")\n", + " X_new_scaled = mnist.test.images[:20]\n", + " Z = logits.eval(feed_dict={X: X_new_scaled})\n", + " print(np.argmax(Z, axis=1))\n", + " print(mnist.test.labels[:20])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output, Image, display, HTML\n", + "\n", + "def strip_consts(graph_def, max_const_size=32):\n", + " \"\"\"Strip large constant values from graph_def.\"\"\"\n", + " strip_def = tf.GraphDef()\n", + " for n0 in graph_def.node:\n", + " n = strip_def.node.add() \n", + " n.MergeFrom(n0)\n", + " if n.op == 'Const':\n", + " tensor = n.attr['value'].tensor\n", + " size = len(tensor.tensor_content)\n", + " if size > max_const_size:\n", + " tensor.tensor_content = b\"<stripped %d bytes>\"%size\n", + " return strip_def\n", + "\n", + "def show_graph(graph_def, max_const_size=32):\n", + " \"\"\"Visualize TensorFlow graph.\"\"\"\n", + " if hasattr(graph_def, 'as_graph_def'):\n", + " graph_def = graph_def.as_graph_def()\n", + " strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n", + " code = \"\"\"\n", + " <script>\n", + " function load() {{\n", + " document.getElementById(\"{id}\").pbtxt = {data};\n", + " }}\n", + " </script>\n", + " <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n", + " <div style=\"height:600px\">\n", + " <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n", + " </div>\n", + " \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n", + "\n", + " iframe = \"\"\"\n", + " <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n", + " \"\"\".format(code.replace('\"', '&quot;'))\n", + " display(HTML(iframe))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_graph(tf.get_default_graph())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using \`fully_connected\` instead of \`neuron_layer()\`" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_inputs = 28*28 # MNIST\n", + "n_hidden1 = 300\n", + "n_hidden2 = 100\n", + "n_outputs = 10\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", + "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", + "\n", + "with tf.name_scope(\"dnn\"):\n", + " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", + " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", + " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + "with tf.name_scope(\"loss\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "\n", + "with tf.name_scope(\"train\"):\n", + " optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + "with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 20\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + "\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_graph(tf.get_default_graph())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + },
+ "nav_menu": { + "height": "264px", + "width": "369px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/11_deep_learning.ipynb b/11_deep_learning.ipynb new file mode 100644 index 0000000..761a60f --- /dev/null +++ b/11_deep_learning.ipynb @@ -0,0 +1,931 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 11 – Deep Learning**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercises in chapter 11._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"deep\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Activation functions" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def logit(z):\n", + " return 1 / (1 + np.exp(-z))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "z = np.linspace(-5, 5, 200)\n", + "\n", + "plt.plot([-5, 5], [0, 0], 'k-')\n", + "plt.plot([-5, 5], [1, 1], 'k--')\n", + "plt.plot([0, 0], [-0.2, 1.2], 'k-')\n", + "plt.plot([-5, 5], [-3/4, 7/4], 'g--')\n", + "plt.plot(z, logit(z), \"b-\", linewidth=2)\n", + "props = dict(facecolor='black', shrink=0.1)\n", + "plt.annotate('Saturating', xytext=(3.5, 0.7), xy=(5, 1), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.annotate('Saturating', xytext=(-3.5, 0.3), xy=(-5, 0), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.annotate('Linear', xytext=(2, 0.2), xy=(0, 0.5), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.grid(True)\n", + "plt.title(\"Sigmoid activation function\", fontsize=14)\n", + "plt.axis([-5, 5, -0.2, 1.2])\n", + "\n", + "save_fig(\"sigmoid_saturation_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def leaky_relu(z, alpha=0.01):\n", + " return np.maximum(alpha*z, z)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.plot(z, leaky_relu(z, 0.05), \"b-\", linewidth=2)\n", + "plt.plot([-5, 5], [0, 0], 'k-')\n", + "plt.plot([0, 0], [-0.5, 4.2], 'k-')\n", + "plt.grid(True)\n", + "props = dict(facecolor='black', shrink=0.1)\n", + "plt.annotate('Leak', xytext=(-3.5, 0.5), xy=(-5, -0.2), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.title(\"Leaky ReLU activation function\", fontsize=14)\n", + "plt.axis([-5, 5, -0.5, 4.2])\n", + "\n", + "save_fig(\"leaky_relu_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def elu(z, alpha=1):\n", + " return np.where(z<0, alpha*(np.exp(z)-1), z)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.plot(z, elu(z), \"b-\", linewidth=2)\n", + "plt.plot([-5, 5], [0, 0], 'k-')\n", + "plt.plot([-5, 5], [-1, -1], 'k--')\n", + "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n", + "plt.grid(True)\n", + "props = dict(facecolor='black', shrink=0.1)\n", + "plt.title(r\"ELU activation function ($\\alpha=1$)\", fontsize=14)\n", + "plt.axis([-5, 5, -2.2, 3.2])\n", + "\n", + "save_fig(\"elu_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from tensorflow.examples.tutorials.mnist import input_data\n", + "mnist = input_data.read_data_sets(\"/tmp/data/\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def leaky_relu(z, name=None):\n", + " return tf.maximum(0.01 * z, z, name=name)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import tensorflow as tf" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output, Image, display, HTML\n", + "\n", + "def strip_consts(graph_def, max_const_size=32):\n", + " \"\"\"Strip large constant values from graph_def.\"\"\"\n", + " strip_def = tf.GraphDef()\n", + " for n0 in graph_def.node:\n", + " n = strip_def.node.add() \n", + " n.MergeFrom(n0)\n", + " if n.op == 'Const':\n", + " tensor = n.attr['value'].tensor\n", + " size = len(tensor.tensor_content)\n", + " if size > max_const_size:\n", + " tensor.tensor_content = b\"<stripped %d bytes>\"%size\n", + " return strip_def\n", + "\n", + "def show_graph(graph_def, max_const_size=32):\n", + " \"\"\"Visualize TensorFlow graph.\"\"\"\n", + " if hasattr(graph_def, 'as_graph_def'):\n", + " graph_def = graph_def.as_graph_def()\n", + " strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n", + " code = \"\"\"\n", + " <script>\n", + " function load() {{\n", + " document.getElementById(\"{id}\").pbtxt = {data};\n", + " }}\n", + " </script>\n", + " <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n", + " <div style=\"height:600px\">\n", + " <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n", + " </div>\n", + " \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n", + "\n", + " iframe = \"\"\"\n", + " <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n", + " \"\"\".format(code.replace('\"', '&quot;'))\n", + " display(HTML(iframe))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 28*28 # MNIST\n", + "n_hidden1 = 300\n", + "n_hidden2 = 100\n", + "n_outputs = 10\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", + "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", + "\n", + "with tf.name_scope(\"dnn\"):\n", + " hidden1 = fully_connected(X, n_hidden1, activation_fn=leaky_relu, scope=\"hidden1\")\n", + " hidden2 = fully_connected(hidden1, n_hidden2, activation_fn=leaky_relu, scope=\"hidden2\")\n", + " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + "with tf.name_scope(\"loss\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "\n", + "with tf.name_scope(\"train\"):\n", + " optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + "with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 20\n", + "batch_size = 100\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + "\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Batch Normalization" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from tensorflow.contrib.layers import fully_connected, batch_norm\n", + "from tensorflow.contrib.framework import arg_scope\n", + "\n", + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 28 * 28 # MNIST\n", + "n_hidden1 = 300\n", + "n_hidden2 = 100\n", + "n_outputs = 10\n", + "learning_rate = 0.01\n", + "momentum = 0.25\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", + "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", + "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", + "\n", + "with tf.name_scope(\"dnn\"):\n", + " he_init = tf.contrib.layers.variance_scaling_initializer()\n", + " batch_norm_params = {\n", + " 'is_training': is_training,\n", + " 'decay': 0.9,\n", + " 'updates_collections': None,\n", + " 'scale': True,\n", + " }\n", + "\n", + " with arg_scope(\n", + " [fully_connected],\n", + " activation_fn=tf.nn.elu,\n", + 
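" # He initialization (variance_scaling_initializer) with ELU helps avoid vanishing/exploding gradients early in training\n", + 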
" weights_initializer=he_init,\n", + " normalizer_fn=batch_norm,\n", + " normalizer_params=batch_norm_params):\n", + " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", + " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", + " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + "with tf.name_scope(\"loss\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "\n", + "with tf.name_scope(\"train\"):\n", + " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + "with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 20\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + "\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", + "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", + "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", + "\n", + "with tf.name_scope(\"dnn\"):\n", + " he_init = tf.contrib.layers.variance_scaling_initializer()\n", + " batch_norm_params = {\n", + " 'is_training': is_training,\n", + " 'decay': 0.9,\n", + " 'updates_collections': None,\n", + " 'scale': True,\n", + " }\n", + "\n", + " with arg_scope(\n", + " [fully_connected],\n", + " activation_fn=tf.nn.elu,\n", + " weights_initializer=he_init,\n", + " normalizer_fn=batch_norm,\n", + " normalizer_params=batch_norm_params,\n", + " weights_regularizer=tf.contrib.layers.l1_regularizer(0.01)):\n", + " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", + " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", + " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + "with tf.name_scope(\"loss\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + " reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n", + " base_loss = tf.reduce_mean(xentropy, name=\"base_loss\")\n", + " loss = tf.add_n([base_loss] + reg_losses, name=\"loss\") # reg_losses is a list of tensors, so we use add_n\n", + "\n", + "with tf.name_scope(\"train\"):\n", + " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + "with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, 
tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 20\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + "\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "[v.name for v in tf.all_variables()]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.variable_scope(\"\", reuse=True):\n", + " weights1 = tf.get_variable(\"hidden1/weights\")\n", + " weights2 = tf.get_variable(\"hidden2/weights\")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "x = tf.constant([0., 0., 3., 4., 30., 40., 300., 400.], shape=(4, 2))\n", + "c = tf.clip_by_norm(x, clip_norm=10)\n", + "c0 = tf.clip_by_norm(x, clip_norm=350, axes=0)\n", + "c1 = tf.clip_by_norm(x, clip_norm=10, axes=1)\n", + "\n", + "with tf.Session() as sess:\n", + " xv = x.eval()\n", + " cv = c.eval()\n", + " c0v = c0.eval()\n", + " c1v = c1.eval()\n", + "\n", + "print(xv)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(cv)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(np.linalg.norm(cv))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(c0v)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(np.linalg.norm(c0v, axis=0))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(c1v)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(np.linalg.norm(c1v, axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", + "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", + "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", + "\n", + "def max_norm_regularizer(threshold, axes=1, name=\"max_norm\", collection=\"max_norm\"):\n", + " def max_norm(weights):\n", + " clip_weights = tf.assign(weights, tf.clip_by_norm(weights, clip_norm=threshold, axes=axes), 
name=name)\n", + " tf.add_to_collection(collection, clip_weights)\n", + " return None # there is no regularization loss term\n", + " return max_norm\n", + "\n", + "with tf.name_scope(\"dnn\"):\n", + " with arg_scope(\n", + " [fully_connected],\n", + " weights_regularizer=max_norm_regularizer(1.5)):\n", + " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", + " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", + " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + "clip_all_weights = tf.get_collection(\"max_norm\") # these assign ops must be run explicitly (see the training loop below)\n", + " \n", + "with tf.name_scope(\"loss\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "\n", + "with tf.name_scope(\"train\"):\n", + " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", + " threshold = 1.0\n", + " grads_and_vars = optimizer.compute_gradients(loss)\n", + " capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var)\n", + " for grad, var in grads_and_vars]\n", + " training_op = optimizer.apply_gradients(capped_gvs)\n", + "\n", + "with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 20\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n", + " sess.run(clip_all_weights) # apply the max-norm clipping after each training step, or the constraint is never enforced\n", + " acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + "\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_graph(tf.get_default_graph())" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from tensorflow.contrib.layers import dropout\n", + "\n", + "tf.reset_default_graph()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", + "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", + "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", + "\n", + "initial_learning_rate = 0.1\n", + "decay_steps = 10000\n", + "decay_rate = 1/10\n", + "global_step = tf.Variable(0, trainable=False)\n", + "learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step,\n", + " decay_steps, decay_rate)\n", + "\n", + "keep_prob = 0.5\n", + "\n", + "with tf.name_scope(\"dnn\"):\n", + " he_init = tf.contrib.layers.variance_scaling_initializer()\n", + " with arg_scope(\n", + " [fully_connected],\n", + " activation_fn=tf.nn.elu,\n", + " weights_initializer=he_init):\n", + " X_drop = dropout(X, keep_prob, is_training=is_training)\n", + " hidden1 = fully_connected(X_drop, n_hidden1, scope=\"hidden1\")\n", + " hidden1_drop = 
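dropout(hidden1, keep_prob, is_training=is_training)\n", + " # Note: contrib.layers.dropout scales the kept activations by 1/keep_prob at\n", + " # training time, so no rescaling is needed at test time; when is_training is\n", + " # False the layer is simply the identity: 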
dropout(hidden1, keep_prob, is_training=is_training)\n", + " hidden2 = fully_connected(hidden1_drop, n_hidden2, scope=\"hidden2\")\n", + " hidden2_drop = dropout(hidden2, keep_prob, is_training=is_training)\n", + " logits = fully_connected(hidden2_drop, n_outputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + "with tf.name_scope(\"loss\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "\n", + "with tf.name_scope(\"train\"):\n", + " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", + " training_op = optimizer.minimize(loss, global_step=global_step) \n", + "\n", + "with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 20\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + "\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,\n", + " scope=\"hidden[2]|outputs\")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "training_op2 = optimizer.minimize(loss, var_list=train_vars)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for i in tf.all_variables():\n", + " print(i.name)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for i in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):\n", + " print(i.name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for i in train_vars:\n", + " print(i.name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_train = mnist.train.images\n", + "y_train = mnist.train.labels.astype(\"int\")\n", + "X_val = mnist.test.images[8000:]\n", + "y_val = mnist.test.labels[8000:].astype(\"int\")\n", + "\n", + "feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)\n", + "dnn_clf = tf.contrib.learn.DNNClassifier(\n", + " feature_columns = feature_columns,\n", + " hidden_units=[300, 100],\n", + " n_classes=10,\n", + " model_dir=\"/tmp/my_model\",\n", + " config=tf.contrib.learn.RunConfig(save_checkpoints_secs=60)\n", + " )\n", + "\n", + "validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(\n", + " 
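# Note: this monitor evaluates on (X_val, y_val) every 50 steps, and interrupts\n", + " # training early once the \"loss\" metric has stopped improving for 2000 steps:\n", + "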
X_val,\n", + " y_val,\n", + " every_n_steps=50,\n", + " early_stopping_metric=\"loss\",\n", + " early_stopping_metric_minimize=True,\n", + " early_stopping_rounds=2000\n", + " )\n", + "\n", + "dnn_clf.fit(x=X_train,\n", + " y=y_train,\n", + " steps=40000,\n", + " monitors=[validation_monitor]\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": { + "height": "360px", + "width": "416px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/12_distributed_tensorflow.ipynb b/12_distributed_tensorflow.ipynb new file mode 100644 index 0000000..c438d48 --- /dev/null +++ b/12_distributed_tensorflow.ipynb @@ -0,0 +1,494 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 12 – Distributed TensorFlow**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercises in chapter 12._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"distributed\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Local server" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import tensorflow as tf" + ] + }, + { + "cell_type": "code", + 
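"execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Note: create_local_server() starts an in-process, single-task cluster;\n", + "# any session created with server.target (next cells) executes against it\n" + ] + }, + { + "cell_type": "code", + 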
"execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "c = tf.constant(\"Hello distributed TensorFlow!\")\n", + "server = tf.train.Server.create_local_server()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session(server.target) as sess:\n", + " print(sess.run(c))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cluster" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "cluster_spec = tf.train.ClusterSpec({\n", + " \"ps\": [\n", + " \"127.0.0.1:2221\", # /job:ps/task:0\n", + " \"127.0.0.1:2222\", # /job:ps/task:1\n", + " ],\n", + " \"worker\": [\n", + " \"127.0.0.1:2223\", # /job:worker/task:0\n", + " \"127.0.0.1:2224\", # /job:worker/task:1\n", + " \"127.0.0.1:2225\", # /job:worker/task:2\n", + " ]})" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "task_ps0 = tf.train.Server(cluster_spec, job_name=\"ps\", task_index=0)\n", + "task_ps1 = tf.train.Server(cluster_spec, job_name=\"ps\", task_index=1)\n", + "task_worker0 = tf.train.Server(cluster_spec, job_name=\"worker\", task_index=0)\n", + "task_worker1 = tf.train.Server(cluster_spec, job_name=\"worker\", task_index=1)\n", + "task_worker2 = tf.train.Server(cluster_spec, job_name=\"worker\", task_index=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pinning operations across devices and servers" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "with tf.device(\"/job:ps\"):\n", + " a = tf.Variable(1.0, name=\"a\")\n", + "\n", + "with tf.device(\"/job:worker\"):\n", + " b = a + 2\n", + "\n", + "with tf.device(\"/job:worker/task:1\"):\n", + " c = a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session(\"grpc://127.0.0.1:2221\") as sess:\n", + " sess.run(a.initializer)\n", + " print(c.eval())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "with tf.device(tf.train.replica_device_setter(\n", + " ps_tasks=2,\n", + " ps_device=\"/job:ps\",\n", + " worker_device=\"/job:worker\")):\n", + " v1 = tf.Variable(1.0, name=\"v1\") # pinned to /job:ps/task:0 (defaults to /cpu:0)\n", + " v2 = tf.Variable(2.0, name=\"v2\") # pinned to /job:ps/task:1 (defaults to /cpu:0)\n", + " v3 = tf.Variable(3.0, name=\"v3\") # pinned to /job:ps/task:0 (defaults to /cpu:0)\n", + " s = v1 + v2 # pinned to /job:worker (defaults to task:0/cpu:0)\n", + " with tf.device(\"/task:1\"):\n", + " p1 = 2 * s # pinned to /job:worker/task:1 (defaults to /cpu:0)\n", + " with tf.device(\"/cpu:0\"):\n", + " p2 = 3 * s # pinned to /job:worker/task:1/cpu:0\n", + "\n", + "config = tf.ConfigProto()\n", + "config.log_device_placement = True\n", + "\n", + "with tf.Session(\"grpc://127.0.0.1:2221\", config=config) as sess:\n", + " v1.initializer.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Readers" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + 
"tf.reset_default_graph()\n", + "\n", + "test_csv = open(\"my_test.csv\", \"w\")\n", + "test_csv.write(\"x1, x2 , target\\n\")\n", + "test_csv.write(\"1., , 0\\n\")\n", + "test_csv.write(\"4., 5. , 1\\n\")\n", + "test_csv.write(\"7., 8. , 0\\n\")\n", + "test_csv.close()\n", + "\n", + "filename_queue = tf.FIFOQueue(capacity=10, dtypes=[tf.string], shapes=[()])\n", + "filename = tf.placeholder(tf.string)\n", + "enqueue_filename = filename_queue.enqueue([filename])\n", + "close_filename_queue = filename_queue.close()\n", + "\n", + "reader = tf.TextLineReader(skip_header_lines=1)\n", + "key, value = reader.read(filename_queue)\n", + "\n", + "x1, x2, target = tf.decode_csv(value, record_defaults=[[-1.], [-1.], [-1]])\n", + "features = tf.pack([x1, x2])\n", + "\n", + "instance_queue = tf.RandomShuffleQueue(\n", + " capacity=10, min_after_dequeue=2,\n", + " dtypes=[tf.float32, tf.int32], shapes=[[2],[]],\n", + " name=\"instance_q\", shared_name=\"shared_instance_q\")\n", + "enqueue_instance = instance_queue.enqueue([features, target])\n", + "close_instance_queue = instance_queue.close()\n", + "\n", + "minibatch_instances, minibatch_targets = instance_queue.dequeue_up_to(2)\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(enqueue_filename, feed_dict={filename: \"my_test.csv\"})\n", + " sess.run(close_filename_queue)\n", + " try:\n", + " while True:\n", + " sess.run(enqueue_instance)\n", + " except tf.errors.OutOfRangeError as ex:\n", + " print(\"No more files to read\")\n", + " sess.run(close_instance_queue)\n", + " try:\n", + " while True:\n", + " print(sess.run([minibatch_instances, minibatch_targets]))\n", + " except tf.errors.OutOfRangeError as ex:\n", + " print(\"No more training instances\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#coord = tf.train.Coordinator()\n", + "#threads = tf.train.start_queue_runners(coord=coord)\n", + "#filename_queue = tf.train.string_input_producer([\"test.csv\"])\n", + "#coord.request_stop()\n", + "#coord.join(threads)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Queue runners and coordinators" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "filename_queue = tf.FIFOQueue(capacity=10, dtypes=[tf.string], shapes=[()])\n", + "filename = tf.placeholder(tf.string)\n", + "enqueue_filename = filename_queue.enqueue([filename])\n", + "close_filename_queue = filename_queue.close()\n", + "\n", + "reader = tf.TextLineReader(skip_header_lines=1)\n", + "key, value = reader.read(filename_queue)\n", + "\n", + "x1, x2, target = tf.decode_csv(value, record_defaults=[[-1.], [-1.], [-1]])\n", + "features = tf.pack([x1, x2])\n", + "\n", + "instance_queue = tf.RandomShuffleQueue(\n", + " capacity=10, min_after_dequeue=2,\n", + " dtypes=[tf.float32, tf.int32], shapes=[[2],[]],\n", + " name=\"instance_q\", shared_name=\"shared_instance_q\")\n", + "enqueue_instance = instance_queue.enqueue([features, target])\n", + "close_instance_queue = instance_queue.close()\n", + "\n", + "minibatch_instances, minibatch_targets = instance_queue.dequeue_up_to(2)\n", + "\n", + "n_threads = 5\n", + "queue_runner = tf.train.QueueRunner(instance_queue, [enqueue_instance] * n_threads)\n", + "coord = tf.train.Coordinator()\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(enqueue_filename, feed_dict={filename: \"my_test.csv\"})\n", + 
" sess.run(close_filename_queue)\n", + " enqueue_threads = queue_runner.create_threads(sess, coord=coord, start=True)\n", + " try:\n", + " while True:\n", + " print(sess.run([minibatch_instances, minibatch_targets]))\n", + " except tf.errors.OutOfRangeError as ex:\n", + " print(\"No more training instances\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "def read_and_push_instance(filename_queue, instance_queue):\n", + " reader = tf.TextLineReader(skip_header_lines=1)\n", + " key, value = reader.read(filename_queue)\n", + " x1, x2, target = tf.decode_csv(value, record_defaults=[[-1.], [-1.], [-1]])\n", + " features = tf.pack([x1, x2])\n", + " enqueue_instance = instance_queue.enqueue([features, target])\n", + " return enqueue_instance\n", + "\n", + "filename_queue = tf.FIFOQueue(capacity=10, dtypes=[tf.string], shapes=[()])\n", + "filename = tf.placeholder(tf.string)\n", + "enqueue_filename = filename_queue.enqueue([filename])\n", + "close_filename_queue = filename_queue.close()\n", + "\n", + "instance_queue = tf.RandomShuffleQueue(\n", + " capacity=10, min_after_dequeue=2,\n", + " dtypes=[tf.float32, tf.int32], shapes=[[2],[]],\n", + " name=\"instance_q\", shared_name=\"shared_instance_q\")\n", + "\n", + "minibatch_instances, minibatch_targets = instance_queue.dequeue_up_to(2)\n", + "\n", + "read_and_enqueue_ops = [read_and_push_instance(filename_queue, instance_queue) for i in range(5)]\n", + "queue_runner = tf.train.QueueRunner(instance_queue, read_and_enqueue_ops)\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(enqueue_filename, feed_dict={filename: \"my_test.csv\"})\n", + " sess.run(close_filename_queue)\n", + " coord = tf.train.Coordinator()\n", + " enqueue_threads = queue_runner.create_threads(sess, coord=coord, start=True)\n", + " try:\n", + " while True:\n", + " print(sess.run([minibatch_instances, minibatch_targets]))\n", + " except tf.errors.OutOfRangeError as ex:\n", + " print(\"No more training instances\")\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setting a timeout" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "q = tf.FIFOQueue(capacity=10, dtypes=[tf.float32], shapes=[()])\n", + "v = tf.placeholder(tf.float32)\n", + "enqueue = q.enqueue([v])\n", + "dequeue = q.dequeue()\n", + "output = dequeue + 1\n", + "\n", + "config = tf.ConfigProto()\n", + "config.operation_timeout_in_ms = 1000\n", + "\n", + "with tf.Session(config=config) as sess:\n", + " sess.run(enqueue, feed_dict={v: 1.0})\n", + " sess.run(enqueue, feed_dict={v: 2.0})\n", + " sess.run(enqueue, feed_dict={v: 3.0})\n", + " print(sess.run(output))\n", + " print(sess.run(output, feed_dict={dequeue: 5}))\n", + " print(sess.run(output))\n", + " print(sess.run(output))\n", + " try:\n", + " print(sess.run(output))\n", + " except tf.errors.DeadlineExceededError as ex:\n", + " print(\"Timed out while dequeuing\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + 
"display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": {}, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/13_convolutional_neural_networks.ipynb b/13_convolutional_neural_networks.ipynb new file mode 100644 index 0000000..b4ea324 --- /dev/null +++ b/13_convolutional_neural_networks.ipynb @@ -0,0 +1,613 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 13 – Convolutional Neural Networks**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercices in chapter 13._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"cnn\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A couple utility functions to plot grayscale and RGB images:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def plot_image(image):\n", + " plt.imshow(image, cmap=\"gray\", interpolation=\"nearest\")\n", + " plt.axis(\"off\")\n", + "\n", + "def plot_color_image(image):\n", + " plt.imshow(image.astype(np.uint8),interpolation=\"nearest\")\n", + " plt.axis(\"off\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And of course we will need TensorFlow:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import tensorflow as tf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convolutional layer" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": 
[], + "source": [ + "from sklearn.datasets import load_sample_images\n", + "dataset = load_sample_images()\n", + "china, flower = dataset.images\n", + "image = china[150:220, 130:250]\n", + "height, width, channels = image.shape\n", + "image_grayscale = image.mean(axis=2).astype(np.float32)\n", + "images = image_grayscale.reshape(1, height, width, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fmap = np.zeros(shape=(7, 7, 1, 2), dtype=np.float32)\n", + "fmap[:, 3, 0, 0] = 1\n", + "fmap[3, :, 0, 1] = 1\n", + "fmap[:, :, 0, 0]\n", + "plot_image(fmap[:, :, 0, 0])\n", + "plt.show()\n", + "plot_image(fmap[:, :, 0, 1])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, height, width, 1))\n", + "feature_maps = tf.constant(fmap)\n", + "convolution = tf.nn.conv2d(X, feature_maps, strides=[1,1,1,1], padding=\"SAME\", use_cudnn_on_gpu=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " output = convolution.eval(feed_dict={X: images})" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plot_image(images[0, :, :, 0])\n", + "save_fig(\"china_original\", tight_layout=False)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plot_image(output[0, :, :, 0])\n", + "save_fig(\"china_vertical\", tight_layout=False)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plot_image(output[0, :, :, 1])\n", + "save_fig(\"china_horizontal\", tight_layout=False)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Simple example" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import load_sample_images\n", + "dataset = np.array(load_sample_images().images, dtype=np.float32)\n", + "batch_size, height, width, channels = dataset.shape\n", + "\n", + "filters = np.zeros(shape=(7, 7, channels, 2), dtype=np.float32)\n", + "filters[:, 3, :, 0] = 1 # vertical line\n", + "filters[3, :, :, 1] = 1 # horizontal line\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, height, width, channels))\n", + "convolution = tf.nn.conv2d(X, filters, strides=[1,2,2,1], padding=\"SAME\")\n", + "\n", + "with tf.Session() as sess:\n", + " output = sess.run(convolution, feed_dict={X: dataset})\n", + "\n", + "for image_index in (0, 1):\n", + " for feature_map_index in (0, 1):\n", + " plot_image(output[image_index, :, :, feature_map_index])\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## VALID vs SAME padding" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "filter_primes = np.array([2., 3., 5., 7., 11., 13.], dtype=np.float32)\n", + "x = tf.constant(np.arange(1, 13+1, dtype=np.float32).reshape([1, 1, 13, 1]))\n", + "filters = 
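tf.constant(filter_primes.reshape(1, 6, 1, 1))\n", + "\n", + "# Note (worked arithmetic): with a width-6 filter and stride 5 over 13 inputs,\n", + "# VALID keeps full windows only: floor((13 - 6) / 5) + 1 = 2 outputs, while\n", + "# SAME zero-pads to reach ceil(13 / 5) = 3 outputs; the manual dot products in\n", + "# the next cell reproduce exactly these values\n", + "filters = 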
tf.constant(filter_primes.reshape(1, 6, 1, 1))\n", + "\n", + "valid_conv = tf.nn.conv2d(x, filters, strides=[1, 1, 5, 1], padding='VALID')\n", + "same_conv = tf.nn.conv2d(x, filters, strides=[1, 1, 5, 1], padding='SAME')\n", + "\n", + "with tf.Session() as sess:\n", + " print(\"VALID:\\n\", valid_conv.eval())\n", + " print(\"SAME:\\n\", same_conv.eval())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(\"VALID:\")\n", + "print(np.array([1,2,3,4,5,6]).T.dot(filter_primes))\n", + "print(np.array([6,7,8,9,10,11]).T.dot(filter_primes))\n", + "print(\"SAME:\")\n", + "print(np.array([0,1,2,3,4,5]).T.dot(filter_primes))\n", + "print(np.array([5,6,7,8,9,10]).T.dot(filter_primes))\n", + "print(np.array([10,11,12,13,0,0]).T.dot(filter_primes))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pooling layer" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import load_sample_images\n", + "dataset = np.array(load_sample_images().images, dtype=np.float32)\n", + "batch_size, height, width, channels = dataset.shape\n", + "\n", + "filters = np.zeros(shape=(7, 7, channels, 2), dtype=np.float32)\n", + "filters[:, 3, :, 0] = 1 # vertical line\n", + "filters[3, :, :, 1] = 1 # horizontal line\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, height, width, channels))\n", + "max_pool = tf.nn.max_pool(X, ksize=[1, 2, 2, 1], strides=[1,2,2,1], padding=\"VALID\")\n", + "\n", + "with tf.Session() as sess:\n", + " output = sess.run(max_pool, feed_dict={X: dataset})\n", + "\n", + "plot_color_image(dataset[0])\n", + "save_fig(\"china_original\")\n", + "plt.show()\n", + " \n", + "plot_color_image(output[0])\n", + "save_fig(\"china_max_pool\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MNIST" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.datasets import fetch_mldata\n", + "\n", + "mnist = fetch_mldata('MNIST original')\n", + "X_train, X_test = mnist[\"data\"][:60000].astype(np.float64), mnist[\"data\"][60000:].astype(np.float64)\n", + "y_train, y_test = mnist[\"target\"][:60000].astype(np.int64), mnist[\"target\"][60000:].astype(np.int64)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "height, width = 28, 28\n", + "images = X_test[5000].reshape(1, height, width, 1)\n", + "plot_image(images[0, :, :, 0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inception v3" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import tarfile\n", + "import urllib.request\n", + "\n", + "TF_MODELS_URL = \"http://download.tensorflow.org/models\"\n", + "INCEPTION_V3_URL = TF_MODELS_URL + \"/inception_v3_2016_08_28.tar.gz\"\n", + "INCEPTION_PATH = os.path.join(\"datasets\", \"inception\")\n", + "INCEPTION_V3_CHECKPOINT_PATH = os.path.join(INCEPTION_PATH, \"inception_v3.ckpt\")\n", + "\n", + "def download_progress(count, block_size, total_size):\n", + " percent = count * block_size * 100 // total_size\n", + " sys.stdout.write(\"\\rDownloading: {}%\".format(percent))\n", + " sys.stdout.flush()\n", + "\n", + "def 
fetch_pretrained_inception_v3(url=INCEPTION_V3_URL, path=INCEPTION_PATH):\n", + " if os.path.exists(INCEPTION_V3_CHECKPOINT_PATH):\n", + " return\n", + " os.makedirs(path, exist_ok=True)\n", + " tgz_path = os.path.join(path, \"inception_v3.tgz\")\n", + " urllib.request.urlretrieve(url, tgz_path, reporthook=download_progress)\n", + " inception_tgz = tarfile.open(tgz_path)\n", + " inception_tgz.extractall(path=path)\n", + " inception_tgz.close()\n", + " os.remove(tgz_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fetch_pretrained_inception_v3()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import re\n", + "\n", + "CLASS_NAME_REGEX = re.compile(r\"^n\\d+\\s+(.*)\\s*$\", re.M | re.U)\n", + "\n", + "def load_class_names():\n", + " with open(os.path.join(\"datasets\",\"inception\",\"imagenet_class_names.txt\"), \"rb\") as f:\n", + " content = f.read().decode(\"utf-8\")\n", + " return CLASS_NAME_REGEX.findall(content)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "class_names = load_class_names()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "width = 299\n", + "height = 299\n", + "channels = 3" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import matplotlib.image as mpimg\n", + "test_image = mpimg.imread(os.path.join(\"images\",\"cnn\",\"test_image.png\"))[:, :, :channels]\n", + "plt.imshow(test_image)\n", + "plt.axis(\"off\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from nets.inception_v3 import inception_v3, inception_v3_arg_scope\n", + "import tensorflow.contrib.slim as slim\n", + "\n", + "tf.reset_default_graph()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=[None, height, width, channels], name=\"X\")\n", + "with slim.arg_scope(inception_v3_arg_scope()):\n", + " logits, end_points = inception_v3(X, num_classes=1001, is_training=False)\n", + "predictions = end_points[\"Predictions\"]\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_test = test_image.reshape(-1, height, width, channels)\n", + "\n", + "with tf.Session() as sess:\n", + " saver.restore(sess, INCEPTION_V3_CHECKPOINT_PATH)\n", + " predictions_val = predictions.eval(feed_dict={X: X_test})" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "class_names[np.argmax(predictions_val[0])]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.argmax(predictions_val, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "top_5 = np.argpartition(predictions_val[0], -5)[-5:]\n", + "top_5 = top_5[np.argsort(predictions_val[0][top_5])]\n", + "for i in top_5:\n", + " print(\"{0}: {1:.2f}%\".format(class_names[i], 100*predictions_val[0][i]))" + ] + }, + { + 
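"cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Quick check (not in the book): np.argpartition finds the 5 largest\n", + "# probabilities in O(n) without sorting all 1001 classes; a plain argsort\n", + "# returns the same five classes, already in ascending order:\n", + "probas = predictions_val[0]\n", + "top_5_slow = np.argsort(probas)[-5:]\n", + "print([class_names[i] for i in top_5_slow])" + ] + }, + { + 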
"cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": {}, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/14_recurrent_neural_networks.ipynb b/14_recurrent_neural_networks.ipynb new file mode 100644 index 0000000..7f873cd --- /dev/null +++ b/14_recurrent_neural_networks.ipynb @@ -0,0 +1,1326 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 14 – Recurrent Neural Networks**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercices in chapter 14._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"rnn\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then of course we will need TensorFlow:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import tensorflow as tf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Basic RNNs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Manual RNN" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 3\n", + 
"n_neurons = 5\n", + "\n", + "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n", + "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n", + "\n", + "Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons], dtype=tf.float32))\n", + "Wy = tf.Variable(tf.random_normal(shape=[n_neurons, n_neurons], dtype=tf.float32))\n", + "b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))\n", + "\n", + "Y0 = tf.tanh(tf.matmul(X0, Wx) + b)\n", + "Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0\n", + "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(Y0_val)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(Y1_val)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using `rnn()`" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 3\n", + "n_neurons = 5\n", + "\n", + "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n", + "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n", + "\n", + "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n", + "output_seqs, states = tf.nn.rnn(basic_cell, [X0, X1], dtype=tf.float32)\n", + "Y0, Y1 = output_seqs\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n", + "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "Y0_val" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "Y1_val" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output, Image, display, HTML\n", + "\n", + "def strip_consts(graph_def, max_const_size=32):\n", + " \"\"\"Strip large constant values from graph_def.\"\"\"\n", + " strip_def = tf.GraphDef()\n", + " for n0 in graph_def.node:\n", + " n = strip_def.node.add() \n", + " n.MergeFrom(n0)\n", + " if n.op == 'Const':\n", + " tensor = n.attr['value'].tensor\n", + " size = len(tensor.tensor_content)\n", + " if size > max_const_size:\n", + " tensor.tensor_content = \"b\"%size\n", + " return strip_def\n", + "\n", + "def show_graph(graph_def, max_const_size=32):\n", + " \"\"\"Visualize TensorFlow graph.\"\"\"\n", + " if hasattr(graph_def, 'as_graph_def'):\n", + " graph_def = graph_def.as_graph_def()\n", + " strip_def = 
strip_consts(graph_def, max_const_size=max_const_size)\n", + " code = \"\"\"\n", + " \n", + " \n", + "
\n", + " \n", + "
\n", + " \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n", + "\n", + " iframe = \"\"\"\n", + " \n", + " \"\"\".format(code.replace('\"', '"'))\n", + " display(HTML(iframe))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_graph(tf.get_default_graph())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Packing sequences" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_steps = 2\n", + "n_inputs = 3\n", + "n_neurons = 5\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "X_seqs = tf.unpack(tf.transpose(X, perm=[1, 0, 2]))\n", + "\n", + "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n", + "output_seqs, states = tf.nn.rnn(basic_cell, X_seqs, dtype=tf.float32)\n", + "outputs = tf.transpose(tf.pack(output_seqs), perm=[1, 0, 2])\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_batch = np.array([\n", + " # t = 0 t = 1 \n", + " [[0, 1, 2], [9, 8, 7]], # instance 1\n", + " [[3, 4, 5], [0, 0, 0]], # instance 2\n", + " [[6, 7, 8], [6, 5, 4]], # instance 3\n", + " [[9, 0, 1], [3, 2, 1]], # instance 4\n", + " ])\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " outputs_val = outputs.eval(feed_dict={X: X_batch})" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(np.transpose(outputs_val, axes=[1, 0, 2])[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using `dynamic_rnn()`" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_steps = 2\n", + "n_inputs = 3\n", + "n_neurons = 5\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "\n", + "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n", + "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_batch = np.array([\n", + " [[0, 1, 2], [9, 8, 7]], # instance 1\n", + " [[3, 4, 5], [0, 0, 0]], # instance 2\n", + " [[6, 7, 8], [6, 5, 4]], # instance 3\n", + " [[9, 0, 1], [3, 2, 1]], # instance 4\n", + " ])\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " print(\"outputs =\", outputs.eval(feed_dict={X: X_batch}))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_graph(tf.get_default_graph())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setting the sequence lengths" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_steps = 2\n", + "n_inputs = 3\n", + "n_neurons = 5\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "seq_length = tf.placeholder(tf.int32, [None])\n", + "\n", + "basic_cell = 
tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n", + "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, sequence_length=seq_length, dtype=tf.float32)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_batch = np.array([\n", + " # step 0 step 1\n", + " [[0, 1, 2], [9, 8, 7]], # instance 1\n", + " [[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)\n", + " [[6, 7, 8], [6, 5, 4]], # instance 3\n", + " [[9, 0, 1], [3, 2, 1]], # instance 4\n", + " ])\n", + "seq_length_batch = np.array([2, 1, 2, 2])\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " outputs_val, states_val = sess.run(\n", + " [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(outputs_val)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(states_val)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training a sequence classifier" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_steps = 28\n", + "n_inputs = 28\n", + "n_neurons = 150\n", + "n_outputs = 10\n", + "\n", + "learning_rate = 0.001\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.int32, [None])\n", + "\n", + "with tf.variable_scope(\"\", initializer=tf.contrib.layers.variance_scaling_initializer()):\n", + " basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n", + " outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", + "\n", + "logits = fully_connected(states, n_outputs, activation_fn=None)\n", + "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + "loss = tf.reduce_mean(xentropy)\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "correct = tf.nn.in_top_k(logits, y, 1)\n", + "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from tensorflow.examples.tutorials.mnist import input_data\n", + "mnist = input_data.read_data_sets(\"/tmp/data/\")\n", + "X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))\n", + "y_test = mnist.test.labels" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 100\n", + "batch_size = 150\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n", + " print(epoch, \"Train 
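accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + " # Note: each 28x28 image is fed as a sequence of 28 rows, one row per time\n", + " # step, so the final state has to summarize the whole image for the softmax\n", + " # layer; every epoch the line above reports: Train 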
accuracy:\", acc_train, \"Test accuracy:\", acc_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Multi-layer RNN" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_steps = 28\n", + "n_inputs = 28\n", + "n_neurons1 = 150\n", + "n_neurons2 = 100\n", + "n_outputs = 10\n", + "\n", + "learning_rate = 0.001\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.int32, [None])\n", + "\n", + "hidden1 = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons1, activation=tf.nn.relu)\n", + "hidden2 = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons2, activation=tf.nn.relu)\n", + "multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell([hidden1, hidden2])\n", + "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", + "\n", + "logits = fully_connected(states, n_outputs, activation_fn=None)\n", + "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + "loss = tf.reduce_mean(xentropy)\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "correct = tf.nn.in_top_k(logits, y, 1)\n", + "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 100\n", + "batch_size = 150\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Time series" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "t_min, t_max = 0, 30\n", + "resolution = 0.1\n", + "\n", + "def time_series(t):\n", + " return t * np.sin(t) / 3 + 2 * np.sin(t*5)\n", + "\n", + "def next_batch(batch_size, n_steps):\n", + " t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)\n", + " Ts = t0 + np.arange(0., n_steps + 1) * resolution\n", + " ys = time_series(Ts)\n", + " return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "t = np.linspace(t_min, t_max, (t_max - t_min) // resolution)\n", + "\n", + "n_steps = 20\n", + "t_instance = np.linspace(12.2, 12.2 + resolution * (n_steps + 1), n_steps + 1)\n", + "\n", + "plt.figure(figsize=(11,4))\n", + "plt.subplot(121)\n", + "plt.title(\"A time series (generated)\", fontsize=14)\n", + "plt.plot(t, time_series(t), label=r\"$t . \\sin(t) / 3 + 2 . 
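\\sin(5t)$\")\n", + "# Note: the training targets are just the same series shifted one step ahead,\n", + "# as the right-hand plot shows (dots = inputs, stars = targets)\n", + 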
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"b-\", linewidth=3, label=\"A training instance\")\n", + "plt.legend(loc=\"lower left\", fontsize=14)\n", + "plt.axis([0, 30, -17, 13])\n", + "plt.xlabel(\"Time\")\n", + "plt.ylabel(\"Value\")\n", + "\n", + "plt.subplot(122)\n", + "plt.title(\"A training instance\", fontsize=14)\n", + "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", + "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", + "plt.legend(loc=\"upper left\")\n", + "plt.xlabel(\"Time\")\n", + "\n", + "\n", + "save_fig(\"time_series_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_batch, y_batch = next_batch(1, n_steps)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.c_[X_batch[0], y_batch[0]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using an \`OutputProjectionWrapper\`" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_steps = 20\n", + "n_inputs = 1\n", + "n_neurons = 100\n", + "n_outputs = 1\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", + "\n", + "cell = tf.nn.rnn_cell.OutputProjectionWrapper(\n", + " tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),\n", + " output_size=n_outputs)\n", + "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)\n", + "\n", + "learning_rate = 0.001\n", + "\n", + "loss = tf.reduce_sum(tf.square(outputs - y))\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_iterations = 1000\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for iteration in range(n_iterations):\n", + " X_batch, y_batch = next_batch(batch_size, n_steps)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " if iteration % 100 == 0:\n", + " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", + " print(iteration, \"\\tMSE:\", mse)\n", + " \n", + " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", + " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", + " print(y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.title(\"Testing the model\", fontsize=14)\n", + "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", + "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", + "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n", + "plt.legend(loc=\"upper left\")\n", + "plt.xlabel(\"Time\")\n", + "\n", + "save_fig(\"time_series_pred_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + 
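"metadata": {}, + "source": [ + "_Note (added): \`OutputProjectionWrapper\` applies one shared, activation-free linear layer to the output of every time step. The quick check below (a sketch, not in the book) computes the MSE of the prediction plotted above; the next section then builds the same projection manually, which is more efficient since it projects all time steps in a single matrix multiplication._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# quick check: MSE of the prediction plotted above\n", + "print(np.mean((y_pred[0, :, 0] - time_series(t_instance[1:]))**2))" + ] + }, + { + "cell_type": "markdown", + 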
"metadata": {}, + "source": [ + "## Without using an `OutputProjectionWrapper`" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_steps = 20\n", + "n_inputs = 1\n", + "n_neurons = 100\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", + "\n", + "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n", + "rnn_outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", + "\n", + "n_outputs = 1\n", + "learning_rate = 0.001\n", + "\n", + "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", + "stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n", + "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", + "\n", + "loss = tf.reduce_sum(tf.square(outputs - y))\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_iterations = 1000\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for iteration in range(n_iterations):\n", + " X_batch, y_batch = next_batch(batch_size, n_steps)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " if iteration % 100 == 0:\n", + " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", + " print(iteration, \"\\tMSE:\", mse)\n", + " \n", + " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", + " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", + " print(y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.title(\"Testing the model\", fontsize=14)\n", + "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", + "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", + "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n", + "plt.legend(loc=\"upper left\")\n", + "plt.xlabel(\"Time\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generating a creative new sequence" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_iterations = 2000\n", + "batch_size = 50\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for iteration in range(n_iterations):\n", + " X_batch, y_batch = next_batch(batch_size, n_steps)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " if iteration % 100 == 0:\n", + " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", + " print(iteration, \"\\tMSE:\", mse)\n", + "\n", + " sequence1 = [0. 
 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generating a creative new sequence" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_iterations = 2000\n", + "batch_size = 50\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for iteration in range(n_iterations):\n", + " X_batch, y_batch = next_batch(batch_size, n_steps)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " if iteration % 100 == 0:\n", + " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", + " print(iteration, \"\\tMSE:\", mse)\n", + "\n", + " sequence1 = [0. for i in range(n_steps)]\n", + " for iteration in range(len(t) - n_steps):\n", + " X_batch = np.array(sequence1[-n_steps:]).reshape(1, n_steps, 1)\n", + " y_pred = sess.run(outputs, feed_dict={X: X_batch})\n", + " sequence1.append(y_pred[0, -1, 0])\n", + "\n", + " sequence2 = [time_series(i * resolution + t_min + (t_max - t_min) / 3) for i in range(n_steps)]\n", + " for iteration in range(len(t) - n_steps):\n", + " X_batch = np.array(sequence2[-n_steps:]).reshape(1, n_steps, 1)\n", + " y_pred = sess.run(outputs, feed_dict={X: X_batch})\n", + " sequence2.append(y_pred[0, -1, 0])\n", + "\n", + "plt.figure(figsize=(11,4))\n", + "plt.subplot(121)\n", + "plt.plot(t, sequence1, \"b-\")\n", + "plt.plot(t[:n_steps], sequence1[:n_steps], \"b-\", linewidth=3)\n", + "plt.xlabel(\"Time\")\n", + "plt.ylabel(\"Value\")\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(t, sequence2, \"b-\")\n", + "plt.plot(t[:n_steps], sequence2[:n_steps], \"b-\", linewidth=3)\n", + "plt.xlabel(\"Time\")\n", + "#save_fig(\"creative_sequence_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Deep RNN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## MultiRNNCell" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 2\n", + "n_neurons = 100\n", + "n_layers = 3\n", + "n_steps = 5\n", + "keep_prob = 0.5\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n", + "multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell([basic_cell] * n_layers)\n", + "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "X_batch = rnd.rand(2, n_steps, n_inputs)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " init.run()\n", + " outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch})" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "outputs_val.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dropout" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_inputs = 1\n", + "n_neurons = 100\n", + "n_layers = 3\n", + "n_steps = 20\n", + "n_outputs = 1\n", + "\n", + "keep_prob = 0.5\n", + "learning_rate = 0.001\n", + "\n", + "is_training = True\n", + "\n", + "def deep_rnn_with_dropout(X, y, is_training):\n", + " cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n", + " if is_training:\n", + " cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=keep_prob)\n", + " multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell([cell] * n_layers)\n", + " rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", + "\n", + " stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", + " stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n", + " outputs = 
tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", + "\n", + " loss = tf.reduce_sum(tf.square(outputs - y))\n", + " optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + " return outputs, loss, training_op\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", + "outputs, loss, training_op = deep_rnn_with_dropout(X, y, is_training)\n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_iterations = 2000\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " if is_training:\n", + " init.run()\n", + " for iteration in range(n_iterations):\n", + " X_batch, y_batch = next_batch(batch_size, n_steps)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " if iteration % 100 == 0:\n", + " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", + " print(iteration, \"\\tMSE:\", mse)\n", + " save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n", + " else:\n", + " saver.restore(sess, \"/tmp/my_model.ckpt\")\n", + " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", + " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", + " \n", + " plt.title(\"Testing the model\", fontsize=14)\n", + " plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", + " plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", + " plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n", + " plt.legend(loc=\"upper left\")\n", + " plt.xlabel(\"Time\")\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LSTM" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_steps = 28\n", + "n_inputs = 28\n", + "n_neurons = 150\n", + "n_outputs = 10\n", + "\n", + "learning_rate = 0.001\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.int32, [None])\n", + "\n", + "lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=n_neurons, state_is_tuple=True)\n", + "multi_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell]*3, state_is_tuple=True)\n", + "outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n", + "top_layer_h_state = states[-1][1]\n", + "logits = fully_connected(top_layer_h_state, n_outputs, activation_fn=None, scope=\"softmax\")\n", + "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + "loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "correct = tf.nn.in_top_k(logits, y, 1)\n", + "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "states" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "top_layer_h_state" + ] + }, 
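 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With `state_is_tuple=True`, `states` holds one `(c, h)` state pair per layer, so `states[-1][1]` is the short-term state `h` of the top LSTM layer. As a quick sanity check (a sketch, assuming the LSTM graph above has just been built), the tensor fed to the softmax layer should have one row per instance and `n_neurons` columns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Sanity check (sketch): static shapes of the top state and the logits.\n", + "print(top_layer_h_state.get_shape()) # expected (?, 150): unknown batch size, n_neurons\n", + "print(logits.get_shape()) # expected (?, 10): one logit per class" + ] + },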
+ { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 10\n", + "batch_size = 150\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " X_batch = X_batch.reshape((batch_size, n_steps, n_inputs))\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n", + " print(\"Epoch\", epoch, \"Train accuracy =\", acc_train, \"Test accuracy =\", acc_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Distributing layers across devices" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "\n", + "class DeviceCellWrapper(tf.nn.rnn_cell.RNNCell):\n", + " def __init__(self, device, cell):\n", + " self._cell = cell\n", + " self._device = device\n", + "\n", + " @property\n", + " def state_size(self):\n", + " return self._cell.state_size\n", + "\n", + " @property\n", + " def output_size(self):\n", + " return self._cell.output_size\n", + "\n", + " def __call__(self, inputs, state, scope=None):\n", + " with tf.device(self._device):\n", + " return self._cell(inputs, state, scope)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 5\n", + "n_neurons = 100\n", + "devices = [\"/cpu:0\"]*5\n", + "n_steps = 20\n", + "X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])\n", + "lstm_cells = [DeviceCellWrapper(device, tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons))\n", + " for device in devices]\n", + "multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)\n", + "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " init.run()\n", + " print(sess.run(outputs, feed_dict={X: rnd.rand(2, n_steps, n_inputs)}))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": {}, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/classification.ipynb b/classification.ipynb index 4c7cc98..ea7f79b 100644 --- a/classification.ipynb +++ 
b/classification.ipynb @@ -4,7 +4,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Classification**" + "**Chapter 3 – Classification**\n", + "\n", + "_This notebook contains all the sample code and solutions to the exercises in chapter 3._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" ] }, { @@ -15,14 +31,18 @@ }, "outputs": [], "source": [ + "# To support both python 2 and python 3\n", "from __future__ import division, print_function, unicode_literals\n", "\n", + "# Common imports\n", "import numpy as np\n", "import numpy.random as rnd\n", - "rnd.seed(42) # to make this notebook's output stable across runs\n", - "\n", "import os\n", "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", "%matplotlib inline\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", @@ -30,6 +50,7 @@ "plt.rcParams['xtick.labelsize'] = 12\n", "plt.rcParams['ytick.labelsize'] = 12\n", "\n", + "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", "CHAPTER_ID = \"classification\"\n", "\n", @@ -122,7 +143,7 @@ "some_digit_index = 36000\n", "some_digit = X[some_digit_index]\n", "plot_digit(some_digit)\n", - "save_fig(\"some_digit\")\n", + "save_fig(\"some_digit_plot\")\n", "plt.show()" ] }, @@ -153,7 +174,7 @@ "plt.figure(figsize=(9,9))\n", "example_images = np.r_[X[:12000:600], X[13000:30600:600], X[30600:60000:590]]\n", "plot_digits(example_images, images_per_row=10)\n", - "save_fig(\"more_digits\")\n", + "save_fig(\"more_digits_plot\")\n", "plt.show()" ] }, @@ -980,7 +1001,7 @@ "some_index = 5500\n", "plt.subplot(121); plot_digit(X_test_mod[some_index])\n", "plt.subplot(122); plot_digit(y_test_mod[some_index])\n", - "save_fig(\"noisy_digit_example\")\n", + "save_fig(\"noisy_digit_example_plot\")\n", "plt.show()" ] }, @@ -1005,7 +1026,7 @@ "source": [ "clean_digit = knn_clf.predict([X_test_mod[some_index]])\n", "plot_digit(clean_digit)\n", - "save_fig(\"cleaned_digit_example\")\n", + "save_fig(\"cleaned_digit_example_plot\")\n", "plt.show()" ] }, @@ -1183,6 +1204,31 @@ "source": [ "plot_digit(ambiguous_digit)" ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] } ], "metadata": { @@ -1203,10 +1249,14 @@ "pygments_lexer": "ipython3", "version": "3.5.1" }, + "nav_menu": {}, "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, "toc_cell": false, - "toc_number_sections": true, - "toc_threshold": 6, + "toc_section_display": "block", "toc_window_display": false } }, diff --git a/images/ann/README b/images/ann/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/ann/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/autoencoders/README b/images/autoencoders/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/autoencoders/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/cnn/README 
b/images/cnn/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/cnn/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/cnn/test_image.png b/images/cnn/test_image.png new file mode 100644 index 0000000..2d53756 Binary files /dev/null and b/images/cnn/test_image.png differ diff --git a/images/decision_trees/README b/images/decision_trees/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/decision_trees/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/deep/README b/images/deep/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/deep/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/dim_reduction/README b/images/dim_reduction/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/dim_reduction/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/distributed/README b/images/distributed/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/distributed/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/ensembles/README b/images/ensembles/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/ensembles/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/rl/README b/images/rl/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/rl/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/rnn/README b/images/rnn/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/rnn/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/svm/README b/images/svm/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/svm/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/tensorflow/README b/images/tensorflow/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/tensorflow/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/index.ipynb b/index.ipynb index c9878a6..a1c72fe 100644 --- a/index.ipynb +++ b/index.ipynb @@ -16,19 +16,29 @@ "\n", "### To run the examples\n", "* **Jupyter** – These notebooks are based on Jupyter. If you just plan to read without running any code, there's really nothing more to know, just keep reading! But if you want to experiment with the code examples you need to:\n", - " * open these notebooks in Jupyter. If you clicked on the \"launch binder\" button in github or followed the Installation instructions, then you are good to go. If not you will need to go back to the project [home page](https://github.com/ageron/ml-notebooks/) and click on \"launch binder\" or follow the installation instructions.\n", + " * open these notebooks in Jupyter. If you clicked on the \"launch binder\" button in github or followed the Installation instructions, then you are good to go. If not you will need to go back to the project [home page](https://github.com/ageron/handson-ml/) and click on \"launch binder\" or follow the installation instructions.\n", " * learn how to use Jupyter. Start the User Interface Tour from the Help menu.\n", "\n", "### To activate extensions\n", - "* If this is an interactive session (see above), you may want to turn on a few Jupyter extensions by going to the [Extension Configuration](../nbextensions/) page. 
In particular the \"*table of contents (2)*\" extension is quite useful.\n", + "* If this is an interactive session (see above), you may want to turn on a few Jupyter extensions by going to the [Extension Configuration](../nbextensions/) page. In particular the \"*Table of Contents (2)*\" extension is quite useful.\n", "\n", - "## Chapters\n", - "1. [Fundamentals](fundamentals.ipynb)\n", - "2. [End-to-end project](end_to_end_project.ipynb)\n", - "3. [Classification](classification.ipynb)\n", - "4. [Training Linear Models](training_linear_models.ipynb)\n", - "\n", - "More explanations and chapters coming soon.\n", + "## Notebooks\n", + "1. [The Machine Learning landscape](01_the_machine_learning_landscape.ipynb)\n", + "2. [End-to-end Machine Learning project](02_end_to_end_machine_learning_project.ipynb)\n", + "3. [Classification](03_classification.ipynb)\n", + "4. [Training Linear Models](04_training_linear_models.ipynb)\n", + "5. [Support Vector Machines](05_support_vector_machines.ipynb)\n", + "6. [Decision Trees](06_decision_trees.ipynb)\n", + "7. [Ensemble Learning and Random Forests](07_ensemble_learning_and_random_forests.ipynb)\n", + "8. [Dimensionality Reduction](08_dimensionality_reduction.ipynb)\n", + "9. [Up and running with TensorFlow](09_up_and_running_with_tensorflow.ipynb)\n", + "10. [Introduction to Artificial Neural Networks](10_introduction_to_artificial_neural_networks.ipynb)\n", + "11. [Deep Learning](11_deep_learning.ipynb)\n", + "12. [Distributed TensorFlow](12_distributed_tensorflow.ipynb)\n", + "13. [Convolutional Neural Networks](13_convolutional_neural_networks.ipynb)\n", + "14. [Recurrent Neural Networks](14_recurrent_neural_networks.ipynb)\n", + "15. Autoencoders (coming soon)\n", + "16. Reinforcement Learning (coming soon)\n", "\n", "## Scientific Python tutorials\n", "* [NumPy](tools_numpy.ipynb)\n", @@ -39,6 +49,15 @@ "* [Linear Algebra](math_linear_algebra.ipynb)\n", "* Calculus (coming soon)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] } ], "metadata": { @@ -59,10 +78,14 @@ "pygments_lexer": "ipython2", "version": "2.7.11" }, + "nav_menu": {}, "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, "toc_cell": false, - "toc_number_sections": true, - "toc_threshold": 6, + "toc_section_display": "block", "toc_window_display": false } }, diff --git a/nets/inception_v3.py b/nets/inception_v3.py index d5a1fe3..2897a4b 100644 --- a/nets/inception_v3.py +++ b/nets/inception_v3.py @@ -94,7 +94,9 @@ def inception_v3_base(inputs, raise ValueError('depth_multiplier is not greater than zero.') depth = lambda d: max(int(d * depth_multiplier), min_depth) - with tf.variable_scope(scope, 'InceptionV3', [inputs]): + #Backported to 0.10.0 + #with tf.variable_scope(scope, 'InceptionV3', [inputs]): + with tf.variable_scope(scope or 'InceptionV3'): with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='VALID'): # 299 x 299 x 3 @@ -470,8 +472,10 @@ def inception_v3(inputs, raise ValueError('depth_multiplier is not greater than zero.') depth = lambda d: max(int(d * depth_multiplier), min_depth) - with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes], - reuse=reuse) as scope: + #Backported to 0.10.0 + #with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes], + # reuse=reuse) as scope: + with tf.variable_scope(scope or 'InceptionV3', reuse=reuse) as scope: with slim.arg_scope([slim.batch_norm, 
slim.dropout], is_training=is_training): net, end_points = inception_v3_base(