diff --git a/.gitignore b/.gitignore index a5684d8..89c7162 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,10 @@ -.ipynb_checkpoints -.DS_Store -my_* -images/**/*.png +*.bak +*.ckpt *.pyc +.DS_Store +.ipynb_checkpoints +checkpoint +logs/* +tf_logs/* +images/**/*.png +my_* diff --git a/fundamentals.ipynb b/01_the_machine_learning_landscape.ipynb similarity index 99% rename from fundamentals.ipynb rename to 01_the_machine_learning_landscape.ipynb index df6d6dc..9647fa6 100644 --- a/fundamentals.ipynb +++ b/01_the_machine_learning_landscape.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Chapter 1 – Fundamentals of Machine Learning**\n", + "**Chapter 1 – The Machine Learning landscape**\n", "\n", "_This is the code used to generate some of the figures in chapter 1._" ] diff --git a/end_to_end_project.ipynb b/02_end_to_end_machine_learning_project.ipynb similarity index 99% rename from end_to_end_project.ipynb rename to 02_end_to_end_machine_learning_project.ipynb index 875a1be..25278be 100644 --- a/end_to_end_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Chapter 2 – End to end Machine Learning project**\n", + "**Chapter 2 – End-to-end Machine Learning project**\n", "\n", "*Welcome to Machine Learning Housing Corp.! Your task is to predict median house values in Californian districts, given a number of features from these districts.*\n", "\n", diff --git a/training_linear_models.ipynb b/04_training_linear_models.ipynb similarity index 100% rename from training_linear_models.ipynb rename to 04_training_linear_models.ipynb diff --git a/05_support_vector_machines.ipynb b/05_support_vector_machines.ipynb new file mode 100644 index 0000000..42937b0 --- /dev/null +++ b/05_support_vector_machines.ipynb @@ -0,0 +1,1248 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 5 – Support Vector Machines**\n", + "\n", + "_This notebook contains all the sample code and solutions to the exercises in chapter 5._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"svm\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + {
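**[Editor's note]** The `save_fig()` helper in the setup cell above writes to `images/svm/` but never creates that directory, so `plt.savefig()` fails on a fresh checkout. A minimal sketch of a guard one could add to the setup cell (the `IMAGES_PATH` name is hypothetical; the `os` calls are standard library):

```python
import os

# Mirror the cell's PROJECT_ROOT_DIR and CHAPTER_ID, and create the output
# directory once so plt.savefig() has somewhere to write.
IMAGES_PATH = os.path.join(".", "images", "svm")
if not os.path.isdir(IMAGES_PATH):  # os.makedirs' exist_ok needs python 3.2+, so test first
    os.makedirs(IMAGES_PATH)
```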
"cell_type": "markdown", + "metadata": {}, + "source": [ + "# Large margin classification" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn import datasets\n", + "\n", + "iris = datasets.load_iris()\n", + "X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n", + "y = iris[\"target\"]\n", + "\n", + "setosa_or_versicolour = (y == 0) | (y == 1)\n", + "X = X[setosa_or_versicolour]\n", + "y = y[setosa_or_versicolour]\n", + "\n", + "# SVM Classifier model\n", + "svm_clf = SVC(kernel=\"linear\", C=float(\"inf\"))\n", + "svm_clf.fit(X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Bad models\n", + "x0 = np.linspace(0, 5.5, 200)\n", + "pred_1 = 5*x0 - 20\n", + "pred_2 = x0 - 1.8\n", + "pred_3 = 0.1 * x0 + 0.5\n", + "\n", + "def plot_svc_decision_boundary(svm_clf, xmin, xmax):\n", + " w = svm_clf.coef_[0]\n", + " b = svm_clf.intercept_[0]\n", + "\n", + " # At the decision boundary, w0*x0 + w1*x1 + b = 0\n", + " # => x1 = -w0/w1 * x0 - b/w1\n", + " x0 = np.linspace(xmin, xmax, 200)\n", + " decision_boundary = -w[0]/w[1] * x0 - b/w[1]\n", + "\n", + " margin = 1/w[1]\n", + " gutter_up = decision_boundary + margin\n", + " gutter_down = decision_boundary - margin\n", + "\n", + " svs = svm_clf.support_vectors_\n", + " plt.scatter(svs[:, 0], svs[:, 1], s=180, facecolors='#FFAAAA')\n", + " plt.plot(x0, decision_boundary, \"k-\", linewidth=2)\n", + " plt.plot(x0, gutter_up, \"k--\", linewidth=2)\n", + " plt.plot(x0, gutter_down, \"k--\", linewidth=2)\n", + "\n", + "plt.figure(figsize=(12,2.7))\n", + "\n", + "plt.subplot(121)\n", + "plt.plot(x0, pred_1, \"g--\", linewidth=2)\n", + "plt.plot(x0, pred_2, \"m-\", linewidth=2)\n", + "plt.plot(x0, pred_3, \"r-\", linewidth=2)\n", + "plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"bs\", label=\"Iris-Versicolour\")\n", + "plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"yo\", label=\"Iris-Setosa\")\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.ylabel(\"Petal width\", fontsize=14)\n", + "plt.legend(loc=\"upper left\", fontsize=14)\n", + "plt.axis([0, 5.5, 0, 2])\n", + "\n", + "plt.subplot(122)\n", + "plot_svc_decision_boundary(svm_clf, 0, 5.5)\n", + "plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"bs\")\n", + "plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"yo\")\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.axis([0, 5.5, 0, 2])\n", + "\n", + "save_fig(\"large_margin_classification_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sensitivity to feature scales" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "Xs = np.array([[1, 50], [5, 20], [3, 80], [5, 60]]).astype(np.float64)\n", + "ys = np.array([0, 0, 1, 1])\n", + "svm_clf = SVC(kernel=\"linear\", C=100)\n", + "svm_clf.fit(Xs, ys)\n", + "\n", + "plt.figure(figsize=(12,3.2))\n", + "plt.subplot(121)\n", + "plt.plot(Xs[:, 0][ys==1], Xs[:, 1][ys==1], \"bo\")\n", + "plt.plot(Xs[:, 0][ys==0], Xs[:, 1][ys==0], \"ms\")\n", + "plot_svc_decision_boundary(svm_clf, 0, 6)\n", + "plt.xlabel(\"$x_0$\", fontsize=20)\n", + "plt.ylabel(\"$x_1$ \", fontsize=20, rotation=0)\n", + "plt.title(\"Unscaled\", fontsize=16)\n", + "plt.axis([0, 6, 0, 90])\n", + "\n", + "from sklearn.preprocessing import StandardScaler\n", + "scaler = StandardScaler()\n", + "X_scaled = 
scaler.fit_transform(Xs)\n", + "svm_clf.fit(X_scaled, ys)\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(X_scaled[:, 0][ys==1], X_scaled[:, 1][ys==1], \"bo\")\n", + "plt.plot(X_scaled[:, 0][ys==0], X_scaled[:, 1][ys==0], \"ms\")\n", + "plot_svc_decision_boundary(svm_clf, -2, 2)\n", + "plt.xlabel(\"$x_0$\", fontsize=20)\n", + "plt.title(\"Scaled\", fontsize=16)\n", + "plt.axis([-2, 2, -2, 2])\n", + "\n", + "save_fig(\"sensitivity_to_feature_scales_plot\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sensitivity to outliers" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_outliers = np.array([[3.4, 1.3], [3.2, 0.8]])\n", + "y_outliers = np.array([0, 0])\n", + "Xo1 = np.concatenate([X, X_outliers[:1]], axis=0)\n", + "yo1 = np.concatenate([y, y_outliers[:1]], axis=0)\n", + "Xo2 = np.concatenate([X, X_outliers[1:]], axis=0)\n", + "yo2 = np.concatenate([y, y_outliers[1:]], axis=0)\n", + "\n", + "svm_clf2 = SVC(kernel=\"linear\", C=10**9)#float(\"inf\"))\n", + "svm_clf2.fit(Xo2, yo2)\n", + "\n", + "plt.figure(figsize=(12,2.7))\n", + "\n", + "plt.subplot(121)\n", + "plt.plot(Xo1[:, 0][yo1==1], Xo1[:, 1][yo1==1], \"bs\")\n", + "plt.plot(Xo1[:, 0][yo1==0], Xo1[:, 1][yo1==0], \"yo\")\n", + "plt.text(0.3, 1.0, \"Impossible!\", fontsize=24, color=\"red\")\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.ylabel(\"Petal width\", fontsize=14)\n", + "plt.annotate(\"Outlier\",\n", + " xy=(X_outliers[0][0], X_outliers[0][1]),\n", + " xytext=(2.5, 1.7),\n", + " ha=\"center\",\n", + " arrowprops=dict(facecolor='black', shrink=0.1),\n", + " fontsize=16,\n", + " )\n", + "plt.axis([0, 5.5, 0, 2])\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(Xo2[:, 0][yo2==1], Xo2[:, 1][yo2==1], \"bs\")\n", + "plt.plot(Xo2[:, 0][yo2==0], Xo2[:, 1][yo2==0], \"yo\")\n", + "plot_svc_decision_boundary(svm_clf2, 0, 5.5)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.annotate(\"Outlier\",\n", + " xy=(X_outliers[1][0], X_outliers[1][1]),\n", + " xytext=(3.2, 0.08),\n", + " ha=\"center\",\n", + " arrowprops=dict(facecolor='black', shrink=0.1),\n", + " fontsize=16,\n", + " )\n", + "plt.axis([0, 5.5, 0, 2])\n", + "\n", + "save_fig(\"sensitivity_to_outliers_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Large margin *vs* margin violations" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn import datasets\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.svm import LinearSVC\n", + "\n", + "iris = datasets.load_iris()\n", + "X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n", + "y = (iris[\"target\"] == 2).astype(np.float64) # Iris-Virginica\n", + "\n", + "scaler = StandardScaler()\n", + "svm_clf1 = LinearSVC(C=100, loss=\"hinge\")\n", + "svm_clf2 = LinearSVC(C=1, loss=\"hinge\")\n", + "\n", + "scaled_svm_clf1 = Pipeline((\n", + " (\"scaler\", scaler),\n", + " (\"linear_svc\", svm_clf1),\n", + " ))\n", + "scaled_svm_clf2 = Pipeline((\n", + " (\"scaler\", scaler),\n", + " (\"linear_svc\", svm_clf2),\n", + " ))\n", + "\n", + "scaled_svm_clf1.fit(X, y)\n", + "scaled_svm_clf2.fit(X, y)\n", + "\n", + "scaled_svm_clf2.predict([[5.5, 1.7]])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + 
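**[Editor's note]** The "Convert to unscaled parameters" source that follows relies on a small piece of algebra worth spelling out. With $\boldsymbol{\mu}$ = `scaler.mean_`, $\boldsymbol{\sigma}$ = `scaler.scale_`, and divisions taken elementwise, a linear model fit on standardized inputs satisfies

$$
\mathbf{w}_s^T \frac{\mathbf{x} - \boldsymbol{\mu}}{\boldsymbol{\sigma}} + b_s
= \left(\frac{\mathbf{w}_s}{\boldsymbol{\sigma}}\right)^T \mathbf{x}
+ \left(b_s - \mathbf{w}_s^T \frac{\boldsymbol{\mu}}{\boldsymbol{\sigma}}\right),
$$

so the unscaled weights are $\mathbf{w}_s / \boldsymbol{\sigma}$ (the `w1` and `w2` lines), and the unscaled bias is the scaled model's decision function evaluated at $-\boldsymbol{\mu}/\boldsymbol{\sigma}$, the image of $\mathbf{x} = \mathbf{0}$ (the `b1` and `b2` lines).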
"source": [ + "# Convert to unscaled parameters\n", + "b1 = svm_clf1.decision_function([-scaler.mean_ / scaler.scale_])\n", + "b2 = svm_clf2.decision_function([-scaler.mean_ / scaler.scale_])\n", + "w1 = svm_clf1.coef_[0] / scaler.scale_\n", + "w2 = svm_clf2.coef_[0] / scaler.scale_\n", + "svm_clf1.intercept_ = np.array([b1])\n", + "svm_clf2.intercept_ = np.array([b2])\n", + "svm_clf1.coef_ = np.array([w1])\n", + "svm_clf2.coef_ = np.array([w2])\n", + "\n", + "# Find support vectors (LinearSVC does not do this automatically)\n", + "t = y * 2 - 1\n", + "support_vectors_idx1 = (t * (X.dot(w1) + b1) < 1).ravel()\n", + "support_vectors_idx2 = (t * (X.dot(w2) + b2) < 1).ravel()\n", + "svm_clf1.support_vectors_ = X[support_vectors_idx1]\n", + "svm_clf2.support_vectors_ = X[support_vectors_idx2]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(12,3.2))\n", + "plt.subplot(121)\n", + "plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"g^\", label=\"Iris-Virginica\")\n", + "plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"bs\", label=\"Iris-Versicolour\")\n", + "plot_svc_decision_boundary(svm_clf1, 4, 6)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.ylabel(\"Petal width\", fontsize=14)\n", + "plt.legend(loc=\"upper left\", fontsize=14)\n", + "plt.title(\"$C = {}$\".format(svm_clf1.C), fontsize=16)\n", + "plt.axis([4, 6, 0.8, 2.8])\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"g^\")\n", + "plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"bs\")\n", + "plot_svc_decision_boundary(svm_clf2, 4, 6)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.title(\"$C = {}$\".format(svm_clf2.C), fontsize=16)\n", + "plt.axis([4, 6, 0.8, 2.8])\n", + "\n", + "save_fig(\"regularization_plot\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Non-linear classification" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X1D = np.linspace(-4, 4, 9).reshape(-1, 1)\n", + "X2D = np.c_[X1D, X1D**2]\n", + "y = np.array([0, 0, 1, 1, 1, 1, 1, 0, 0])\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "\n", + "plt.subplot(121)\n", + "plt.grid(True, which='both')\n", + "plt.axhline(y=0, color='k')\n", + "plt.plot(X1D[:, 0][y==0], np.zeros(4), \"bs\")\n", + "plt.plot(X1D[:, 0][y==1], np.zeros(5), \"g^\")\n", + "plt.gca().get_yaxis().set_ticks([])\n", + "plt.xlabel(r\"$x_1$\", fontsize=20)\n", + "plt.axis([-4.5, 4.5, -0.2, 0.2])\n", + "\n", + "plt.subplot(122)\n", + "plt.grid(True, which='both')\n", + "plt.axhline(y=0, color='k')\n", + "plt.axvline(x=0, color='k')\n", + "plt.plot(X2D[:, 0][y==0], X2D[:, 1][y==0], \"bs\")\n", + "plt.plot(X2D[:, 0][y==1], X2D[:, 1][y==1], \"g^\")\n", + "plt.xlabel(r\"$x_1$\", fontsize=20)\n", + "plt.ylabel(r\"$x_2$\", fontsize=20, rotation=0)\n", + "plt.gca().get_yaxis().set_ticks([0, 4, 8, 12, 16])\n", + "plt.plot([-4.5, 4.5], [6.5, 6.5], \"r--\", linewidth=3)\n", + "plt.axis([-4.5, 4.5, -1, 17])\n", + "\n", + "plt.subplots_adjust(right=1)\n", + "\n", + "save_fig(\"higher_dimensions_plot\", tight_layout=False)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import make_moons\n", + "X, y = make_moons(n_samples=100, noise=0.15, random_state=42)\n", + "\n", + "def plot_dataset(X, y, axes):\n", + " 
plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"bs\")\n", + " plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"g^\")\n", + " plt.axis(axes)\n", + " plt.grid(True, which='both')\n", + " plt.xlabel(r\"$x_1$\", fontsize=20)\n", + " plt.ylabel(r\"$x_2$\", fontsize=20, rotation=0)\n", + "\n", + "plot_dataset(X, y, [-1.5, 2.5, -1, 1.5])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "\n", + "polynomial_svm_clf = Pipeline((\n", + " (\"poly_features\", PolynomialFeatures(degree=3)),\n", + " (\"scaler\", StandardScaler()),\n", + " (\"svm_clf\", LinearSVC(C=10, loss=\"hinge\"))\n", + " ))\n", + "\n", + "polynomial_svm_clf.fit(X, y)\n", + "\n", + "def plot_predictions(clf, axes):\n", + " x0s = np.linspace(axes[0], axes[1], 100)\n", + " x1s = np.linspace(axes[2], axes[3], 100)\n", + " x0, x1 = np.meshgrid(x0s, x1s)\n", + " X = np.c_[x0.ravel(), x1.ravel()]\n", + " y_pred = clf.predict(X).reshape(x0.shape)\n", + " y_decision = clf.decision_function(X).reshape(x0.shape)\n", + " plt.contourf(x0, x1, y_pred, cmap=plt.cm.brg, alpha=0.2)\n", + " plt.contourf(x0, x1, y_decision, cmap=plt.cm.brg, alpha=0.1)\n", + "\n", + "plot_predictions(polynomial_svm_clf, [-1.5, 2.5, -1, 1.5])\n", + "plot_dataset(X, y, [-1.5, 2.5, -1, 1.5])\n", + "\n", + "save_fig(\"moons_polynomial_svc_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.svm import SVC\n", + "poly_kernel_svm_clf = Pipeline((\n", + " (\"scaler\", StandardScaler()),\n", + " (\"svm_clf\", SVC(kernel=\"poly\", degree=3, coef0=1, C=5))\n", + " ))\n", + "poly100_kernel_svm_clf = Pipeline((\n", + " (\"scaler\", StandardScaler()),\n", + " (\"svm_clf\", SVC(kernel=\"poly\", degree=10, coef0=100, C=5))\n", + " ))\n", + "\n", + "poly_kernel_svm_clf.fit(X, y)\n", + "poly100_kernel_svm_clf.fit(X, y)\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "\n", + "plt.subplot(121)\n", + "plot_predictions(poly_kernel_svm_clf, [-1.5, 2.5, -1, 1.5])\n", + "plot_dataset(X, y, [-1.5, 2.5, -1, 1.5])\n", + "plt.title(r\"$d=3, r=1, C=5$\", fontsize=18)\n", + "\n", + "plt.subplot(122)\n", + "plot_predictions(poly100_kernel_svm_clf, [-1.5, 2.5, -1, 1.5])\n", + "plot_dataset(X, y, [-1.5, 2.5, -1, 1.5])\n", + "plt.title(r\"$d=10, r=100, C=5$\", fontsize=18)\n", + "\n", + "save_fig(\"moons_kernelized_polynomial_svc_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "def gaussian_rbf(x, landmark, gamma):\n", + " return np.exp(-gamma * np.linalg.norm(x - landmark, axis=1)**2)\n", + "\n", + "gamma = 0.3\n", + "\n", + "x1s = np.linspace(-4.5, 4.5, 200).reshape(-1, 1)\n", + "x2s = gaussian_rbf(x1s, -2, gamma)\n", + "x3s = gaussian_rbf(x1s, 1, gamma)\n", + "\n", + "XK = np.c_[gaussian_rbf(X1D, -2, gamma), gaussian_rbf(X1D, 1, gamma)]\n", + "yk = np.array([0, 0, 1, 1, 1, 1, 1, 0, 0])\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "\n", + "plt.subplot(121)\n", + "plt.grid(True, which='both')\n", + "plt.axhline(y=0, color='k')\n", + "plt.scatter(x=[-2, 1], y=[0, 0], s=150, alpha=0.5, c=\"red\")\n", + "plt.plot(X1D[:, 0][yk==0], np.zeros(4), \"bs\")\n", + "plt.plot(X1D[:, 0][yk==1], np.zeros(5), \"g^\")\n", + "plt.plot(x1s, x2s, \"g--\")\n", + 
"plt.plot(x1s, x3s, \"b:\")\n", + "plt.gca().get_yaxis().set_ticks([0, 0.25, 0.5, 0.75, 1])\n", + "plt.xlabel(r\"$x_1$\", fontsize=20)\n", + "plt.ylabel(r\"Similarity\", fontsize=14)\n", + "plt.annotate(r'$\\mathbf{x}$',\n", + " xy=(X1D[3, 0], 0),\n", + " xytext=(-0.5, 0.20),\n", + " ha=\"center\",\n", + " arrowprops=dict(facecolor='black', shrink=0.1),\n", + " fontsize=18,\n", + " )\n", + "plt.text(-2, 0.9, \"$x_2$\", ha=\"center\", fontsize=20)\n", + "plt.text(1, 0.9, \"$x_3$\", ha=\"center\", fontsize=20)\n", + "plt.axis([-4.5, 4.5, -0.1, 1.1])\n", + "\n", + "plt.subplot(122)\n", + "plt.grid(True, which='both')\n", + "plt.axhline(y=0, color='k')\n", + "plt.axvline(x=0, color='k')\n", + "plt.plot(XK[:, 0][yk==0], XK[:, 1][yk==0], \"bs\")\n", + "plt.plot(XK[:, 0][yk==1], XK[:, 1][yk==1], \"g^\")\n", + "plt.xlabel(r\"$x_2$\", fontsize=20)\n", + "plt.ylabel(r\"$x_3$ \", fontsize=20, rotation=0)\n", + "plt.annotate(r'$\\phi\\left(\\mathbf{x}\\right)$',\n", + " xy=(XK[3, 0], XK[3, 1]),\n", + " xytext=(0.65, 0.50),\n", + " ha=\"center\",\n", + " arrowprops=dict(facecolor='black', shrink=0.1),\n", + " fontsize=18,\n", + " )\n", + "plt.plot([-0.1, 1.1], [0.57, -0.1], \"r--\", linewidth=3)\n", + "plt.axis([-0.1, 1.1, -0.1, 1.1])\n", + " \n", + "plt.subplots_adjust(right=1)\n", + "\n", + "save_fig(\"kernel_method_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "x1_example = X1D[3, 0]\n", + "for landmark in (-2, 1):\n", + " k = gaussian_rbf(np.array([[x1_example]]), np.array([[landmark]]), gamma)\n", + " print(\"Phi({}, {}) = {}\".format(x1_example, landmark, k))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rbf_kernel_svm_clf = Pipeline((\n", + " (\"scaler\", StandardScaler()),\n", + " (\"svm_clf\", SVC(kernel=\"rbf\", gamma=5, C=0.001))\n", + " ))\n", + "rbf_kernel_svm_clf.fit(X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "from sklearn.svm import SVC\n", + "\n", + "gamma1, gamma2 = 0.1, 5\n", + "C1, C2 = 0.001, 1000\n", + "hyperparams = (gamma1, C1), (gamma1, C2), (gamma2, C1), (gamma2, C2)\n", + "\n", + "svm_clfs = []\n", + "for gamma, C in hyperparams:\n", + " rbf_kernel_svm_clf = Pipeline((\n", + " (\"scaler\", StandardScaler()),\n", + " (\"svm_clf\", SVC(kernel=\"rbf\", gamma=gamma, C=C))\n", + " ))\n", + " rbf_kernel_svm_clf.fit(X, y)\n", + " svm_clfs.append(rbf_kernel_svm_clf)\n", + "\n", + "plt.figure(figsize=(11, 7))\n", + "\n", + "for i, svm_clf in enumerate(svm_clfs):\n", + " plt.subplot(221 + i)\n", + " plot_predictions(svm_clf, [-1.5, 2.5, -1, 1.5])\n", + " plot_dataset(X, y, [-1.5, 2.5, -1, 1.5])\n", + " gamma, C = hyperparams[i]\n", + " plt.title(r\"$\\gamma = {}, C = {}$\".format(gamma, C), fontsize=16)\n", + "\n", + "save_fig(\"moons_rbf_svc_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Regression\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.svm import LinearSVR\n", + "\n", + "rnd.seed(42)\n", + "m = 50\n", + "X = 2 * rnd.rand(m, 1)\n", + "y = (4 + 3 * X + rnd.randn(m, 1)).ravel()\n", + "\n", + "svm_reg1 = LinearSVR(epsilon=1.5)\n", + "svm_reg2 = LinearSVR(epsilon=0.5)\n", + "svm_reg1.fit(X, y)\n", + 
"svm_reg2.fit(X, y)\n", + "\n", + "def find_support_vectors(svm_reg, X, y):\n", + " y_pred = svm_reg.predict(X)\n", + " off_margin = (np.abs(y - y_pred) >= svm_reg.epsilon)\n", + " return np.argwhere(off_margin)\n", + "\n", + "svm_reg1.support_ = find_support_vectors(svm_reg1, X, y)\n", + "svm_reg2.support_ = find_support_vectors(svm_reg2, X, y)\n", + "\n", + "eps_x1 = 1\n", + "eps_y_pred = svm_reg1.predict([[eps_x1]])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def plot_svm_regression(svm_reg, X, y, axes):\n", + " x1s = np.linspace(axes[0], axes[1], 100).reshape(100, 1)\n", + " y_pred = svm_reg.predict(x1s)\n", + " plt.plot(x1s, y_pred, \"k-\", linewidth=2, label=r\"$\\hat{y}$\")\n", + " plt.plot(x1s, y_pred + svm_reg.epsilon, \"k--\")\n", + " plt.plot(x1s, y_pred - svm_reg.epsilon, \"k--\")\n", + " plt.scatter(X[svm_reg.support_], y[svm_reg.support_], s=180, facecolors='#FFAAAA')\n", + " plt.plot(X, y, \"bo\")\n", + " plt.xlabel(r\"$x_1$\", fontsize=18)\n", + " plt.legend(loc=\"upper left\", fontsize=18)\n", + " plt.axis(axes)\n", + "\n", + "plt.figure(figsize=(9, 4))\n", + "plt.subplot(121)\n", + "plot_svm_regression(svm_reg1, X, y, [0, 2, 3, 11])\n", + "plt.title(r\"$\\epsilon = {}$\".format(svm_reg1.epsilon), fontsize=18)\n", + "plt.ylabel(r\"$y$\", fontsize=18, rotation=0)\n", + "#plt.plot([eps_x1, eps_x1], [eps_y_pred, eps_y_pred - svm_reg1.epsilon], \"k-\", linewidth=2)\n", + "plt.annotate(\n", + " '', xy=(eps_x1, eps_y_pred), xycoords='data',\n", + " xytext=(eps_x1, eps_y_pred - svm_reg1.epsilon),\n", + " textcoords='data', arrowprops={'arrowstyle': '<->', 'linewidth': 1.5}\n", + " )\n", + "plt.text(0.91, 5.6, r\"$\\epsilon$\", fontsize=20)\n", + "plt.subplot(122)\n", + "plot_svm_regression(svm_reg2, X, y, [0, 2, 3, 11])\n", + "plt.title(r\"$\\epsilon = {}$\".format(svm_reg2.epsilon), fontsize=18)\n", + "save_fig(\"svm_regression_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.svm import SVR\n", + "\n", + "rnd.seed(42)\n", + "m = 100\n", + "X = 2 * rnd.rand(m, 1) - 1\n", + "y = (0.2 + 0.1 * X + 0.5 * X**2 + rnd.randn(m, 1)/10).ravel()\n", + "\n", + "svm_poly_reg1 = SVR(kernel=\"poly\", degree=2, C=100, epsilon=0.1)\n", + "svm_poly_reg2 = SVR(kernel=\"poly\", degree=2, C=0.01, epsilon=0.1)\n", + "svm_poly_reg1.fit(X, y)\n", + "svm_poly_reg2.fit(X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(9, 4))\n", + "plt.subplot(121)\n", + "plot_svm_regression(svm_poly_reg1, X, y, [-1, 1, 0, 1])\n", + "plt.title(r\"$degree={}, C={}, \\epsilon = {}$\".format(svm_poly_reg1.degree, svm_poly_reg1.C, svm_poly_reg1.epsilon), fontsize=18)\n", + "plt.ylabel(r\"$y$\", fontsize=18, rotation=0)\n", + "plt.subplot(122)\n", + "plot_svm_regression(svm_poly_reg2, X, y, [-1, 1, 0, 1])\n", + "plt.title(r\"$degree={}, C={}, \\epsilon = {}$\".format(svm_poly_reg2.degree, svm_poly_reg2.C, svm_poly_reg2.epsilon), fontsize=18)\n", + "save_fig(\"svm_with_polynomial_kernel_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Under the hood" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "iris = datasets.load_iris()\n", + "X = iris[\"data\"][:, (2, 
3)] # petal length, petal width\n", + "y = (iris[\"target\"] == 2).astype(np.float64) # Iris-Virginica" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from mpl_toolkits.mplot3d import Axes3D\n", + "\n", + "def plot_3D_decision_function(ax, w, b, x1_lim=[4, 6], x2_lim=[0.8, 2.8]):\n", + " x1_in_bounds = (X[:, 0] > x1_lim[0]) & (X[:, 0] < x1_lim[1])\n", + " X_crop = X[x1_in_bounds]\n", + " y_crop = y[x1_in_bounds]\n", + " x1s = np.linspace(x1_lim[0], x1_lim[1], 20)\n", + " x2s = np.linspace(x2_lim[0], x2_lim[1], 20)\n", + " x1, x2 = np.meshgrid(x1s, x2s)\n", + " xs = np.c_[x1.ravel(), x2.ravel()]\n", + " df = (xs.dot(w) + b).reshape(x1.shape)\n", + " m = 1 / np.linalg.norm(w)\n", + " boundary_x2s = -x1s*(w[0]/w[1])-b/w[1]\n", + " margin_x2s_1 = -x1s*(w[0]/w[1])-(b-1)/w[1]\n", + " margin_x2s_2 = -x1s*(w[0]/w[1])-(b+1)/w[1]\n", + " ax.plot_surface(x1s, x2, 0, color=\"b\", alpha=0.2, cstride=100, rstride=100)\n", + " ax.plot(x1s, boundary_x2s, 0, \"k-\", linewidth=2, label=r\"$h=0$\")\n", + " ax.plot(x1s, margin_x2s_1, 0, \"k--\", linewidth=2, label=r\"$h=\\pm 1$\")\n", + " ax.plot(x1s, margin_x2s_2, 0, \"k--\", linewidth=2)\n", + " ax.plot(X_crop[:, 0][y_crop==1], X_crop[:, 1][y_crop==1], 0, \"g^\")\n", + " ax.plot_wireframe(x1, x2, df, alpha=0.3, color=\"k\")\n", + " ax.plot(X_crop[:, 0][y_crop==0], X_crop[:, 1][y_crop==0], 0, \"bs\")\n", + " ax.axis(x1_lim + x2_lim)\n", + " ax.text(4.5, 2.5, 3.8, \"Decision function $h$\", fontsize=15)\n", + " ax.set_xlabel(r\"Petal length\", fontsize=15)\n", + " ax.set_ylabel(r\"Petal width\", fontsize=15)\n", + " ax.set_zlabel(r\"$h = \\mathbf{w}^t \\cdot \\mathbf{x} + b$\", fontsize=18)\n", + " ax.legend(loc=\"upper left\", fontsize=16)\n", + "\n", + "fig = plt.figure(figsize=(11, 6))\n", + "ax1 = fig.add_subplot(111, projection='3d')\n", + "plot_3D_decision_function(ax1, w=svm_clf2.coef_[0], b=svm_clf2.intercept_[0])\n", + "\n", + "save_fig(\"iris_3D_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Small weight vector results in a large margin" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def plot_2D_decision_function(w, b, ylabel=True, x1_lim=[-3, 3]):\n", + " x1 = np.linspace(x1_lim[0], x1_lim[1], 200)\n", + " y = w * x1 + b\n", + " m = 1 / w\n", + "\n", + " plt.plot(x1, y)\n", + " plt.plot(x1_lim, [1, 1], \"k:\")\n", + " plt.plot(x1_lim, [-1, -1], \"k:\")\n", + " plt.axhline(y=0, color='k')\n", + " plt.axvline(x=0, color='k')\n", + " plt.plot([m, m], [0, 1], \"k--\")\n", + " plt.plot([-m, -m], [0, -1], \"k--\")\n", + " plt.plot([-m, m], [0, 0], \"k-o\", linewidth=3)\n", + " plt.axis(x1_lim + [-2, 2])\n", + " plt.xlabel(r\"$x_1$\", fontsize=16)\n", + " if ylabel:\n", + " plt.ylabel(r\"$w_1 x_1$ \", rotation=0, fontsize=16)\n", + " plt.title(r\"$w_1 = {}$\".format(w), fontsize=16)\n", + "\n", + "plt.figure(figsize=(12, 3.2))\n", + "plt.subplot(121)\n", + "plot_2D_decision_function(1, 0)\n", + "plt.subplot(122)\n", + "plot_2D_decision_function(0.5, 0, ylabel=False)\n", + "save_fig(\"small_w_large_margin_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn import datasets\n", + "\n", + "iris = datasets.load_iris()\n", + "X = iris[\"data\"][:, (2, 3)] # petal length, 
petal width\n", + "y = (iris[\"target\"] == 2).astype(np.float64) # Iris-Virginica\n", + "\n", + "svm_clf = SVC(kernel=\"linear\", C=1)\n", + "svm_clf.fit(X, y)\n", + "svm_clf.predict([[5.3, 1.3]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Hinge loss" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "t = np.linspace(-2, 4, 200)\n", + "h = np.where(1 - t < 0, 0, 1 - t) # max(0, 1-t)\n", + "\n", + "plt.figure(figsize=(5,2.8))\n", + "plt.plot(t, h, \"b-\", linewidth=2, label=\"$max(0, 1 - t)$\")\n", + "plt.grid(True, which='both')\n", + "plt.axhline(y=0, color='k')\n", + "plt.axvline(x=0, color='k')\n", + "plt.yticks(np.arange(-1, 2.5, 1))\n", + "plt.xlabel(\"$t$\", fontsize=16)\n", + "plt.axis([-2, 4, -1, 2.5])\n", + "plt.legend(loc=\"upper right\", fontsize=16)\n", + "save_fig(\"hinge_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extra material" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training time" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X, y = make_moons(n_samples=1000, noise=0.4)\n", + "plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"bs\")\n", + "plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"g^\")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import time\n", + "\n", + "tol = 0.1\n", + "tols = []\n", + "times = []\n", + "for i in range(10):\n", + " svm_clf = SVC(kernel=\"poly\", gamma=3, C=10, tol=tol, verbose=1)\n", + " t1 = time.time()\n", + " svm_clf.fit(X, y)\n", + " t2 = time.time()\n", + " times.append(t2-t1)\n", + " tols.append(tol)\n", + " print(i, tol, t2-t1)\n", + " tol /= 10\n", + "plt.semilogx(tols, times)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Identical linear classifiers" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.svm import SVC, LinearSVC\n", + "from sklearn.linear_model import SGDClassifier\n", + "from sklearn.datasets import make_moons\n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "X, y = make_moons(n_samples=100, noise=0.15, random_state=42)\n", + "\n", + "C = 5\n", + "alpha = 1 / (C * len(X))\n", + "\n", + "sgd_clf = SGDClassifier(loss=\"hinge\", learning_rate=\"constant\", eta0=0.001, alpha=alpha, n_iter=100000, random_state=42)\n", + "svm_clf = SVC(kernel=\"linear\", C=C)\n", + "lin_clf = LinearSVC(loss=\"hinge\", C=C)\n", + "\n", + "X_scaled = StandardScaler().fit_transform(X)\n", + "sgd_clf.fit(X_scaled, y)\n", + "svm_clf.fit(X_scaled, y)\n", + "lin_clf.fit(X_scaled, y)\n", + "\n", + "print(\"SGDClassifier(alpha={}): \".format(sgd_clf.alpha), sgd_clf.intercept_, sgd_clf.coef_)\n", + "print(\"SVC: \", svm_clf.intercept_, svm_clf.coef_)\n", + "print(\"LinearSVC: \", lin_clf.intercept_, lin_clf.coef_)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Linear SVM classifier implementation using Batch Gradient Descent" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Training set\n", + "X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n", + "y = (iris[\"target\"] == 2).astype(np.float64).reshape(-1, 1) # Iris-Virginica" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false + }, + "outputs": [],
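**[Editor's note]** The `MyLinearSVC` class in the next cell runs batch gradient descent on the soft-margin cost. Writing $t^{(i)} = \pm 1$ and letting $SV$ be the current margin violators (the instances with $t^{(i)}(\mathbf{w}^T\mathbf{x}^{(i)} + b) < 1$), the cost computed as `J` and the (sub)gradients stored in `w_gradient_vector` and `b_derivative` are

$$
J(\mathbf{w}, b) = \tfrac{1}{2}\,\mathbf{w}^T\mathbf{w}
+ C \sum_{i \in SV} \left(1 - t^{(i)}\left(\mathbf{w}^T\mathbf{x}^{(i)} + b\right)\right),
\qquad
\nabla_{\mathbf{w}} J = \mathbf{w} - C \sum_{i \in SV} t^{(i)}\mathbf{x}^{(i)},
\qquad
\frac{\partial J}{\partial b} = -C \sum_{i \in SV} t^{(i)}.
$$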
+ "source": [ + "from sklearn.base import BaseEstimator\n", + "\n", + "class MyLinearSVC(BaseEstimator):\n", + " def __init__(self, C=1, eta0=1, eta_d=10000, n_epochs=1000, random_state=None):\n", + " self.C = C\n", + " self.eta0 = eta0\n", + " self.n_epochs = n_epochs\n", + " self.random_state = random_state\n", + " self.eta_d = eta_d\n", + "\n", + " def eta(self, epoch):\n", + " return self.eta0 / (epoch + self.eta_d)\n", + " \n", + " def fit(self, X, y):\n", + " # Random initialization\n", + " if self.random_state:\n", + " rnd.seed(self.random_state)\n", + " w = rnd.randn(X.shape[1], 1) # n feature weights\n", + " b = 0\n", + "\n", + " m = len(X)\n", + " t = y * 2 - 1 # -1 if y==0, +1 if y==1\n", + " X_t = X * t\n", + " self.Js=[]\n", + "\n", + " # Training\n", + " for epoch in range(self.n_epochs):\n", + " support_vectors_idx = (X_t.dot(w) + t * b < 1).ravel()\n", + " X_t_sv = X_t[support_vectors_idx]\n", + " t_sv = t[support_vectors_idx]\n", + "\n", + " J = 1/2 * np.sum(w * w) + self.C * (np.sum(1 - X_t_sv.dot(w)) - b * np.sum(t_sv))\n", + " self.Js.append(J)\n", + "\n", + " w_gradient_vector = w - self.C * np.sum(X_t_sv, axis=0).reshape(-1, 1)\n", + " b_derivative = -self.C * np.sum(t_sv)\n", + " \n", + " w = w - self.eta(epoch) * w_gradient_vector\n", + " b = b - self.eta(epoch) * b_derivative\n", + " \n", + "\n", + " self.intercept_ = np.array([b])\n", + " self.coef_ = np.array([w])\n", + " support_vectors_idx = (X_t.dot(w) + t * b < 1).ravel()\n", + " self.support_vectors_ = X[support_vectors_idx]\n", + " return self\n", + "\n", + " def decision_function(self, X):\n", + " return X.dot(self.coef_[0]) + self.intercept_[0]\n", + "\n", + " def predict(self, X):\n", + " return (self.decision_function(X) >= 0).astype(np.float64)\n", + "\n", + "C=2\n", + "svm_clf = MyLinearSVC(C=C, eta0 = 10, eta_d = 1000, n_epochs=60000, random_state=2)\n", + "svm_clf.fit(X, y)\n", + "svm_clf.predict(np.array([[5, 2], [4, 1]]))" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.plot(range(svm_clf.n_epochs), svm_clf.Js)\n", + "plt.axis([0, svm_clf.n_epochs, 0, 100])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(svm_clf.intercept_, svm_clf.coef_)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "svm_clf2 = SVC(kernel=\"linear\", C=C)\n", + "svm_clf2.fit(X, y.ravel())\n", + "print(svm_clf2.intercept_, svm_clf2.coef_)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "yr = y.ravel()\n", + "plt.figure(figsize=(12,3.2))\n", + "plt.subplot(121)\n", + "plt.plot(X[:, 0][yr==1], X[:, 1][yr==1], \"g^\", label=\"Iris-Virginica\")\n", + "plt.plot(X[:, 0][yr==0], X[:, 1][yr==0], \"bs\", label=\"Not Iris-Virginica\")\n", + "plot_svc_decision_boundary(svm_clf, 4, 6)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.ylabel(\"Petal width\", fontsize=14)\n", + "plt.title(\"MyLinearSVC\", fontsize=14)\n", + "plt.axis([4, 6, 0.8, 2.8])\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(X[:, 0][yr==1], X[:, 1][yr==1], \"g^\")\n", + "plt.plot(X[:, 0][yr==0], X[:, 1][yr==0], \"bs\")\n", + 
"plot_svc_decision_boundary(svm_clf2, 4, 6)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.title(\"SVC\", fontsize=14)\n", + "plt.axis([4, 6, 0.8, 2.8])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import SGDClassifier\n", + "\n", + "sgd_clf = SGDClassifier(loss=\"hinge\", alpha = 0.017, n_iter = 50, random_state=42)\n", + "sgd_clf.fit(X, y.ravel())\n", + "\n", + "m = len(X)\n", + "t = y * 2 - 1 # -1 if t==0, +1 if t==1\n", + "X_b = np.c_[np.ones((m, 1)), X] # Add bias input x0=1\n", + "X_b_t = X_b * t\n", + "sgd_theta = np.r_[sgd_clf.intercept_[0], sgd_clf.coef_[0]]\n", + "print(sgd_theta)\n", + "support_vectors_idx = (X_b_t.dot(sgd_theta) < 1).ravel()\n", + "sgd_clf.support_vectors_ = X[support_vectors_idx]\n", + "sgd_clf.C = C\n", + "\n", + "plt.figure(figsize=(5.5,3.2))\n", + "plt.plot(X[:, 0][yr==1], X[:, 1][yr==1], \"g^\")\n", + "plt.plot(X[:, 0][yr==0], X[:, 1][yr==0], \"bs\")\n", + "plot_svc_decision_boundary(sgd_clf, 4, 6)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.ylabel(\"Petal width\", fontsize=14)\n", + "plt.title(\"SGDClassifier\", fontsize=14)\n", + "plt.axis([4, 6, 0.8, 2.8])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": {}, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/06_decision_trees.ipynb b/06_decision_trees.ipynb new file mode 100644 index 0000000..8e417ab --- /dev/null +++ b/06_decision_trees.ipynb @@ -0,0 +1,506 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 6 – Decision Trees**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercices in chapter 6._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + 
"import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"decision_trees\"\n", + "\n", + "def image_path(fig_id):\n", + " return os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id)\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(image_path(fig_id) + \".png\", format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training and visualizing" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import load_iris\n", + "from sklearn.tree import DecisionTreeClassifier, export_graphviz\n", + "\n", + "iris = load_iris()\n", + "X = iris.data[:, 2:] # petal length and width\n", + "y = iris.target\n", + "\n", + "tree_clf = DecisionTreeClassifier(max_depth=2, random_state=42)\n", + "tree_clf.fit(X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "export_graphviz(\n", + " tree_clf,\n", + " out_file=image_path(\"iris_tree.dot\"),\n", + " feature_names=iris.feature_names[2:],\n", + " class_names=iris.target_names,\n", + " rounded=True,\n", + " filled=True\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from matplotlib.colors import ListedColormap\n", + "\n", + "def plot_decision_boundary(clf, X, y, axes=[0, 7.5, 0, 3], iris=True, legend=False, plot_training=True):\n", + " x1s = np.linspace(axes[0], axes[1], 100)\n", + " x2s = np.linspace(axes[2], axes[3], 100)\n", + " x1, x2 = np.meshgrid(x1s, x2s)\n", + " X_new = np.c_[x1.ravel(), x2.ravel()]\n", + " y_pred = clf.predict(X_new).reshape(x1.shape)\n", + " custom_cmap = ListedColormap(['#fafab0','#9898ff','#a0faa0'])\n", + " plt.contourf(x1, x2, y_pred, alpha=0.3, cmap=custom_cmap, linewidth=10)\n", + " if not iris:\n", + " custom_cmap2 = ListedColormap(['#7d7d58','#4c4c7f','#507d50'])\n", + " plt.contour(x1, x2, y_pred, cmap=custom_cmap2, alpha=0.8)\n", + " if plot_training:\n", + " plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"yo\", label=\"Iris-Setosa\")\n", + " plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"bs\", label=\"Iris-Versicolour\")\n", + " plt.plot(X[:, 0][y==2], X[:, 1][y==2], \"g^\", label=\"Iris-Virginica\")\n", + " plt.axis(axes)\n", + " if iris:\n", + " plt.xlabel(\"Petal length\", fontsize=14)\n", + " plt.ylabel(\"Petal width\", fontsize=14)\n", + " else:\n", + " plt.xlabel(r\"$x_1$\", fontsize=18)\n", + " plt.ylabel(r\"$x_2$\", fontsize=18, rotation=0)\n", + " if legend:\n", + " plt.legend(loc=\"lower right\", fontsize=14)\n", + "\n", + "plt.figure(figsize=(8, 4))\n", + "plot_decision_boundary(tree_clf, X, y)\n", + "plt.plot([2.45, 2.45], [0, 3], \"k-\", linewidth=2)\n", + "plt.plot([2.45, 7.5], [1.75, 1.75], \"k--\", linewidth=2)\n", + "plt.plot([4.95, 4.95], [0, 1.75], \"k:\", linewidth=2)\n", + "plt.plot([4.85, 4.85], [1.75, 3], \"k:\", linewidth=2)\n", + "plt.text(1.40, 1.0, \"Depth=0\", fontsize=15)\n", + "plt.text(3.2, 1.80, \"Depth=1\", fontsize=13)\n", + "plt.text(4.05, 0.5, \"(Depth=2)\", fontsize=11)\n", + "\n", + 
"save_fig(\"decision_tree_decision_boundaries_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Predicting classes and class probabilities" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tree_clf.predict_proba([[5, 1.5]])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tree_clf.predict([[5, 1.5]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sensitivity to training set details" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X[(X[:, 1]==X[:, 1][y==1].max()) & (y==1)] # widest Iris-Versicolour flower" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "not_widest_versicolour = (X[:, 1]!=1.8) | (y==2)\n", + "X_tweaked = X[not_widest_versicolour]\n", + "y_tweaked = y[not_widest_versicolour]\n", + "\n", + "tree_clf_tweaked = DecisionTreeClassifier(max_depth=2, random_state=40)\n", + "tree_clf_tweaked.fit(X_tweaked, y_tweaked)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(8, 4))\n", + "plot_decision_boundary(tree_clf_tweaked, X_tweaked, y_tweaked, legend=False)\n", + "plt.plot([0, 7.5], [0.8, 0.8], \"k-\", linewidth=2)\n", + "plt.plot([0, 7.5], [1.75, 1.75], \"k--\", linewidth=2)\n", + "plt.text(1.0, 0.9, \"Depth=0\", fontsize=15)\n", + "plt.text(1.0, 1.80, \"Depth=1\", fontsize=13)\n", + "\n", + "save_fig(\"decision_tree_instability_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import make_moons\n", + "Xm, ym = make_moons(n_samples=100, noise=0.25, random_state=53)\n", + "\n", + "deep_tree_clf1 = DecisionTreeClassifier(random_state=42)\n", + "deep_tree_clf2 = DecisionTreeClassifier(min_samples_leaf=4, random_state=42)\n", + "deep_tree_clf1.fit(Xm, ym)\n", + "deep_tree_clf2.fit(Xm, ym)\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "plt.subplot(121)\n", + "plot_decision_boundary(deep_tree_clf1, Xm, ym, axes=[-1.5, 2.5, -1, 1.5], iris=False)\n", + "plt.title(\"No restrictions\", fontsize=16)\n", + "plt.subplot(122)\n", + "plot_decision_boundary(deep_tree_clf2, Xm, ym, axes=[-1.5, 2.5, -1, 1.5], iris=False)\n", + "plt.title(\"min_samples_leaf = {}\".format(deep_tree_clf2.min_samples_leaf), fontsize=14)\n", + "\n", + "save_fig(\"min_samples_leaf_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "angle = np.pi / 180 * 20\n", + "rotation_matrix = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])\n", + "Xr = X.dot(rotation_matrix)\n", + "\n", + "tree_clf_r = DecisionTreeClassifier(random_state=42)\n", + "tree_clf_r.fit(Xr, y)\n", + "\n", + "plt.figure(figsize=(8, 3))\n", + "plot_decision_boundary(tree_clf_r, Xr, y, axes=[0.5, 7.5, -1.0, 1], iris=False)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rnd.seed(6)\n", + "Xs = rnd.rand(100, 2) - 0.5\n", + "ys = (Xs[:, 0] > 
0).astype(np.float32) * 2\n", + "\n", + "angle = np.pi / 4\n", + "rotation_matrix = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])\n", + "Xsr = Xs.dot(rotation_matrix)\n", + "\n", + "tree_clf_s = DecisionTreeClassifier(random_state=42)\n", + "tree_clf_s.fit(Xs, ys)\n", + "tree_clf_sr = DecisionTreeClassifier(random_state=42)\n", + "tree_clf_sr.fit(Xsr, ys)\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "plt.subplot(121)\n", + "plot_decision_boundary(tree_clf_s, Xs, ys, axes=[-0.7, 0.7, -0.7, 0.7], iris=False)\n", + "plt.subplot(122)\n", + "plot_decision_boundary(tree_clf_sr, Xsr, ys, axes=[-0.7, 0.7, -0.7, 0.7], iris=False)\n", + "\n", + "save_fig(\"sensitivity_to_rotation_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Regression trees" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.tree import DecisionTreeRegressor\n", + "\n", + "# Quadratic training set + noise\n", + "rnd.seed(42)\n", + "m = 200\n", + "X = rnd.rand(m, 1)\n", + "y = 4 * (X - 0.5) ** 2\n", + "y = y + rnd.randn(m, 1) / 10\n", + "\n", + "tree_reg1 = DecisionTreeRegressor(random_state=42, max_depth=2)\n", + "tree_reg2 = DecisionTreeRegressor(random_state=42, max_depth=3)\n", + "tree_reg1.fit(X, y)\n", + "tree_reg2.fit(X, y)\n", + "\n", + "def plot_regression_predictions(tree_reg, X, y, axes=[0, 1, -0.2, 1], ylabel=\"$y$\"):\n", + " x1 = np.linspace(axes[0], axes[1], 500).reshape(-1, 1)\n", + " y_pred = tree_reg.predict(x1)\n", + " plt.axis(axes)\n", + " plt.xlabel(\"$x_1$\", fontsize=18)\n", + " if ylabel:\n", + " plt.ylabel(ylabel, fontsize=18, rotation=0)\n", + " plt.plot(X, y, \"b.\")\n", + " plt.plot(x1, y_pred, \"r.-\", linewidth=2, label=r\"$\\hat{y}$\")\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "plt.subplot(121)\n", + "plot_regression_predictions(tree_reg1, X, y)\n", + "for split, style in ((0.1973, \"k-\"), (0.0917, \"k--\"), (0.7718, \"k--\")):\n", + " plt.plot([split, split], [-0.2, 1], style, linewidth=2)\n", + "plt.text(0.21, 0.65, \"Depth=0\", fontsize=15)\n", + "plt.text(0.01, 0.2, \"Depth=1\", fontsize=13)\n", + "plt.text(0.65, 0.8, \"Depth=1\", fontsize=13)\n", + "plt.legend(loc=\"upper center\", fontsize=18)\n", + "plt.title(\"max_depth=2\", fontsize=14)\n", + "\n", + "plt.subplot(122)\n", + "plot_regression_predictions(tree_reg2, X, y, ylabel=None)\n", + "for split, style in ((0.1973, \"k-\"), (0.0917, \"k--\"), (0.7718, \"k--\")):\n", + " plt.plot([split, split], [-0.2, 1], style, linewidth=2)\n", + "for split in (0.0458, 0.1298, 0.2873, 0.9040):\n", + " plt.plot([split, split], [-0.2, 1], \"k:\", linewidth=1)\n", + "plt.text(0.3, 0.5, \"Depth=2\", fontsize=13)\n", + "plt.title(\"max_depth=3\", fontsize=14)\n", + "\n", + "save_fig(\"tree_regression_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "export_graphviz(\n", + " tree_reg1,\n", + " out_file=image_path(\"regression_tree.dot\"),\n", + " feature_names=[\"x1\"],\n", + " rounded=True,\n", + " filled=True\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tree_reg1 = DecisionTreeRegressor(random_state=42)\n", + "tree_reg2 = DecisionTreeRegressor(random_state=42, min_samples_leaf=10)\n", + "tree_reg1.fit(X, y)\n", + "tree_reg2.fit(X, y)\n", + "\n", + 
"x1 = np.linspace(0, 1, 500).reshape(-1, 1)\n", + "y_pred1 = tree_reg1.predict(x1)\n", + "y_pred2 = tree_reg2.predict(x1)\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "\n", + "plt.subplot(121)\n", + "plt.plot(X, y, \"b.\")\n", + "plt.plot(x1, y_pred1, \"r.-\", linewidth=2, label=r\"$\\hat{y}$\")\n", + "plt.axis([0, 1, -0.2, 1.1])\n", + "plt.xlabel(\"$x_1$\", fontsize=18)\n", + "plt.ylabel(\"$y$\", fontsize=18, rotation=0)\n", + "plt.legend(loc=\"upper center\", fontsize=18)\n", + "plt.title(\"No restrictions\", fontsize=14)\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(X, y, \"b.\")\n", + "plt.plot(x1, y_pred2, \"r.-\", linewidth=2, label=r\"$\\hat{y}$\")\n", + "plt.axis([0, 1, -0.2, 1.1])\n", + "plt.xlabel(\"$x_1$\", fontsize=18)\n", + "plt.title(\"min_samples_leaf={}\".format(tree_reg2.min_samples_leaf), fontsize=14)\n", + "\n", + "save_fig(\"tree_regression_regularization_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": { + "height": "309px", + "width": "468px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/07_ensemble_learning_and_random_forests.ipynb b/07_ensemble_learning_and_random_forests.ipynb new file mode 100644 index 0000000..0c716b8 --- /dev/null +++ b/07_ensemble_learning_and_random_forests.ipynb @@ -0,0 +1,788 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 7 – Ensemble Learning and Random Forests**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercices in chapter 7._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to 
save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"ensembles\"\n", + "\n", + "def image_path(fig_id):\n", + " return os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id)\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(image_path(fig_id) + \".png\", format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Voting classifiers" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "heads_proba = 0.51\n", + "coin_tosses = (rnd.rand(10000, 10) < heads_proba).astype(np.int32)\n", + "cumulative_heads_ratio = np.cumsum(coin_tosses, axis=0) / np.arange(1, 10001).reshape(-1, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(8,3.5))\n", + "plt.plot(cumulative_heads_ratio)\n", + "plt.plot([0, 10000], [0.51, 0.51], \"k--\", linewidth=2, label=\"51%\")\n", + "plt.plot([0, 10000], [0.5, 0.5], \"k-\", label=\"50%\")\n", + "plt.xlabel(\"Number of coin tosses\")\n", + "plt.ylabel(\"Heads ratio\")\n", + "plt.legend(loc=\"lower right\")\n", + "plt.axis([0, 10000, 0.42, 0.58])\n", + "save_fig(\"law_of_large_numbers_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.cross_validation import train_test_split\n", + "from sklearn.datasets import make_moons\n", + "\n", + "X, y = make_moons(n_samples=500, noise=0.30, random_state=42)\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)\n", + "\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.ensemble import VotingClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.svm import SVC\n", + "\n", + "log_clf = LogisticRegression(random_state=42)\n", + "rnd_clf = RandomForestClassifier(random_state=42)\n", + "svm_clf = SVC(probability=True, random_state=42)\n", + "\n", + "voting_clf = VotingClassifier(\n", + " estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],\n", + " voting='soft'\n", + " )\n", + "voting_clf.fit(X_train, y_train)\n", + "\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "for clf in (log_clf, rnd_clf, svm_clf, voting_clf):\n", + " clf.fit(X_train, y_train)\n", + " y_pred = clf.predict(X_test)\n", + " print(clf.__class__.__name__, accuracy_score(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bagging ensembles" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import make_moons\n", + "from sklearn.ensemble import BaggingClassifier\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "\n", + "bag_clf = BaggingClassifier(\n", + " DecisionTreeClassifier(random_state=42), n_estimators=500,\n", + " max_samples=100, bootstrap=True, n_jobs=-1, random_state=42\n", + " )\n", + "bag_clf.fit(X_train, y_train)\n", + "y_pred = bag_clf.predict(X_test)\n", + "print(accuracy_score(y_test, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tree_clf = 
DecisionTreeClassifier(random_state=42)\n", + "tree_clf.fit(X_train, y_train)\n", + "y_pred_tree = tree_clf.predict(X_test)\n", + "print(accuracy_score(y_test, y_pred_tree))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from matplotlib.colors import ListedColormap\n", + "\n", + "def plot_decision_boundary(clf, X, y, axes=[-1.5, 2.5, -1, 1.5], alpha=0.5, contour=True):\n", + " x1s = np.linspace(axes[0], axes[1], 100)\n", + " x2s = np.linspace(axes[2], axes[3], 100)\n", + " x1, x2 = np.meshgrid(x1s, x2s)\n", + " X_new = np.c_[x1.ravel(), x2.ravel()]\n", + " y_pred = clf.predict(X_new).reshape(x1.shape)\n", + " custom_cmap = ListedColormap(['#fafab0','#9898ff','#a0faa0'])\n", + " plt.contourf(x1, x2, y_pred, alpha=0.3, cmap=custom_cmap, linewidth=10)\n", + " if contour:\n", + " custom_cmap2 = ListedColormap(['#7d7d58','#4c4c7f','#507d50'])\n", + " plt.contour(x1, x2, y_pred, cmap=custom_cmap2, alpha=0.8)\n", + " plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"yo\", alpha=alpha)\n", + " plt.plot(X[:, 0][y==1], X[:, 1][y==1], \"bs\", alpha=alpha)\n", + " plt.axis(axes)\n", + " plt.xlabel(r\"$x_1$\", fontsize=18)\n", + " plt.ylabel(r\"$x_2$\", fontsize=18, rotation=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(11,4))\n", + "plt.subplot(121)\n", + "plot_decision_boundary(tree_clf, X, y)\n", + "plt.title(\"Decision Tree\", fontsize=14)\n", + "plt.subplot(122)\n", + "plot_decision_boundary(bag_clf, X, y)\n", + "plt.title(\"Decision Trees with Bagging\", fontsize=14)\n", + "save_fig(\"decision_tree_without_and_with_bagging_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Random Forests" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "bag_clf = BaggingClassifier(\n", + " DecisionTreeClassifier(splitter=\"random\", max_leaf_nodes=16, random_state=42),\n", + " n_estimators=500, max_samples=1.0, bootstrap=True,\n", + " n_jobs=-1, random_state=42\n", + " )\n", + "bag_clf.fit(X_train, y_train)\n", + "y_pred = bag_clf.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1, random_state=42)\n", + "rnd_clf.fit(X_train, y_train)\n", + "\n", + "y_pred_rf = rnd_clf.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.sum(y_pred == y_pred_rf) / len(y_pred) # almost identical predictions" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import load_iris\n", + "iris = load_iris()\n", + "rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1, random_state=42)\n", + "rnd_clf.fit(iris[\"data\"], iris[\"target\"])\n", + "for name, importance in zip(iris[\"feature_names\"], rnd_clf.feature_importances_):\n", + " print(name, \"=\", importance)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rnd_clf.feature_importances_" + ] + }, + { + 
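**[Editor's note]** The out-of-bag evaluation a few cells below works because a bootstrap sample of size $m$ drawn with replacement leaves out roughly $(1 - 1/m)^m \approx e^{-1} \approx 37\%$ of the training instances, giving each tree a free validation set. A quick illustrative check (not part of the commit; variable names are arbitrary):

```python
import numpy as np

rng = np.random.RandomState(42)
m = 500
bootstrap_indices = rng.randint(0, m, m)  # draw m indices with replacement
oob_fraction = 1.0 - len(np.unique(bootstrap_indices)) / float(m)
print(oob_fraction)  # ~0.37, close to (1 - 1/m)**m
```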
"cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(6, 4))\n", + "\n", + "for i in range(15):\n", + " tree_clf = DecisionTreeClassifier(max_leaf_nodes=16, random_state=42+i)\n", + " indices_with_replacement = rnd.randint(0, len(X_train), len(X_train))\n", + " tree_clf.fit(X[indices_with_replacement], y[indices_with_replacement])\n", + " plot_decision_boundary(tree_clf, X, y, axes=[-1.5, 2.5, -1, 1.5], alpha=0.02, contour=False)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Out-of-Bag evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "bag_clf = BaggingClassifier(\n", + " DecisionTreeClassifier(random_state=42), n_estimators=500,\n", + " bootstrap=True, n_jobs=-1, oob_score=True, random_state=40\n", + ")\n", + "bag_clf.fit(X_train, y_train)\n", + "bag_clf.oob_score_" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "bag_clf.oob_decision_function_[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.metrics import accuracy_score\n", + "y_pred = bag_clf.predict(X_test)\n", + "accuracy_score(y_test, y_pred)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feature importance" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import fetch_mldata\n", + "mnist = fetch_mldata('MNIST original')\n", + "rnd_clf = RandomForestClassifier(random_state=42)\n", + "rnd_clf.fit(mnist[\"data\"], mnist[\"target\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def plot_digit(data):\n", + " image = data.reshape(28, 28)\n", + " plt.imshow(image, cmap = matplotlib.cm.hot,\n", + " interpolation=\"nearest\")\n", + " plt.axis(\"off\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plot_digit(rnd_clf.feature_importances_)\n", + "\n", + "cbar = plt.colorbar(ticks=[rnd_clf.feature_importances_.min(), rnd_clf.feature_importances_.max()])\n", + "cbar.ax.set_yticklabels(['Not important', 'Very important'])\n", + "\n", + "save_fig(\"mnist_feature_importance_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AdaBoost" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.ensemble import AdaBoostClassifier\n", + "\n", + "ada_clf = AdaBoostClassifier(\n", + " DecisionTreeClassifier(max_depth=2), n_estimators=200,\n", + " algorithm=\"SAMME.R\", learning_rate=0.5, random_state=42\n", + " )\n", + "ada_clf.fit(X_train, y_train)\n", + "plot_decision_boundary(ada_clf, X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "m = len(X_train)\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "for subplot, learning_rate in ((121, 1), (122, 0.5)):\n", + " sample_weights = np.ones(m)\n", + " for i in range(5):\n", + " plt.subplot(subplot)\n", + " svm_clf = 
SVC(kernel=\"rbf\", C=0.05)\n", + " svm_clf.fit(X_train, y_train, sample_weight=sample_weights)\n", + " y_pred = svm_clf.predict(X_train)\n", + " sample_weights[y_pred != y_train] *= (1 + learning_rate)\n", + " plot_decision_boundary(svm_clf, X, y, alpha=0.2)\n", + " plt.title(\"learning_rate = {}\".format(learning_rate - 1), fontsize=16)\n", + "\n", + "plt.subplot(121)\n", + "plt.text(-0.7, -0.65, \"1\", fontsize=14)\n", + "plt.text(-0.6, -0.10, \"2\", fontsize=14)\n", + "plt.text(-0.5, 0.10, \"3\", fontsize=14)\n", + "plt.text(-0.4, 0.55, \"4\", fontsize=14)\n", + "plt.text(-0.3, 0.90, \"5\", fontsize=14)\n", + "save_fig(\"boosting_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "list(m for m in dir(ada_clf) if not m.startswith(\"_\") and m.endswith(\"_\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Gradient Boosting" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.tree import DecisionTreeRegressor\n", + "\n", + "rnd.seed(42)\n", + "X = rnd.rand(100, 1) - 0.5\n", + "y = 3*X[:, 0]**2 + 0.05 * rnd.randn(100)\n", + "\n", + "tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=42)\n", + "tree_reg1.fit(X, y)\n", + "\n", + "y2 = y - tree_reg1.predict(X)\n", + "tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=42)\n", + "tree_reg2.fit(X, y2)\n", + "\n", + "y3 = y2 - tree_reg2.predict(X)\n", + "tree_reg3 = DecisionTreeRegressor(max_depth=2, random_state=42)\n", + "tree_reg3.fit(X, y3)\n", + "\n", + "X_new = np.array([[0.8]])\n", + "y_pred = sum(tree.predict(X_new) for tree in (tree_reg1, tree_reg2, tree_reg3))\n", + "print(y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def plot_predictions(regressors, X, y, axes, label=None, style=\"r-\", data_style=\"b.\", data_label=None):\n", + " x1 = np.linspace(axes[0], axes[1], 500)\n", + " y_pred = sum(regressor.predict(x1.reshape(-1, 1)) for regressor in regressors)\n", + " plt.plot(X[:, 0], y, data_style, label=data_label)\n", + " plt.plot(x1, y_pred, style, linewidth=2, label=label)\n", + " if label or data_label:\n", + " plt.legend(loc=\"upper center\", fontsize=16)\n", + " plt.axis(axes)\n", + "\n", + "plt.figure(figsize=(11,11))\n", + "\n", + "plt.subplot(321)\n", + "plot_predictions([tree_reg1], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label=\"$h_1(x_1)$\", style=\"g-\", data_label=\"Training set\")\n", + "plt.ylabel(\"$y$\", fontsize=16, rotation=0)\n", + "plt.title(\"Residuals and tree predictions\", fontsize=16)\n", + "\n", + "plt.subplot(322)\n", + "plot_predictions([tree_reg1], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label=\"$h(x_1) = h_1(x_1)$\", data_label=\"Training set\")\n", + "plt.ylabel(\"$y$\", fontsize=16, rotation=0)\n", + "plt.title(\"Ensemble predictions\", fontsize=16)\n", + "\n", + "plt.subplot(323)\n", + "plot_predictions([tree_reg2], X, y2, axes=[-0.5, 0.5, -0.5, 0.5], label=\"$h_2(x_1)$\", style=\"g-\", data_style=\"k+\", data_label=\"Residuals\")\n", + "plt.ylabel(\"$y - h_1(x_1)$\", fontsize=16)\n", + "\n", + "plt.subplot(324)\n", + "plot_predictions([tree_reg1, tree_reg2], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label=\"$h(x_1) = h_1(x_1) + h_2(x_1)$\")\n", + "plt.ylabel(\"$y$\", fontsize=16, rotation=0)\n", + "\n", + "plt.subplot(325)\n", + "plot_predictions([tree_reg3], X, y3, 
axes=[-0.5, 0.5, -0.5, 0.5], label=\"$h_3(x_1)$\", style=\"g-\", data_style=\"k+\")\n", + "plt.ylabel(\"$y - h_1(x_1) - h_2(x_1)$\", fontsize=16)\n", + "plt.xlabel(\"$x_1$\", fontsize=16)\n", + "\n", + "plt.subplot(326)\n", + "plot_predictions([tree_reg1, tree_reg2, tree_reg3], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label=\"$h(x_1) = h_1(x_1) + h_2(x_1) + h_3(x_1)$\")\n", + "plt.xlabel(\"$x_1$\", fontsize=16)\n", + "plt.ylabel(\"$y$\", fontsize=16, rotation=0)\n", + "\n", + "save_fig(\"gradient_boosting_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.ensemble import GradientBoostingRegressor\n", + "\n", + "gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=3, learning_rate=1.0, random_state=42)\n", + "gbrt.fit(X, y)\n", + "\n", + "gbrt_slow = GradientBoostingRegressor(max_depth=2, n_estimators=200, learning_rate=0.1, random_state=42)\n", + "gbrt_slow.fit(X, y)\n", + "\n", + "plt.figure(figsize=(11,4))\n", + "\n", + "plt.subplot(121)\n", + "plot_predictions([gbrt], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label=\"Ensemble predictions\")\n", + "plt.title(\"learning_rate={}, n_estimators={}\".format(gbrt.learning_rate, gbrt.n_estimators), fontsize=14)\n", + "\n", + "plt.subplot(122)\n", + "plot_predictions([gbrt_slow], X, y, axes=[-0.5, 0.5, -0.1, 0.8])\n", + "plt.title(\"learning_rate={}, n_estimators={}\".format(gbrt_slow.learning_rate, gbrt_slow.n_estimators), fontsize=14)\n", + "\n", + "save_fig(\"gbrt_learning_rate_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Gradient Boosting with Early stopping" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.cross_validation import train_test_split\n", + "from sklearn.metrics import mean_squared_error\n", + "\n", + "X_train, X_val, y_train, y_val = train_test_split(X, y)\n", + "\n", + "gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120, learning_rate=0.1, random_state=42)\n", + "gbrt.fit(X_train, y_train)\n", + "\n", + "errors = [mean_squared_error(y_val, y_pred) for y_pred in gbrt.staged_predict(X_val)]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "best_n_estimators = np.argmin(errors) + 1\n", + "min_error = errors[best_n_estimators - 1]\n", + "\n", + "gbrt_best = GradientBoostingRegressor(max_depth=2, n_estimators=best_n_estimators, learning_rate=0.1, random_state=42)\n", + "gbrt_best.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(11, 4))\n", + "\n", + "plt.subplot(121)\n", + "plt.plot(errors, \"b.-\")\n", + "plt.plot([best_n_estimators, best_n_estimators], [0, min_error], \"k--\")\n", + "plt.plot([0, 120], [min_error, min_error], \"k--\")\n", + "plt.plot(best_n_estimators, min_error, \"ko\")\n", + "plt.text(best_n_estimators, min_error*1.2, \"Minimum\", ha=\"center\", fontsize=14)\n", + "plt.axis([0, 120, 0, 0.01])\n", + "plt.xlabel(\"Number of trees\")\n", + "plt.title(\"Validation error\", fontsize=14)\n", + "\n", + "plt.subplot(122)\n", + "plot_predictions([gbrt_best], X, y, axes=[-0.5, 0.5, -0.1, 0.8])\n", + "plt.title(\"Best model (55 trees)\", fontsize=14)\n", + "\n", + "save_fig(\"early_stopping_gbrt_plot\")\n", + "plt.show()" + ] + 
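A note on the indexing above: `staged_predict()` yields predictions after 1, 2, ... trees, so `errors[i]` is the validation MSE of an ensemble with `i + 1` trees, which is why `np.argmin(errors)` is shifted by one before being used as `n_estimators`. Because boosting builds its trees sequentially and deterministically here, a retrained model with the same hyperparameters reproduces the first stages, so the convention can be checked directly (a sketch reusing `X_train`, `y_train`, `X_val`, `y_val` and `errors` from the cells above):

```python
i = 10  # an arbitrary stage to verify
gbrt_check = GradientBoostingRegressor(max_depth=2, n_estimators=i + 1,
                                       learning_rate=0.1, random_state=42)
gbrt_check.fit(X_train, y_train)
# Both numbers should agree up to floating-point noise:
print(errors[i], mean_squared_error(y_val, gbrt_check.predict(X_val)))
```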
}, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=1, learning_rate=0.1, random_state=42, warm_start=True)\n", + "\n", + "min_val_error = float(\"inf\")\n", + "error_going_up = 0\n", + "for n_estimators in range(1, 120):\n", + "    gbrt.n_estimators = n_estimators\n", + "    gbrt.fit(X_train, y_train)\n", + "    y_pred = gbrt.predict(X_val)\n", + "    val_error = mean_squared_error(y_val, y_pred)\n", + "    if val_error < min_val_error:\n", + "        min_val_error = val_error\n", + "        error_going_up = 0\n", + "    else:\n", + "        error_going_up += 1\n", + "        if error_going_up == 5:\n", + "            break # early stopping" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(gbrt.n_estimators)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": { + "height": "252px", + "width": "333px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/08_dimensionality_reduction.ipynb b/08_dimensionality_reduction.ipynb new file mode 100644 index 0000000..fa135f9 --- /dev/null +++ b/08_dimensionality_reduction.ipynb @@ -0,0 +1,1343 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 8 – Dimensionality Reduction**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercises in chapter 8._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = 
\"dim_reduction\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Projection methods\n", + "Build 3D dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rnd.seed(4)\n", + "m = 60\n", + "w1, w2 = 0.1, 0.3\n", + "noise = 0.1\n", + "\n", + "angles = rnd.rand(m) * 3 * np.pi / 2 - 0.5\n", + "X = np.empty((m, 3))\n", + "X[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * rnd.randn(m) / 2\n", + "X[:, 1] = np.sin(angles) * 0.7 + noise * rnd.randn(m) / 2\n", + "X[:, 2] = X[:, 0] * w1 + X[:, 1] * w2 + noise * rnd.randn(m)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mean normalize the data:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X = X - X.mean(axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Apply PCA to reduce to 2D." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.decomposition import PCA\n", + "\n", + "pca = PCA(n_components = 2)\n", + "X2D = pca.fit_transform(X)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Recover the 3D points projected on the plane (PCA 2D subspace)." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "X2D_inv = pca.inverse_transform(X2D)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Utility class to draw 3D arrows (copied from http://stackoverflow.com/questions/11140163)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from matplotlib.patches import FancyArrowPatch\n", + "from mpl_toolkits.mplot3d import proj3d\n", + "\n", + "class Arrow3D(FancyArrowPatch):\n", + " def __init__(self, xs, ys, zs, *args, **kwargs):\n", + " FancyArrowPatch.__init__(self, (0,0), (0,0), *args, **kwargs)\n", + " self._verts3d = xs, ys, zs\n", + "\n", + " def draw(self, renderer):\n", + " xs3d, ys3d, zs3d = self._verts3d\n", + " xs, ys, zs = proj3d.proj_transform(xs3d, ys3d, zs3d, renderer.M)\n", + " self.set_positions((xs[0],ys[0]),(xs[1],ys[1]))\n", + " FancyArrowPatch.draw(self, renderer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Express the plane as a function of x and y." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "axes = [-1.8, 1.8, -1.3, 1.3, -1.0, 1.0]\n", + "\n", + "x1s = np.linspace(axes[0], axes[1], 10)\n", + "x2s = np.linspace(axes[2], axes[3], 10)\n", + "x1, x2 = np.meshgrid(x1s, x2s)\n", + "\n", + "C = pca.components_\n", + "R = C.T.dot(C)\n", + "z = (R[0, 2] * x1 + R[1, 2] * x2) / (1 - R[2, 2])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot the 3D dataset, the plane and the projections on that plane." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from mpl_toolkits.mplot3d import Axes3D\n", + "\n", + "fig = plt.figure(figsize=(6, 3.8))\n", + "ax = fig.add_subplot(111, projection='3d')\n", + "\n", + "X3D_above = X[X[:, 2] > X2D_inv[:, 2]]\n", + "X3D_below = X[X[:, 2] <= X2D_inv[:, 2]]\n", + "\n", + "ax.plot(X3D_below[:, 0], X3D_below[:, 1], X3D_below[:, 2], \"bo\", alpha=0.5)\n", + "\n", + "ax.plot_surface(x1, x2, z, alpha=0.2, color=\"k\")\n", + "np.linalg.norm(C, axis=0)\n", + "ax.add_artist(Arrow3D([0, C[0, 0]],[0, C[0, 1]],[0, C[0, 2]], mutation_scale=15, lw=1, arrowstyle=\"-|>\", color=\"k\"))\n", + "ax.add_artist(Arrow3D([0, C[1, 0]],[0, C[1, 1]],[0, C[1, 2]], mutation_scale=15, lw=1, arrowstyle=\"-|>\", color=\"k\"))\n", + "ax.plot([0], [0], [0], \"k.\")\n", + "\n", + "for i in range(m):\n", + " if X[i, 2] > X2D_inv[i, 2]:\n", + " ax.plot([X[i][0], X2D_inv[i][0]], [X[i][1], X2D_inv[i][1]], [X[i][2], X2D_inv[i][2]], \"k-\")\n", + " else:\n", + " ax.plot([X[i][0], X2D_inv[i][0]], [X[i][1], X2D_inv[i][1]], [X[i][2], X2D_inv[i][2]], \"k-\", color=\"#505050\")\n", + " \n", + "ax.plot(X2D_inv[:, 0], X2D_inv[:, 1], X2D_inv[:, 2], \"k+\")\n", + "ax.plot(X2D_inv[:, 0], X2D_inv[:, 1], X2D_inv[:, 2], \"k.\")\n", + "ax.plot(X3D_above[:, 0], X3D_above[:, 1], X3D_above[:, 2], \"bo\")\n", + "ax.set_xlabel(\"$x_1$\", fontsize=18)\n", + "ax.set_ylabel(\"$x_2$\", fontsize=18)\n", + "ax.set_zlabel(\"$x_3$\", fontsize=18)\n", + "ax.set_xlim(axes[0:2])\n", + "ax.set_ylim(axes[2:4])\n", + "ax.set_zlim(axes[4:6])\n", + "\n", + "save_fig(\"dataset_3d_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fig = plt.figure()\n", + "ax = fig.add_subplot(111, aspect='equal')\n", + "\n", + "ax.plot(X2D[:, 0], X2D[:, 1], \"k+\")\n", + "ax.plot(X2D[:, 0], X2D[:, 1], \"k.\")\n", + "ax.plot([0], [0], \"ko\")\n", + "ax.arrow(0, 0, 0, 1, head_width=0.05, length_includes_head=True, head_length=0.1, fc='k', ec='k')\n", + "ax.arrow(0, 0, 1, 0, head_width=0.05, length_includes_head=True, head_length=0.1, fc='k', ec='k')\n", + "ax.set_xlabel(\"$z_1$\", fontsize=18)\n", + "ax.set_ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + "ax.axis([-1.5, 1.3, -1.2, 1.2])\n", + "ax.grid(True)\n", + "save_fig(\"dataset_2d_plot\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "PCA using SVD decomposition" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "m, n = X.shape\n", + "\n", + "X_centered = X - X.mean(axis=0)\n", + "U, s, V = np.linalg.svd(X_centered)\n", + "c1 = V.T[:, 0]\n", + "c2 = V.T[:, 1]\n", + "\n", + "S = np.zeros(X.shape)\n", + "S[:n, :n] = np.diag(s)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.allclose(X, U.dot(S).dot(V))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "T = X.dot(V.T[:, :2])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.allclose(T, U.dot(S)[:, :2])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.decomposition import 
PCA\n", + "pca = PCA(n_components = 2)\n", + "X2D_p = pca.fit_transform(X)\n", + "np.allclose(X2D_p, T)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "X3D_recover = T.dot(V[:2, :])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.allclose(X3D_recover, pca.inverse_transform(X2D_p))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "V" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pca.components_" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "R = pca.components_.T.dot(pca.components_)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "S[:3]" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pca.explained_variance_ratio_" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "1 - pca.explained_variance_ratio_.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "S[0,0]**2/(S**2).sum(), S[1,1]**2/(S**2).sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.sqrt((T[:, 1]**2).sum())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Manifold learning\n", + "Swiss roll:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.datasets import make_swiss_roll\n", + "X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "axes = [-11.5, 14, -2, 23, -12, 15]\n", + "\n", + "fig = plt.figure(figsize=(6, 5))\n", + "ax = fig.add_subplot(111, projection='3d')\n", + "\n", + "ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=t, cmap=plt.cm.hot)\n", + "ax.view_init(10, -70)\n", + "ax.set_xlabel(\"$x_1$\", fontsize=18)\n", + "ax.set_ylabel(\"$x_2$\", fontsize=18)\n", + "ax.set_zlabel(\"$x_3$\", fontsize=18)\n", + "ax.set_xlim(axes[0:2])\n", + "ax.set_ylim(axes[2:4])\n", + "ax.set_zlim(axes[4:6])\n", + "\n", + "save_fig(\"swiss_roll_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(11, 4))\n", + "\n", + "plt.subplot(121)\n", + "plt.scatter(X[:, 0], X[:, 1], c=t, cmap=plt.cm.hot)\n", + "plt.axis(axes[:4])\n", + "plt.xlabel(\"$x_1$\", fontsize=18)\n", + "plt.ylabel(\"$x_2$\", fontsize=18, rotation=0)\n", + "plt.grid(True)\n", + "\n", + "plt.subplot(122)\n", + "plt.scatter(t, X[:, 1], c=t, cmap=plt.cm.hot)\n", + "plt.axis([4, 15, axes[2], axes[3]])\n", + "plt.xlabel(\"$z_1$\", fontsize=18)\n", + "plt.grid(True)\n", + "\n", + "save_fig(\"squished_swiss_roll_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": 
{ + "collapsed": false + }, + "outputs": [], + "source": [ + "from matplotlib import gridspec\n", + "\n", + "axes = [-11.5, 14, -2, 23, -12, 15]\n", + "\n", + "x2s = np.linspace(axes[2], axes[3], 10)\n", + "x3s = np.linspace(axes[4], axes[5], 10)\n", + "x2, x3 = np.meshgrid(x2s, x3s)\n", + "\n", + "fig = plt.figure(figsize=(6, 5))\n", + "ax = plt.subplot(111, projection='3d')\n", + "\n", + "positive_class = X[:, 0] > 5\n", + "X_pos = X[positive_class]\n", + "X_neg = X[~positive_class]\n", + "ax.view_init(10, -70)\n", + "ax.plot(X_neg[:, 0], X_neg[:, 1], X_neg[:, 2], \"y^\")\n", + "ax.plot_wireframe(5, x2, x3, alpha=0.5)\n", + "ax.plot(X_pos[:, 0], X_pos[:, 1], X_pos[:, 2], \"gs\")\n", + "ax.set_xlabel(\"$x_1$\", fontsize=18)\n", + "ax.set_ylabel(\"$x_2$\", fontsize=18)\n", + "ax.set_zlabel(\"$x_3$\", fontsize=18)\n", + "ax.set_xlim(axes[0:2])\n", + "ax.set_ylim(axes[2:4])\n", + "ax.set_zlim(axes[4:6])\n", + "\n", + "save_fig(\"manifold_decision_boundary_plot1\")\n", + "plt.show()\n", + "\n", + "fig = plt.figure(figsize=(5, 4))\n", + "ax = plt.subplot(111)\n", + "\n", + "plt.plot(t[positive_class], X[positive_class, 1], \"gs\")\n", + "plt.plot(t[~positive_class], X[~positive_class, 1], \"y^\")\n", + "plt.axis([4, 15, axes[2], axes[3]])\n", + "plt.xlabel(\"$z_1$\", fontsize=18)\n", + "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + "plt.grid(True)\n", + "\n", + "save_fig(\"manifold_decision_boundary_plot2\")\n", + "plt.show()\n", + "\n", + "fig = plt.figure(figsize=(6, 5))\n", + "ax = plt.subplot(111, projection='3d')\n", + "\n", + "positive_class = 2 * (t[:] - 4) > X[:, 1]\n", + "X_pos = X[positive_class]\n", + "X_neg = X[~positive_class]\n", + "ax.view_init(10, -70)\n", + "ax.plot(X_neg[:, 0], X_neg[:, 1], X_neg[:, 2], \"y^\")\n", + "ax.plot(X_pos[:, 0], X_pos[:, 1], X_pos[:, 2], \"gs\")\n", + "ax.set_xlabel(\"$x_1$\", fontsize=18)\n", + "ax.set_ylabel(\"$x_2$\", fontsize=18)\n", + "ax.set_zlabel(\"$x_3$\", fontsize=18)\n", + "ax.set_xlim(axes[0:2])\n", + "ax.set_ylim(axes[2:4])\n", + "ax.set_zlim(axes[4:6])\n", + "\n", + "save_fig(\"manifold_decision_boundary_plot3\")\n", + "plt.show()\n", + "\n", + "fig = plt.figure(figsize=(5, 4))\n", + "ax = plt.subplot(111)\n", + "\n", + "plt.plot(t[positive_class], X[positive_class, 1], \"gs\")\n", + "plt.plot(t[~positive_class], X[~positive_class, 1], \"y^\")\n", + "plt.plot([4, 15], [0, 22], \"b-\", linewidth=2)\n", + "plt.axis([4, 15, axes[2], axes[3]])\n", + "plt.xlabel(\"$z_1$\", fontsize=18)\n", + "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + "plt.grid(True)\n", + "\n", + "save_fig(\"manifold_decision_boundary_plot4\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PCA" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "angle = np.pi / 5\n", + "stretch = 5\n", + "m = 200\n", + "\n", + "rnd.seed(3)\n", + "X = rnd.randn(m, 2) / 10\n", + "X = X.dot(np.array([[stretch, 0],[0, 1]])) # stretch\n", + "X = X.dot([[np.cos(angle), np.sin(angle)], [-np.sin(angle), np.cos(angle)]]) # rotate\n", + "\n", + "u1 = np.array([np.cos(angle), np.sin(angle)])\n", + "u2 = np.array([np.cos(angle - 2 * np.pi/6), np.sin(angle - 2 * np.pi/6)])\n", + "u3 = np.array([np.cos(angle - np.pi/2), np.sin(angle - np.pi/2)])\n", + "\n", + "X_proj1 = X.dot(u1.reshape(-1, 1))\n", + "X_proj2 = X.dot(u2.reshape(-1, 1))\n", + "X_proj3 = X.dot(u3.reshape(-1, 1))\n", + "\n", + "plt.figure(figsize=(8,4))\n", + "plt.subplot2grid((3,2), (0, 0), 
rowspan=3)\n", + "plt.plot([-1.4, 1.4], [-1.4*u1[1]/u1[0], 1.4*u1[1]/u1[0]], \"k-\", linewidth=1)\n", + "plt.plot([-1.4, 1.4], [-1.4*u2[1]/u2[0], 1.4*u2[1]/u2[0]], \"k--\", linewidth=1)\n", + "plt.plot([-1.4, 1.4], [-1.4*u3[1]/u3[0], 1.4*u3[1]/u3[0]], \"k:\", linewidth=2)\n", + "plt.plot(X[:, 0], X[:, 1], \"bo\", alpha=0.5)\n", + "plt.axis([-1.4, 1.4, -1.4, 1.4])\n", + "plt.arrow(0, 0, u1[0], u1[1], head_width=0.1, linewidth=5, length_includes_head=True, head_length=0.1, fc='k', ec='k')\n", + "plt.arrow(0, 0, u3[0], u3[1], head_width=0.1, linewidth=5, length_includes_head=True, head_length=0.1, fc='k', ec='k')\n", + "plt.text(u1[0] + 0.1, u1[1] - 0.05, r\"$\\mathbf{c_1}$\", fontsize=22)\n", + "plt.text(u3[0] + 0.1, u3[1], r\"$\\mathbf{c_2}$\", fontsize=22)\n", + "plt.xlabel(\"$x_1$\", fontsize=18)\n", + "plt.ylabel(\"$x_2$\", fontsize=18, rotation=0)\n", + "plt.grid(True)\n", + "\n", + "plt.subplot2grid((3,2), (0, 1))\n", + "plt.plot([-2, 2], [0, 0], \"k-\", linewidth=1)\n", + "plt.plot(X_proj1[:, 0], np.zeros(m), \"bo\", alpha=0.3)\n", + "plt.gca().get_yaxis().set_ticks([])\n", + "plt.gca().get_xaxis().set_ticklabels([])\n", + "plt.axis([-2, 2, -1, 1])\n", + "plt.grid(True)\n", + "\n", + "plt.subplot2grid((3,2), (1, 1))\n", + "plt.plot([-2, 2], [0, 0], \"k--\", linewidth=1)\n", + "plt.plot(X_proj2[:, 0], np.zeros(m), \"bo\", alpha=0.3)\n", + "plt.gca().get_yaxis().set_ticks([])\n", + "plt.gca().get_xaxis().set_ticklabels([])\n", + "plt.axis([-2, 2, -1, 1])\n", + "plt.grid(True)\n", + "\n", + "plt.subplot2grid((3,2), (2, 1))\n", + "plt.plot([-2, 2], [0, 0], \"k:\", linewidth=2)\n", + "plt.plot(X_proj3[:, 0], np.zeros(m), \"bo\", alpha=0.3)\n", + "plt.gca().get_yaxis().set_ticks([])\n", + "plt.axis([-2, 2, -1, 1])\n", + "plt.xlabel(\"$z_1$\", fontsize=18)\n", + "plt.grid(True)\n", + "\n", + "save_fig(\"pca_best_projection\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MNIST compression" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.cross_validation import train_test_split\n", + "from sklearn.datasets import fetch_mldata\n", + "\n", + "mnist = fetch_mldata('MNIST original')\n", + "X = mnist[\"data\"]\n", + "y = mnist[\"target\"]\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X = X_train\n", + "\n", + "pca = PCA()\n", + "pca.fit(X)\n", + "d = np.argmax(np.cumsum(pca.explained_variance_ratio_) >= 0.95) + 1\n", + "d" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pca = PCA(n_components=0.95)\n", + "X_reduced = pca.fit_transform(X)\n", + "pca.n_components_" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.sum(pca.explained_variance_ratio_)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_mnist = X_train\n", + "\n", + "pca = PCA(n_components = 154)\n", + "X_mnist_reduced = pca.fit_transform(X_mnist)\n", + "X_mnist_recovered = pca.inverse_transform(X_mnist_reduced)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def 
plot_digits(instances, images_per_row=5, **options):\n", + " size = 28\n", + " images_per_row = min(len(instances), images_per_row)\n", + " images = [instance.reshape(size,size) for instance in instances]\n", + " n_rows = (len(instances) - 1) // images_per_row + 1\n", + " row_images = []\n", + " n_empty = n_rows * images_per_row - len(instances)\n", + " images.append(np.zeros((size, size * n_empty)))\n", + " for row in range(n_rows):\n", + " rimages = images[row * images_per_row : (row + 1) * images_per_row]\n", + " row_images.append(np.concatenate(rimages, axis=1))\n", + " image = np.concatenate(row_images, axis=0)\n", + " plt.imshow(image, cmap = matplotlib.cm.binary, **options)\n", + " plt.axis(\"off\")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(7, 4))\n", + "plt.subplot(121)\n", + "plot_digits(X_mnist[::2100])\n", + "plt.title(\"Original\", fontsize=16)\n", + "plt.subplot(122)\n", + "plot_digits(X_mnist_recovered[::2100])\n", + "plt.title(\"Compressed\", fontsize=16)\n", + "\n", + "save_fig(\"mnist_compression_plot\")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.decomposition import IncrementalPCA\n", + "\n", + "n_batches = 100\n", + "inc_pca = IncrementalPCA(n_components=154)\n", + "for X_batch in np.array_split(X_mnist, n_batches):\n", + " print(\".\", end=\"\")\n", + " inc_pca.partial_fit(X_batch)\n", + "\n", + "X_mnist_reduced_inc = inc_pca.transform(X_mnist)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "X_mnist_recovered_inc = inc_pca.inverse_transform(X_mnist_reduced_inc)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(7, 4))\n", + "plt.subplot(121)\n", + "plot_digits(X_mnist[::2100])\n", + "plt.subplot(122)\n", + "plot_digits(X_mnist_recovered_inc[::2100])\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.allclose(pca.mean_, inc_pca.mean_)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.allclose(X_mnist_reduced, X_mnist_reduced_inc)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "filename = \"my_mnist.data\"\n", + "\n", + "X_mm = np.memmap(filename, dtype='float32', mode='write', shape=X_mnist.shape)\n", + "X_mm[:] = X_mnist\n", + "del X_mm" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_mm = np.memmap(filename, dtype='float32', mode='readonly', shape=X_mnist.shape)\n", + "\n", + "batch_size = len(X_mnist) // n_batches\n", + "inc_pca = IncrementalPCA(n_components=154, batch_size=batch_size)\n", + "inc_pca.fit(X_mm)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.decomposition import RandomizedPCA\n", + "\n", + "rnd_pca = RandomizedPCA(n_components=154, random_state=42)\n", + "X_reduced = rnd_pca.fit_transform(X_mnist)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, 
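Do not be surprised that `np.allclose(X_mnist_reduced, X_mnist_reduced_inc)` above returns `False`: `IncrementalPCA` only approximates the batch solution, and principal components are in any case defined only up to sign, so individual projections can differ even when the learned subspaces agree. A sign-insensitive comparison is the reconstruction error (a sketch reusing the fitted `pca` and `inc_pca`; note that it temporarily materializes two arrays the size of the training set):

```python
mse_batch = np.mean((pca.inverse_transform(pca.transform(X_mnist)) - X_mnist) ** 2)
mse_inc = np.mean((inc_pca.inverse_transform(inc_pca.transform(X_mnist)) - X_mnist) ** 2)
print(mse_batch, mse_inc)  # close values indicate similar 154-dimensional subspaces
```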
+ "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import time\n", + "\n", + "for n_components in (2, 10, 154):\n", + " print(\"n_components =\", n_components)\n", + " regular_pca = PCA(n_components=n_components)\n", + " inc_pca = IncrementalPCA(n_components=154, batch_size=500)\n", + " rnd_pca = RandomizedPCA(n_components=154, random_state=42)\n", + "\n", + " for pca in (regular_pca, inc_pca, rnd_pca):\n", + " t1 = time.time()\n", + " pca.fit(X_mnist)\n", + " t2 = time.time()\n", + " print(pca.__class__.__name__, t2 - t1, \"seconds\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Kernel PCA" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.decomposition import KernelPCA\n", + "\n", + "X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)\n", + "\n", + "lin_pca = KernelPCA(n_components = 2, kernel=\"linear\", fit_inverse_transform=True)\n", + "rbf_pca = KernelPCA(n_components = 2, kernel=\"rbf\", gamma=0.0433, fit_inverse_transform=True)\n", + "sig_pca = KernelPCA(n_components = 2, kernel=\"sigmoid\", gamma=0.001, coef0=1, fit_inverse_transform=True)\n", + "\n", + "y = t > 6.9\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "for subplot, pca, title in ((131, lin_pca, \"Linear kernel\"), (132, rbf_pca, \"RBF kernel, $\\gamma=0.04$\"), (133, sig_pca, \"Sigmoid kernel, $\\gamma=10^{-3}, r=1$\")):\n", + " X_reduced = pca.fit_transform(X)\n", + " if subplot == 132:\n", + " X_reduced_rbf = X_reduced\n", + " \n", + " plt.subplot(subplot)\n", + " #plt.plot(X_reduced[y, 0], X_reduced[y, 1], \"gs\")\n", + " #plt.plot(X_reduced[~y, 0], X_reduced[~y, 1], \"y^\")\n", + " plt.title(title, fontsize=14)\n", + " plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=t, cmap=plt.cm.hot)\n", + " plt.xlabel(\"$z_1$\", fontsize=18)\n", + " if subplot == 131:\n", + " plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + " plt.grid(True)\n", + "\n", + "save_fig(\"kernel_pca_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(6, 5))\n", + "\n", + "X_inverse = pca.inverse_transform(X_reduced_rbf)\n", + "\n", + "ax = plt.subplot(111, projection='3d')\n", + "ax.view_init(10, -70)\n", + "ax.scatter(X_inverse[:, 0], X_inverse[:, 1], X_inverse[:, 2], c=t, cmap=plt.cm.hot, marker=\"x\")\n", + "ax.set_xlabel(\"\")\n", + "ax.set_ylabel(\"\")\n", + "ax.set_zlabel(\"\")\n", + "ax.set_xticklabels([])\n", + "ax.set_yticklabels([])\n", + "ax.set_zticklabels([])\n", + "\n", + "save_fig(\"preimage_plot\", tight_layout=False)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_reduced = rbf_pca.fit_transform(X)\n", + "\n", + "plt.figure(figsize=(11, 4))\n", + "plt.subplot(132)\n", + "plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=t, cmap=plt.cm.hot, marker=\"x\")\n", + "plt.xlabel(\"$z_1$\", fontsize=18)\n", + "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + "plt.grid(True)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.pipeline import Pipeline\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.grid_search import GridSearchCV\n", + "\n", + "clf = Pipeline([\n", + " (\"kpca\", 
KernelPCA(n_components=2)),\n", + " (\"log_reg\", LogisticRegression())\n", + " ])\n", + "\n", + "param_grid = [\n", + " {\"kpca__gamma\": np.linspace(0.03, 0.05, 10), \"kpca__kernel\": [\"rbf\", \"sigmoid\"]}\n", + " ]\n", + "\n", + "grid_search = GridSearchCV(clf, param_grid, cv=3)\n", + "grid_search.fit(X, y)\n", + "grid_search.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "rbf_pca = KernelPCA(n_components = 2, kernel=\"rbf\", gamma=0.0433,\n", + " fit_inverse_transform=True)\n", + "X_reduced = rbf_pca.fit_transform(X)\n", + "X_preimage = rbf_pca.inverse_transform(X_reduced)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.metrics import mean_squared_error\n", + "mean_squared_error(X, X_preimage)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "times_rpca = []\n", + "times_pca = []\n", + "sizes = [1000, 10000, 20000, 30000, 40000, 50000, 70000, 100000, 200000, 500000]\n", + "for n_samples in sizes:\n", + " X = rnd.randn(n_samples, 5)\n", + " pca = RandomizedPCA(n_components = 2, random_state=42)\n", + " t1 = time.time()\n", + " pca.fit(X)\n", + " t2 = time.time()\n", + " times_rpca.append(t2 - t1)\n", + " pca = PCA(n_components = 2)\n", + " t1 = time.time()\n", + " pca.fit(X)\n", + " t2 = time.time()\n", + " times_pca.append(t2 - t1)\n", + "\n", + "plt.plot(sizes, times_rpca, \"b-o\", label=\"RPCA\")\n", + "plt.plot(sizes, times_pca, \"r-s\", label=\"PCA\")\n", + "plt.xlabel(\"n_samples\")\n", + "plt.ylabel(\"Training time\")\n", + "plt.legend(loc=\"upper left\")\n", + "plt.title(\"PCA and Randomized PCA time complexity \")" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "times_rpca = []\n", + "times_pca = []\n", + "sizes = [1000, 2000, 3000, 4000, 5000, 6000]\n", + "for n_features in sizes:\n", + " X = rnd.randn(2000, n_features)\n", + " pca = RandomizedPCA(n_components = 2, random_state=42)\n", + " t1 = time.time()\n", + " pca.fit(X)\n", + " t2 = time.time()\n", + " times_rpca.append(t2 - t1)\n", + " pca = PCA(n_components = 2)\n", + " t1 = time.time()\n", + " pca.fit(X)\n", + " t2 = time.time()\n", + " times_pca.append(t2 - t1)\n", + "\n", + "plt.plot(sizes, times_rpca, \"b-o\", label=\"RPCA\")\n", + "plt.plot(sizes, times_pca, \"r-s\", label=\"PCA\")\n", + "plt.xlabel(\"n_features\")\n", + "plt.ylabel(\"Training time\")\n", + "plt.legend(loc=\"upper left\")\n", + "plt.title(\"PCA and Randomized PCA time complexity \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LLE" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.manifold import LocallyLinearEmbedding\n", + "\n", + "X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=41)\n", + "\n", + "lle = LocallyLinearEmbedding(n_neighbors=10, n_components=2, random_state=42)\n", + "X_reduced = lle.fit_transform(X)\n", + "\n", + "plt.title(\"Unrolled swiss roll using LLE\", fontsize=14)\n", + "plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=t, cmap=plt.cm.hot)\n", + "plt.xlabel(\"$z_1$\", fontsize=18)\n", + "plt.ylabel(\"$z_2$\", fontsize=18)\n", + "plt.axis([-0.065, 0.055, -0.1, 0.12])\n", + "plt.grid(True)\n", + "\n", + 
"save_fig(\"lle_unrolling_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MDS, Isomap and t-SNE" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.manifold import MDS\n", + "mds = MDS(n_components=2, random_state=42)\n", + "X_reduced_mds = mds.fit_transform(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.manifold import Isomap\n", + "isomap = Isomap(n_components=2)\n", + "X_reduced_isomap = isomap.fit_transform(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.manifold import TSNE\n", + "tsne = TSNE(n_components=2)\n", + "X_reduced_tsne = tsne.fit_transform(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", + "lda = LinearDiscriminantAnalysis(n_components=2)\n", + "X_mnist = mnist[\"data\"]\n", + "y_mnist = mnist[\"target\"]\n", + "lda.fit(X_mnist, y_mnist)\n", + "X_reduced_lda = lda.transform(X_mnist)" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "titles = [\"MDS\", \"Isomap\", \"t-SNE\"]\n", + "\n", + "plt.figure(figsize=(11,4))\n", + "\n", + "for subplot, title, X_reduced in zip((131, 132, 133), titles,\n", + " (X_reduced_mds, X_reduced_isomap, X_reduced_tsne)):\n", + " plt.subplot(subplot)\n", + " plt.title(title, fontsize=14)\n", + " plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=t, cmap=plt.cm.hot)\n", + " plt.xlabel(\"$z_1$\", fontsize=18)\n", + " if subplot == 131:\n", + " plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + " plt.grid(True)\n", + "\n", + "save_fig(\"other_dim_reduction_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": { + "height": "352px", + "width": "458px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/09_up_and_running_with_tensorflow.ipynb b/09_up_and_running_with_tensorflow.ipynb new file mode 100644 index 0000000..138ad7d --- /dev/null +++ b/09_up_and_running_with_tensorflow.ipynb @@ -0,0 +1,1709 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 9 – Up and running with TensorFlow**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample 
code and solutions to the exercises in chapter 9._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"tensorflow\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + "    path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + "    print(\"Saving figure\", fig_id)\n", + "    if tight_layout:\n", + "        plt.tight_layout()\n", + "    plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating and running a graph" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import tensorflow as tf" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "x = tf.Variable(3, name=\"x\")\n", + "y = tf.Variable(4, name=\"y\")\n", + "f = x*x*y + y + 2\n", + "\n", + "f" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "sess = tf.Session()\n", + "sess.run(x.initializer)\n", + "sess.run(y.initializer)\n", + "print(sess.run(f))\n", + "sess.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + "    x.initializer.run()\n", + "    y.initializer.run()\n", + "    result = f.eval()\n", + "\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "init = tf.initialize_all_variables()\n", + "\n", + "with tf.Session():\n", + "    init.run()\n", + "    result = f.eval()\n", + "\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "init = tf.initialize_all_variables()\n", + "\n", + "sess = tf.InteractiveSession()\n", + "init.run()\n", + "result = f.eval()\n", + "sess.close()\n", + "\n", + "result" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Managing graphs" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "x1 = tf.Variable(1)\n", + "x1.graph is tf.get_default_graph()" + ] + }, + { + "cell_type": "code", + 
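A practical reason for the `tf.reset_default_graph()` call at the top of most cells in this chapter: in a notebook, re-running a construction cell would otherwise keep adding duplicate nodes to the same default graph. A sketch of the effect (hypothetical, written against the same TF 0.x-era API as the rest of the chapter; the exact `"Variable"` op-type string is an assumption of that era):

```python
tf.reset_default_graph()
x = tf.Variable(3, name="x")
x = tf.Variable(3, name="x")  # simulates re-running the cell
print([op.name for op in tf.get_default_graph().get_operations()
       if op.type == "Variable"])  # both nodes remain, e.g. ['x', 'x_1']
```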
"execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "graph = tf.Graph()\n", + "with graph.as_default():\n", + " x2 = tf.Variable(2)\n", + "\n", + "x2.graph is tf.get_default_graph()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "x2.graph is graph" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "w = tf.constant(3)\n", + "x = w + 2\n", + "y = x + 5\n", + "z = x * 3\n", + "\n", + "with tf.Session() as sess:\n", + " print(y.eval()) # 10\n", + " print(z.eval()) # 15" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " y_val, z_val = sess.run([y, z])\n", + " print(y) # 10\n", + " print(z) # 15" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Linear Regression" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using the Normal Equation" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import fetch_california_housing\n", + "\n", + "housing = fetch_california_housing()\n", + "m, n = housing.data.shape\n", + "housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "X = tf.constant(housing_data_plus_bias, dtype=tf.float64, name=\"X\")\n", + "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float64, name=\"y\")\n", + "XT = tf.transpose(X)\n", + "theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)\n", + "\n", + "with tf.Session() as sess:\n", + " result = theta.eval()\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Compare with pure NumPy" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X = housing_data_plus_bias\n", + "y = housing.target.reshape(-1, 1)\n", + "theta_numpy = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)\n", + "\n", + "print(theta_numpy)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Compare with Scikit-Learn" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "lin_reg = LinearRegression()\n", + "lin_reg.fit(housing.data, housing.target.reshape(-1, 1))\n", + "\n", + "print(np.r_[lin_reg.intercept_.reshape(-1, 1), lin_reg.coef_.T])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using Batch Gradient Descent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Gradient Descent requires scaling the feature vectors first. We could do this using TF, but let's just use Scikit-Learn for now." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "scaler = StandardScaler()\n", + "scaled_housing_data = scaler.fit_transform(housing.data)\n", + "scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(scaled_housing_data_plus_bias.mean(axis=0))\n", + "print(scaled_housing_data_plus_bias.mean(axis=1))\n", + "print(scaled_housing_data_plus_bias.mean())\n", + "print(scaled_housing_data_plus_bias.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Manually computing the gradients" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", + "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "error = y_pred - y\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "gradients = 2/m * tf.matmul(tf.transpose(X), error)\n", + "training_op = tf.assign(theta, theta - learning_rate * gradients)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " if epoch % 100 == 0:\n", + " print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n", + " sess.run(training_op)\n", + " \n", + " best_theta = theta.eval()\n", + "\n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using autodiff\n", + "Same as above except for the `gradients = ...` line." 
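Since the MSE is $\frac{1}{m}\|X\theta - y\|^2$, its gradient with respect to $\theta$ is $\frac{2}{m}X^\top(X\theta - y)$; `tf.gradients(mse, [theta])[0]` derives the same expression symbolically from the graph. A quick numerical check (a sketch assuming `X`, `y`, `theta`, `error` and `mse` are defined as in the surrounding cells):

```python
manual_gradients = 2/m * tf.matmul(tf.transpose(X), error)
auto_gradients = tf.gradients(mse, [theta])[0]
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    diff = sess.run(tf.reduce_max(tf.abs(manual_gradients - auto_gradients)))
print(diff)  # ~0, up to float32 precision
```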
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", + "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "error = y_pred - y\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "gradients = tf.gradients(mse, [theta])[0]\n", + "training_op = tf.assign(theta, theta - learning_rate * gradients)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " if epoch % 100 == 0:\n", + " print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n", + " sess.run(training_op)\n", + " \n", + " best_theta = theta.eval()\n", + "\n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using a `GradientDescentOptimizer`" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", + "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "error = y_pred - y\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(mse)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " if epoch % 100 == 0:\n", + " print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n", + " sess.run(training_op)\n", + " \n", + " best_theta = theta.eval()\n", + "\n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using a momentum optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", + "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "error = y_pred - y\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.25)\n", + "training_op = optimizer.minimize(mse)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in 
range(n_epochs):\n", + "        sess.run(training_op)\n", + "    \n", + "    best_theta = theta.eval()\n", + "\n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Feeding data to the training algorithm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Placeholder nodes" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "A = tf.placeholder(tf.float32, shape=(None, 3))\n", + "B = A + 5\n", + "with tf.Session() as sess:\n", + " B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})\n", + " B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})\n", + "\n", + "print(B_val_1)\n", + "print(B_val_2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mini-batch Gradient Descent" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n + 1), name=\"X\")\n", + "y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "error = y_pred - y\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(mse)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def fetch_batch(epoch, batch_index, batch_size):\n", + " rnd.seed(epoch * n_batches + batch_index)\n", + " indices = rnd.randint(m, size=batch_size)\n", + " X_batch = scaled_housing_data_plus_bias[indices]\n", + " y_batch = housing.target.reshape(-1, 1)[indices]\n", + " return X_batch, y_batch\n", + "\n", + "n_epochs = 10\n", + "batch_size = 100\n", + "n_batches = int(np.ceil(m / batch_size))\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " for batch_index in range(n_batches):\n", + " X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + "\n", + " best_theta = theta.eval()\n", + " \n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Saving and restoring a model" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", + "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "error = y_pred - y\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(mse)\n", + "\n", + "# create the Saver node after all the variables, so it covers every one of them by default\n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " if epoch % 100 == 0:\n", + " print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n", + " save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n", + " sess.run(training_op)\n", + " \n", + " best_theta = theta.eval()\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")\n", + "\n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + },
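+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Restoring is just as simple (a minimal sketch, reusing the \`saver\` node and the \`my_model_final.ckpt\` file written above): instead of running \`init\`, ask the \`Saver\` to load the saved variable values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " saver.restore(sess, \"my_model_final.ckpt\") # loads theta, no need to run init\n", + " print(theta.eval())" + ] + },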
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualizing the graph\n", + "## inside Jupyter" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output, Image, display, HTML\n", + "\n", + "def strip_consts(graph_def, max_const_size=32):\n", + " \"\"\"Strip large constant values from graph_def.\"\"\"\n", + " strip_def = tf.GraphDef()\n", + " for n0 in graph_def.node:\n", + " n = strip_def.node.add() \n", + " n.MergeFrom(n0)\n", + " if n.op == 'Const':\n", + " tensor = n.attr['value'].tensor\n", + " size = len(tensor.tensor_content)\n", + " if size > max_const_size:\n", + " tensor.tensor_content = b\"<stripped %d bytes>\"%size\n", + " return strip_def\n", + "\n", + "def show_graph(graph_def, max_const_size=32):\n", + " \"\"\"Visualize TensorFlow graph.\"\"\"\n", + " if hasattr(graph_def, 'as_graph_def'):\n", + " graph_def = graph_def.as_graph_def()\n", + " strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n", + " code = \"\"\"\n", + " <script>\n", + " function load() {{\n", + " document.getElementById(\"{id}\").pbtxt = {data};\n", + " }}\n", + " </script>\n", + " <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n", + " <div style=\"height:600px\">\n", + " <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n", + " </div>\n", + " \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n", + "\n", + " iframe = \"\"\"\n", + " <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n", + " \"\"\".format(code.replace('\"', '&quot;'))\n", + " display(HTML(iframe))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "show_graph(tf.get_default_graph())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using TensorBoard" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from datetime import datetime\n", + "\n", + "now = datetime.utcnow().strftime(\"%Y%m%d%H%M%S\")\n", + "root_logdir = \"tf_logs\"\n", + "logdir = \"{}/run-{}/\".format(root_logdir, now)\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n + 1), name=\"X\")\n", + "y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "error = y_pred - y\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(mse)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "\n", + "mse_summary = tf.scalar_summary('MSE', mse)\n", + "summary_writer = tf.train.SummaryWriter(logdir, tf.get_default_graph())" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 10\n", + "batch_size = 100\n", + "n_batches = int(np.ceil(m / batch_size))\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " for batch_index in range(n_batches):\n", + " X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)\n", + " if batch_index % 10 == 0:\n", + " summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})\n", + " step = epoch * n_batches + batch_index\n", + " summary_writer.add_summary(summary_str, step)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + "\n", + " best_theta = theta.eval()\n", + "\n", + "summary_writer.flush()\n", + "summary_writer.close()\n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + },
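+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can now start TensorBoard from a shell to browse the MSE curves and the graph (assuming the \`tf_logs\` directory created above): run \`tensorboard --logdir tf_logs/\` and point your browser to http://localhost:6006." + ] + },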
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Name scopes" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "now = datetime.utcnow().strftime(\"%Y%m%d%H%M%S\")\n", + "root_logdir = \"tf_logs\"\n", + "logdir = \"{}/run-{}/\".format(root_logdir, now)\n", + "\n", + "n_epochs = 1000\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n + 1), name=\"X\")\n", + "y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", + "with tf.name_scope('loss') as scope:\n", + " error = y_pred - y\n", + " mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(mse)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "\n", + "mse_summary = tf.scalar_summary('MSE', mse)\n", + "summary_writer = tf.train.SummaryWriter(logdir, tf.get_default_graph())" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 10\n", + "batch_size = 100\n", + "n_batches = int(np.ceil(m / batch_size))\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " for batch_index in range(n_batches):\n", + " X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)\n", + " if batch_index % 10 == 0:\n", + " summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})\n", + " step = epoch * n_batches + batch_index\n", + " summary_writer.add_summary(summary_str, step)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + "\n", + " best_theta = theta.eval()\n", + "\n", + "summary_writer.flush()\n", + "summary_writer.close()\n", + "print(\"Best theta:\")\n", + "print(best_theta)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(error.op.name)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(mse.op.name)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "a1 = tf.Variable(0, name=\"a\") # name == \"a\"\n", + "a2 = tf.Variable(0, name=\"a\") # name == \"a_1\"\n", + "\n", + "with tf.name_scope(\"param\"): # name == \"param\"\n", + " a3 = tf.Variable(0, name=\"a\") # name == \"param/a\"\n", + "\n", + "with tf.name_scope(\"param\"): # name == \"param_1\"\n", + " a4 = tf.Variable(0, name=\"a\") # name == \"param_1/a\"\n", + "\n", + "for node in (a1, a2, a3, a4):\n", + " print(node.op.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Modularity" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Some ugly, flat code:" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_features = 3\n", + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "\n", + "w1 = tf.Variable(tf.random_normal((n_features, 1)), name=\"weights1\")\n", + "w2 = tf.Variable(tf.random_normal((n_features, 1)), name=\"weights2\")\n", + "b1 = tf.Variable(0.0, name=\"bias1\")\n", + "b2 = tf.Variable(0.0, name=\"bias2\")\n", + "\n", + "linear1 = tf.add(tf.matmul(X, w1), b1, name=\"linear1\")\n", + "linear2 = tf.add(tf.matmul(X, w2), b2, name=\"linear2\")\n", + "\n", + "relu1 = tf.maximum(linear1, 0, name=\"relu1\")\n", + "relu2 = tf.maximum(linear1, 0, name=\"relu2\") # Oops, cut&paste error! 
Did you spot it?\n", + "\n", + "output = tf.add_n([relu1, relu2], name=\"output\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Much better, using a function to build the ReLUs:" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "def relu(X):\n", + " w_shape = int(X.get_shape()[1]), 1\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", + " b = tf.Variable(0.0, name=\"bias\")\n", + " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", + " return tf.maximum(linear, 0, name=\"relu\")\n", + "\n", + "n_features = 3\n", + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "relus = [relu(X) for i in range(5)]\n", + "output = tf.add_n(relus, name=\"output\")\n", + "summary_writer = tf.train.SummaryWriter(\"logs/relu1\", tf.get_default_graph())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Even better using name scopes:" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "def relu(X):\n", + " with tf.name_scope(\"relu\"):\n", + " w_shape = int(X.get_shape()[1]), 1\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", + " b = tf.Variable(0.0, name=\"bias\")\n", + " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", + " return tf.maximum(linear, 0, name=\"max\")\n", + "\n", + "n_features = 3\n", + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "relus = [relu(X) for i in range(5)]\n", + "output = tf.add_n(relus, name=\"output\")\n", + "\n", + "summary_writer = tf.train.SummaryWriter(\"logs/relu2\", tf.get_default_graph())" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "summary_writer.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sharing a `threshold` variable the classic way, by defining it outside of the `relu()` function then passing it as a parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "def relu(X, threshold):\n", + " with tf.name_scope(\"relu\"):\n", + " w_shape = int(X.get_shape()[1]), 1\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", + " b = tf.Variable(0.0, name=\"bias\")\n", + " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", + " return tf.maximum(linear, threshold, name=\"max\")\n", + "\n", + "threshold = tf.Variable(0.0, name=\"threshold\")\n", + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "relus = [relu(X, threshold) for i in range(5)]\n", + "output = tf.add_n(relus, name=\"output\")" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "def relu(X):\n", + " with tf.name_scope(\"relu\"):\n", + " if not hasattr(relu, \"threshold\"):\n", + " relu.threshold = tf.Variable(0.0, name=\"threshold\")\n", + " w_shape = int(X.get_shape()[1]), 1\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", + " b = tf.Variable(0.0, name=\"bias\")\n", + " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", + " return 
tf.maximum(linear, relu.threshold, name=\"max\")\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "relus = [relu(X) for i in range(5)]\n", + "output = tf.add_n(relus, name=\"output\")" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "def relu(X):\n", + " with tf.variable_scope(\"relu\", reuse=True):\n", + " threshold = tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n", + " w_shape = int(X.get_shape()[1]), 1\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", + " b = tf.Variable(0.0, name=\"bias\")\n", + " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", + " return tf.maximum(linear, threshold, name=\"max\")\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "with tf.variable_scope(\"relu\"):\n", + " threshold = tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n", + "relus = [relu(X) for i in range(5)]\n", + "output = tf.add_n(relus, name=\"output\")\n", + "\n", + "summary_writer = tf.train.SummaryWriter(\"logs/relu6\", tf.get_default_graph())\n", + "summary_writer.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "def relu(X):\n", + " with tf.variable_scope(\"relu\"):\n", + " threshold = tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n", + " w_shape = int(X.get_shape()[1]), 1\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", + " b = tf.Variable(0.0, name=\"bias\")\n", + " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", + " return tf.maximum(linear, threshold, name=\"max\")\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "with tf.variable_scope(\"\") as scope:\n", + " first_relu = relu(X) # create the shared variable\n", + " scope.reuse_variables() # then reuse it\n", + " relus = [first_relu] + [relu(X) for i in range(4)]\n", + "output = tf.add_n(relus, name=\"output\")\n", + "\n", + "summary_writer = tf.train.SummaryWriter(\"logs/relu8\", tf.get_default_graph())\n", + "summary_writer.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "with tf.variable_scope(\"param\"):\n", + " x = tf.get_variable(\"x\", shape=(), initializer=tf.constant_initializer(0.))\n", + " #x = tf.Variable(0., name=\"x\")\n", + "with tf.variable_scope(\"param\", reuse=True):\n", + " y = tf.get_variable(\"x\")\n", + "\n", + "with tf.variable_scope(\"\", reuse=True):\n", + " z = tf.get_variable(\"param/x\", shape=(), initializer=tf.constant_initializer(0.))\n", + "\n", + "print(x is y)\n", + "print(x.op.name)\n", + "print(y.op.name)\n", + "print(z.op.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extra material" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Strings" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "text = np.array(\"Do you want some café?\".split())\n", + "text_tensor = tf.constant(text)\n", + "\n", + "with tf.Session() as sess:\n", + " 
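# string tensors are returned as byte arrays, so the output shows b'...' items\n", + " 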
print(text_tensor.eval())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Distributed TensorFlow" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "server = tf.train.Server.create_local_server()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "x = tf.constant(2) + tf.constant(3)\n", + "with tf.Session(server.target) as sess:\n", + " print(sess.run(x))" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "server.target" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "class Const(object):\n", + " def __init__(self, value):\n", + " self.value = value\n", + " def evaluate(self, **variables):\n", + " return self.value\n", + " def __str__(self):\n", + " return str(self.value)\n", + "\n", + "class Var(object):\n", + " def __init__(self, name):\n", + " self.name = name\n", + " def evaluate(self, **variables):\n", + " return variables[self.name]\n", + " def __str__(self):\n", + " return self.name\n", + "\n", + "class BinaryOperator(object):\n", + " def __init__(self, a, b):\n", + " self.a = a\n", + " self.b = b\n", + "\n", + "class Add(BinaryOperator):\n", + " def evaluate(self, **variables):\n", + " return self.a.evaluate(**variables) + self.b.evaluate(**variables)\n", + " def __str__(self):\n", + " return \"{} + {}\".format(self.a, self.b)\n", + "\n", + "class Mul(BinaryOperator):\n", + " def evaluate(self, **variables):\n", + " return self.a.evaluate(**variables) * self.b.evaluate(**variables)\n", + " def __str__(self):\n", + " return \"({}) * ({})\".format(self.a, self.b)\n", + "\n", + "x = Var(\"x\")\n", + "y = Var(\"y\")\n", + "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n", + "print(\"f(x,y) =\", f)\n", + "print(\"f(3,4) =\", f.evaluate(x=3, y=4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Computing gradients\n", + "### Mathematical differentiation" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df_dx = Mul(Const(2), Mul(Var(\"x\"), Var(\"y\"))) # df/dx = 2xy\n", + "df_dy = Add(Mul(Var(\"x\"), Var(\"x\")), Const(1)) # df/dy = x² + 1\n", + "print(\"df/dx(3,4) =\", df_dx.evaluate(x=3, y=4))\n", + "print(\"df/dy(3,4) =\", df_dy.evaluate(x=3, y=4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Numerical differentiation" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def derivative(f, x, y, x_eps, y_eps):\n", + " return (f.evaluate(x = x + x_eps, y = y + y_eps) - f.evaluate(x = x, y = y)) / (x_eps + y_eps)\n", + "\n", + "df_dx_34 = derivative(f, x=3, y=4, x_eps=0.0001, y_eps=0)\n", + "df_dy_34 = derivative(f, x=3, y=4, x_eps=0, y_eps=0.0001)\n", + "print(\"df/dx(3,4) =\", df_dx_34)\n", + "print(\"df/dy(3,4) =\", df_dy_34)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def f(x, y):\n", + " return x**2*y + y + 2\n", + "\n", + "def derivative(f, x, y, x_eps, y_eps):\n", + " return (f(x + x_eps, y + y_eps) - f(x, y)) / (x_eps + y_eps)\n", + "\n", + 
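"# the exact partials at (3, 4) are df/dx = 2xy = 24 and df/dy = x² + 1 = 10\n", + 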
"df_dx = derivative(f, 3, 4, 0.00001, 0)\n", + "df_dy = derivative(f, 3, 4, 0, 0.00001)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(df_dx)\n", + "print(df_dy)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Symbolic differentiation" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "Const.derive = lambda self, var: Const(0)\n", + "Var.derive = lambda self, var: Const(1) if self.name==var else Const(0)\n", + "Add.derive = lambda self, var: Add(self.a.derive(var), self.b.derive(var))\n", + "Mul.derive = lambda self, var: Add(Mul(self.a, self.b.derive(var)), Mul(self.a.derive(var), self.b))\n", + "\n", + "x = Var(\"x\")\n", + "y = Var(\"y\")\n", + "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n", + "\n", + "df_dx = f.derive(\"x\") # 2xy\n", + "df_dy = f.derive(\"y\") # x² + 1\n", + "print(\"df/dx(3,4) =\", df_dx.evaluate(x=3, y=4))\n", + "print(\"df/dy(3,4) =\", df_dy.evaluate(x=3, y=4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Automatic differentiation (autodiff) – forward mode" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "class Const(object):\n", + " def __init__(self, value):\n", + " self.value = value\n", + " def evaluate(self, derive, **variables):\n", + " return self.value, 0\n", + " def __str__(self):\n", + " return str(self.value)\n", + "\n", + "class Var(object):\n", + " def __init__(self, name):\n", + " self.name = name\n", + " def evaluate(self, derive, **variables):\n", + " return variables[self.name], (1 if derive == self.name else 0)\n", + " def __str__(self):\n", + " return self.name\n", + "\n", + "class BinaryOperator(object):\n", + " def __init__(self, a, b):\n", + " self.a = a\n", + " self.b = b\n", + "\n", + "class Add(BinaryOperator):\n", + " def evaluate(self, derive, **variables):\n", + " a, da = self.a.evaluate(derive, **variables)\n", + " b, db = self.b.evaluate(derive, **variables)\n", + " return a + b, da + db\n", + " def __str__(self):\n", + " return \"{} + {}\".format(self.a, self.b)\n", + "\n", + "class Mul(BinaryOperator):\n", + " def evaluate(self, derive, **variables):\n", + " a, da = self.a.evaluate(derive, **variables)\n", + " b, db = self.b.evaluate(derive, **variables)\n", + " return a * b, a * db + da * b\n", + " def __str__(self):\n", + " return \"({}) * ({})\".format(self.a, self.b)\n", + "\n", + "x = Var(\"x\")\n", + "y = Var(\"y\")\n", + "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n", + "f34, df_dx_34 = f.evaluate(x=3, y=4, derive=\"x\")\n", + "f34, df_dy_34 = f.evaluate(x=3, y=4, derive=\"y\")\n", + "print(\"f(3,4) =\", f34)\n", + "print(\"df/dx(3,4) =\", df_dx_34)\n", + "print(\"df/dy(3,4) =\", df_dy_34)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Autodiff – Reverse mode" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "class Const(object):\n", + " def __init__(self, value):\n", + " self.derivative = 0\n", + " self.value = value\n", + " def evaluate(self, **variables):\n", + " return self.value\n", + " def backpropagate(self, derivative):\n", + " pass\n", + " def __str__(self):\n", + " return str(self.value)\n", + "\n", + "class 
Var(object):\n", + " def __init__(self, name):\n", + " self.name = name\n", + " def evaluate(self, **variables):\n", + " self.derivative = 0\n", + " self.value = variables[self.name]\n", + " return self.value\n", + " def backpropagate(self, derivative):\n", + " self.derivative += derivative\n", + " def __str__(self):\n", + " return self.name\n", + "\n", + "class BinaryOperator(object):\n", + " def __init__(self, a, b):\n", + " self.a = a\n", + " self.b = b\n", + "\n", + "class Add(BinaryOperator):\n", + " def evaluate(self, **variables):\n", + " self.derivative = 0\n", + " self.value = self.a.evaluate(**variables) + self.b.evaluate(**variables)\n", + " return self.value\n", + " def backpropagate(self, derivative):\n", + " self.derivative += derivative\n", + " self.a.backpropagate(derivative)\n", + " self.b.backpropagate(derivative)\n", + " def __str__(self):\n", + " return \"{} + {}\".format(self.a, self.b)\n", + "\n", + "class Mul(BinaryOperator):\n", + " def evaluate(self, **variables):\n", + " self.derivative = 0\n", + " self.value = self.a.evaluate(**variables) * self.b.evaluate(**variables)\n", + " return self.value\n", + " def backpropagate(self, derivative):\n", + " self.derivative += derivative\n", + " self.a.backpropagate(derivative * self.b.value)\n", + " self.b.backpropagate(derivative * self.a.value)\n", + " def __str__(self):\n", + " return \"({}) * ({})\".format(self.a, self.b)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "x = Var(\"x\")\n", + "y = Var(\"y\")\n", + "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n", + "f34 = f.evaluate(x=3, y=4)\n", + "f.backpropagate(1)\n", + "print(\"f(3,4) =\", f34)\n", + "print(\"df/dx(3,4) =\", x.derivative)\n", + "print(\"df/dy(3,4) =\", y.derivative)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Autodiff – reverse mode (using TensorFlow)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "x = tf.Variable(3., name=\"x\")\n", + "y = tf.Variable(4., name=\"y\")\n", + "f = x*x*y + y + 2\n", + "\n", + "gradients = tf.gradients(f, [x, y])\n", + "\n", + "init = tf.initialize_all_variables()\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " f_val, gradients_val = sess.run([f, gradients])\n", + "\n", + "f_val, gradients_val" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": { + "height": "603px", + "width": "616px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git 
a/10_introduction_to_artificial_neural_networks.ipynb b/10_introduction_to_artificial_neural_networks.ipynb new file mode 100644 index 0000000..2a512a5 --- /dev/null +++ b/10_introduction_to_artificial_neural_networks.ipynb @@ -0,0 +1,660 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 10 – Introduction to Artificial Neural Networks**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercises in chapter 10._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"ann\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Perceptrons" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.datasets import load_iris\n", + "iris = load_iris()\n", + "X = iris.data[:, (2, 3)] # petal length, petal width\n", + "y = (iris.target == 0).astype(np.int)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import Perceptron\n", + "\n", + "per_clf = Perceptron(random_state=42)\n", + "per_clf.fit(X, y)\n", + "\n", + "y_pred = per_clf.predict([[2, 0.5]])\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a = -per_clf.coef_[0][0] / per_clf.coef_[0][1]\n", + "b = -per_clf.intercept_ / per_clf.coef_[0][1]\n", + "\n", + "axes = [0, 5, 0, 2]\n", + "\n", + "x0, x1 = np.meshgrid(\n", + " np.linspace(axes[0], axes[1], 500).reshape(-1, 1),\n", + " np.linspace(axes[2], axes[3], 200).reshape(-1, 1),\n", + " )\n", + "X_new = np.c_[x0.ravel(), x1.ravel()]\n", + "y_predict = per_clf.predict(X_new)\n", + "zz = y_predict.reshape(x0.shape)\n", + "\n", + "plt.figure(figsize=(10, 4))\n", + "plt.plot(X[y==0, 0], X[y==0, 1], \"bs\", label=\"Not Iris-Setosa\")\n", + "plt.plot(X[y==1, 0], X[y==1, 1], \"yo\", label=\"Iris-Setosa\")\n", + "\n", + "plt.plot([axes[0], axes[1]], [a * axes[0] + b, a * axes[1] + b], \"k-\", 
linewidth=3)\n", + "from matplotlib.colors import ListedColormap\n", + "custom_cmap = ListedColormap(['#9898ff', '#fafab0'])\n", + "\n", + "plt.contourf(x0, x1, zz, cmap=custom_cmap, linewidth=5)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.ylabel(\"Petal width\", fontsize=14)\n", + "plt.legend(loc=\"lower right\", fontsize=14)\n", + "plt.axis(axes)\n", + "\n", + "save_fig(\"perceptron_iris_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Activation functions" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def logit(z):\n", + " return 1 / (1 + np.exp(-z))\n", + "\n", + "def relu(z):\n", + " return np.maximum(0, z)\n", + "\n", + "def derivative(f, z, eps=0.000001):\n", + " return (f(z + eps) - f(z - eps))/(2 * eps)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "z = np.linspace(-5, 5, 200)\n", + "\n", + "plt.figure(figsize=(11,4))\n", + "\n", + "plt.subplot(121)\n", + "plt.plot(z, np.sign(z), \"r-\", linewidth=2, label=\"Step\")\n", + "plt.plot(z, logit(z), \"g--\", linewidth=2, label=\"Logit\")\n", + "plt.plot(z, np.tanh(z), \"b-\", linewidth=2, label=\"Tanh\")\n", + "plt.plot(z, relu(z), \"m-.\", linewidth=2, label=\"ReLU\")\n", + "plt.grid(True)\n", + "plt.legend(loc=\"center right\", fontsize=14)\n", + "plt.title(\"Activation functions\", fontsize=14)\n", + "plt.axis([-5, 5, -1.2, 1.2])\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(z, derivative(np.sign, z), \"r-\", linewidth=2, label=\"Step\")\n", + "plt.plot(0, 0, \"ro\", markersize=5)\n", + "plt.plot(0, 0, \"rx\", markersize=10)\n", + "plt.plot(z, derivative(logit, z), \"g--\", linewidth=2, label=\"Logit\")\n", + "plt.plot(z, derivative(np.tanh, z), \"b-\", linewidth=2, label=\"Tanh\")\n", + "plt.plot(z, derivative(relu, z), \"m-.\", linewidth=2, label=\"ReLU\")\n", + "plt.grid(True)\n", + "#plt.legend(loc=\"center right\", fontsize=14)\n", + "plt.title(\"Derivatives\", fontsize=14)\n", + "plt.axis([-5, 5, -0.2, 1.2])\n", + "\n", + "save_fig(\"activation_functions_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def heaviside(z):\n", + " return (z >= 0).astype(z.dtype)\n", + "\n", + "def sigmoid(z):\n", + " return 1/(1+np.exp(-z))\n", + "\n", + "def mlp_xor(x1, x2, activation=heaviside):\n", + " return activation(-activation(x1 + x2 - 1.5) + activation(x1 + x2 - 0.5) - 0.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "x1s = np.linspace(-0.2, 1.2, 100)\n", + "x2s = np.linspace(-0.2, 1.2, 100)\n", + "x1, x2 = np.meshgrid(x1s, x2s)\n", + "\n", + "z1 = mlp_xor(x1, x2, activation=heaviside)\n", + "z2 = mlp_xor(x1, x2, activation=sigmoid)\n", + "\n", + "plt.figure(figsize=(10,4))\n", + "\n", + "plt.subplot(121)\n", + "plt.contourf(x1, x2, z1)\n", + "plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n", + "plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n", + "plt.title(\"Activation function: heaviside\", fontsize=14)\n", + "plt.grid(True)\n", + "\n", + "plt.subplot(122)\n", + "plt.contourf(x1, x2, z2)\n", + "plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n", + "plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n", + "plt.title(\"Activation function: sigmoid\", fontsize=14)\n", + "plt.grid(True)" + 
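"\n", + "# the inner steps compute AND (threshold 1.5) and OR (threshold 0.5) of the inputs;\n", + "# the output fires only when OR is on and AND is off, which is exactly XOR\n" + 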
] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# FNN for MNIST" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## using tf.learn" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from tensorflow.examples.tutorials.mnist import input_data\n", + "mnist = input_data.read_data_sets(\"/tmp/data/\")\n", + "X_train = mnist.train.images\n", + "X_test = mnist.test.images\n", + "y_train = mnist.train.labels.astype(\"int\")\n", + "y_test = mnist.test.labels.astype(\"int\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "\n", + "feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)\n", + "dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[300, 100], n_classes=10,\n", + " feature_columns=feature_columns)\n", + "dnn_clf.fit(x=X_train, y=y_train, batch_size=50, steps=40000)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.metrics import accuracy_score\n", + "\n", + "y_pred = dnn_clf.predict(X_test)\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.metrics import log_loss\n", + "\n", + "y_pred_proba = dnn_clf.predict_proba(X_test)\n", + "log_loss(y_test, y_pred_proba)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "dnn_clf.evaluate(X_test, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "## Using plain TensorFlow" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "\n", + "def neuron_layer(X, n_neurons, name, activation=None):\n", + " with tf.name_scope(name):\n", + " n_inputs = int(X.get_shape()[1])\n", + " stddev = 1 / np.sqrt(n_inputs)\n", + " init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)\n", + " W = tf.Variable(init, name=\"weights\")\n", + " b = tf.Variable(tf.zeros([n_neurons]), name=\"biases\")\n", + " Z = tf.matmul(X, W) + b\n", + " if activation==\"relu\":\n", + " return tf.nn.relu(Z)\n", + " else:\n", + " return Z" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 28*28 # MNIST\n", + "n_hidden1 = 300\n", + "n_hidden2 = 100\n", + "n_outputs = 10\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", + "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", + "\n", + "with tf.name_scope(\"dnn\"):\n", + " hidden1 = neuron_layer(X, n_hidden1, \"hidden1\", activation=\"relu\")\n", + " hidden2 = neuron_layer(hidden1, n_hidden2, \"hidden2\", activation=\"relu\")\n", + " logits = neuron_layer(hidden2, n_outputs, \"output\")\n", + "\n", + "with tf.name_scope(\"loss\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "\n", + "with tf.name_scope(\"train\"):\n", + " optimizer = 
tf.train.GradientDescentOptimizer(learning_rate)\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + "with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 20\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + "\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " saver.restore(sess, \"my_model_final.ckpt\")\n", + " X_new_scaled = mnist.test.images[:20]\n", + " Z = logits.eval(feed_dict={X: X_new_scaled})\n", + " print(np.argmax(Z, axis=1))\n", + " print(mnist.test.labels[:20])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output, Image, display, HTML\n", + "\n", + "def strip_consts(graph_def, max_const_size=32):\n", + " \"\"\"Strip large constant values from graph_def.\"\"\"\n", + " strip_def = tf.GraphDef()\n", + " for n0 in graph_def.node:\n", + " n = strip_def.node.add() \n", + " n.MergeFrom(n0)\n", + " if n.op == 'Const':\n", + " tensor = n.attr['value'].tensor\n", + " size = len(tensor.tensor_content)\n", + " if size > max_const_size:\n", + " tensor.tensor_content = b\"<stripped %d bytes>\"%size\n", + " return strip_def\n", + "\n", + "def show_graph(graph_def, max_const_size=32):\n", + " \"\"\"Visualize TensorFlow graph.\"\"\"\n", + " if hasattr(graph_def, 'as_graph_def'):\n", + " graph_def = graph_def.as_graph_def()\n", + " strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n", + " code = \"\"\"\n", + " <script>\n", + " function load() {{\n", + " document.getElementById(\"{id}\").pbtxt = {data};\n", + " }}\n", + " </script>\n", + " <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n", + " <div style=\"height:600px\">\n", + " <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n", + " </div>\n", + " \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n", + "\n", + " iframe = \"\"\"\n", + " <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n", + " \"\"\".format(code.replace('\"', '&quot;'))\n", + " display(HTML(iframe))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_graph(tf.get_default_graph())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using \`fully_connected\` instead of \`neuron_layer()\`" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_inputs = 28*28 # MNIST\n", + "n_hidden1 = 300\n", + "n_hidden2 = 100\n", + "n_outputs = 10\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", + "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", + "\n", + "with tf.name_scope(\"dnn\"):\n", + " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", + " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", + " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + "with tf.name_scope(\"loss\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "\n", + "with tf.name_scope(\"train\"):\n", + " optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + "with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 20\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + "\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_graph(tf.get_default_graph())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + },
+ "nav_menu": { + "height": "264px", + "width": "369px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/11_deep_learning.ipynb b/11_deep_learning.ipynb new file mode 100644 index 0000000..761a60f --- /dev/null +++ b/11_deep_learning.ipynb @@ -0,0 +1,931 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 11 – Deep Learning**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercises in chapter 11._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"deep\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Activation functions" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def logit(z):\n", + " return 1 / (1 + np.exp(-z))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "z = np.linspace(-5, 5, 200)\n", + "\n", + "plt.plot([-5, 5], [0, 0], 'k-')\n", + "plt.plot([-5, 5], [1, 1], 'k--')\n", + "plt.plot([0, 0], [-0.2, 1.2], 'k-')\n", + "plt.plot([-5, 5], [-3/4, 7/4], 'g--')\n", + "plt.plot(z, logit(z), \"b-\", linewidth=2)\n", + "props = dict(facecolor='black', shrink=0.1)\n", + "plt.annotate('Saturating', xytext=(3.5, 0.7), xy=(5, 1), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.annotate('Saturating', xytext=(-3.5, 0.3), xy=(-5, 0), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.annotate('Linear', xytext=(2, 0.2), xy=(0, 0.5), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.grid(True)\n", + "plt.title(\"Sigmoid activation function\", fontsize=14)\n", + "plt.axis([-5, 5, -0.2, 1.2])\n", + "\n", + "save_fig(\"sigmoid_saturation_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def leaky_relu(z, alpha=0.01):\n", + " return np.maximum(alpha*z, z)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.plot(z, leaky_relu(z, 0.05), \"b-\", linewidth=2)\n", + "plt.plot([-5, 5], [0, 0], 'k-')\n", + "plt.plot([0, 0], [-0.5, 4.2], 'k-')\n", + "plt.grid(True)\n", + "props = dict(facecolor='black', shrink=0.1)\n", + "plt.annotate('Leak', xytext=(-3.5, 0.5), xy=(-5, -0.2), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.title(\"Leaky ReLU activation function\", fontsize=14)\n", + "plt.axis([-5, 5, -0.5, 4.2])\n", + "\n", + "save_fig(\"leaky_relu_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def elu(z, alpha=1):\n", + " return np.where(z<0, alpha*(np.exp(z)-1), z)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.plot(z, elu(z), \"b-\", linewidth=2)\n", + "plt.plot([-5, 5], [0, 0], 'k-')\n", + "plt.plot([-5, 5], [-1, -1], 'k--')\n", + "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n", + "plt.grid(True)\n", + "props = dict(facecolor='black', shrink=0.1)\n", + "plt.title(r\"ELU activation function ($\\alpha=1$)\", fontsize=14)\n", + "plt.axis([-5, 5, -2.2, 3.2])\n", + "\n", + "save_fig(\"elu_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from tensorflow.examples.tutorials.mnist import input_data\n", + "mnist = input_data.read_data_sets(\"/tmp/data/\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def leaky_relu(z, name=None):\n", + " return tf.maximum(0.01 * z, z, name=name)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import tensorflow as tf" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output, Image, display, HTML\n", + "\n", + "def strip_consts(graph_def, max_const_size=32):\n", + " \"\"\"Strip large constant values from graph_def.\"\"\"\n", + " strip_def = tf.GraphDef()\n", + " for n0 in graph_def.node:\n", + " n = strip_def.node.add() \n", + " n.MergeFrom(n0)\n", + " if n.op == 'Const':\n", + " tensor = n.attr['value'].tensor\n", + " size = len(tensor.tensor_content)\n", + " if size > max_const_size:\n", + " tensor.tensor_content = b\"<stripped %d bytes>\"%size\n", + " return strip_def\n", + "\n", + "def show_graph(graph_def, max_const_size=32):\n", + " \"\"\"Visualize TensorFlow graph.\"\"\"\n", + " if hasattr(graph_def, 'as_graph_def'):\n", + " graph_def = graph_def.as_graph_def()\n", + " strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n", + " code = \"\"\"\n", + " <script>\n", + " function load() {{\n", + " document.getElementById(\"{id}\").pbtxt = {data};\n", + " }}\n", + " </script>\n", + " <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n", + " <div style=\"height:600px\">\n", + " <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n", + " </div>\n", + " \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n", + "\n", + " iframe = \"\"\"\n", + " <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n", + " \"\"\".format(code.replace('\"', '&quot;'))\n", + " display(HTML(iframe))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 28*28 # MNIST\n", + "n_hidden1 = 300\n", + "n_hidden2 = 100\n", + "n_outputs = 10\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", + "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", + "\n", + "with tf.name_scope(\"dnn\"):\n", + " hidden1 = fully_connected(X, n_hidden1, activation_fn=leaky_relu, scope=\"hidden1\")\n", + " hidden2 = fully_connected(hidden1, n_hidden2, activation_fn=leaky_relu, scope=\"hidden2\")\n", + " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + "with tf.name_scope(\"loss\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "\n", + "with tf.name_scope(\"train\"):\n", + " optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + "with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 20\n", + "batch_size = 100\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + "\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Batch Normalization" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from tensorflow.contrib.layers import fully_connected, batch_norm\n", + "from tensorflow.contrib.framework import arg_scope\n", + "\n", + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 28 * 28 # MNIST\n", + "n_hidden1 = 300\n", + "n_hidden2 = 100\n", + "n_outputs = 10\n", + "learning_rate = 0.01\n", + "momentum = 0.25\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", + "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", + "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", + "\n", + "with tf.name_scope(\"dnn\"):\n", + " he_init = tf.contrib.layers.variance_scaling_initializer()\n", + " batch_norm_params = {\n", + " 'is_training': is_training,\n", + " 'decay': 0.9,\n", + " 'updates_collections': None,\n", + " 'scale': True,\n", + " }\n", + "\n", + " with arg_scope(\n", + " [fully_connected],\n", + " activation_fn=tf.nn.elu,\n", + 
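" # He initialization (variance_scaling_initializer) with ELU helps avoid vanishing/exploding gradients early in training\n", + 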
" weights_initializer=he_init,\n", + " normalizer_fn=batch_norm,\n", + " normalizer_params=batch_norm_params):\n", + " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", + " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", + " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + "with tf.name_scope(\"loss\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "\n", + "with tf.name_scope(\"train\"):\n", + " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + "with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 20\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + "\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", + "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", + "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", + "\n", + "with tf.name_scope(\"dnn\"):\n", + " he_init = tf.contrib.layers.variance_scaling_initializer()\n", + " batch_norm_params = {\n", + " 'is_training': is_training,\n", + " 'decay': 0.9,\n", + " 'updates_collections': None,\n", + " 'scale': True,\n", + " }\n", + "\n", + " with arg_scope(\n", + " [fully_connected],\n", + " activation_fn=tf.nn.elu,\n", + " weights_initializer=he_init,\n", + " normalizer_fn=batch_norm,\n", + " normalizer_params=batch_norm_params,\n", + " weights_regularizer=tf.contrib.layers.l1_regularizer(0.01)):\n", + " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", + " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", + " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + "with tf.name_scope(\"loss\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + " reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n", + " base_loss = tf.reduce_mean(xentropy, name=\"base_loss\")\n", + " loss = tf.add_n([base_loss] + reg_losses, name=\"loss\") # reg_losses is a list of tensors, so we use add_n\n", + "\n", + "with tf.name_scope(\"train\"):\n", + " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + "with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, 
tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 20\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + "\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "[v.name for v in tf.all_variables()]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.variable_scope(\"\", reuse=True):\n", + " weights1 = tf.get_variable(\"hidden1/weights\")\n", + " weights2 = tf.get_variable(\"hidden2/weights\")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "x = tf.constant([0., 0., 3., 4., 30., 40., 300., 400.], shape=(4, 2))\n", + "c = tf.clip_by_norm(x, clip_norm=10)\n", + "c0 = tf.clip_by_norm(x, clip_norm=350, axes=0)\n", + "c1 = tf.clip_by_norm(x, clip_norm=10, axes=1)\n", + "\n", + "with tf.Session() as sess:\n", + " xv = x.eval()\n", + " cv = c.eval()\n", + " c0v = c0.eval()\n", + " c1v = c1.eval()\n", + "\n", + "print(xv)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(cv)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(np.linalg.norm(cv))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(c0v)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(np.linalg.norm(c0v, axis=0))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(c1v)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(np.linalg.norm(c1v, axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", + "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", + "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", + "\n", + "def max_norm_regularizer(threshold, axes=1, name=\"max_norm\", collection=\"max_norm\"):\n", + " def max_norm(weights):\n", + " clip_weights = tf.assign(weights, tf.clip_by_norm(weights, clip_norm=threshold, axes=axes), 
name=name)\n", + " tf.add_to_collection(collection, clip_weights)\n", + " return None # there is no regularization loss term\n", + " return max_norm\n", + "\n", + "with tf.name_scope(\"dnn\"):\n", + " with arg_scope(\n", + " [fully_connected],\n", + " weights_regularizer=max_norm_regularizer(1.5)):\n", + " hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n", + " hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n", + " logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + "clip_all_weights = tf.get_collection(\"max_norm\") # these assign ops must be run explicitly (see the training loop below)\n", + " \n", + "with tf.name_scope(\"loss\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "\n", + "with tf.name_scope(\"train\"):\n", + " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", + " threshold = 1.0\n", + " grads_and_vars = optimizer.compute_gradients(loss)\n", + " capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var)\n", + " for grad, var in grads_and_vars]\n", + " training_op = optimizer.apply_gradients(capped_gvs)\n", + "\n", + "with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 20\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n", + " sess.run(clip_all_weights) # apply the max-norm clipping after each training step, or the constraint is never enforced\n", + " acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + "\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_graph(tf.get_default_graph())" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from tensorflow.contrib.layers import dropout\n", + "\n", + "tf.reset_default_graph()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", + "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", + "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", + "\n", + "initial_learning_rate = 0.1\n", + "decay_steps = 10000\n", + "decay_rate = 1/10\n", + "global_step = tf.Variable(0, trainable=False)\n", + "learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step,\n", + " decay_steps, decay_rate)\n", + "\n", + "keep_prob = 0.5\n", + "\n", + "with tf.name_scope(\"dnn\"):\n", + " he_init = tf.contrib.layers.variance_scaling_initializer()\n", + " with arg_scope(\n", + " [fully_connected],\n", + " activation_fn=tf.nn.elu,\n", + " weights_initializer=he_init):\n", + " X_drop = dropout(X, keep_prob, is_training=is_training)\n", + " hidden1 = fully_connected(X_drop, n_hidden1, scope=\"hidden1\")\n", + " hidden1_drop = 
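dropout(hidden1, keep_prob, is_training=is_training)\n", + " # Note: contrib.layers.dropout scales the kept activations by 1/keep_prob at\n", + " # training time, so no rescaling is needed at test time; when is_training is\n", + " # False the layer is simply the identity: 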
dropout(hidden1, keep_prob, is_training=is_training)\n", + " hidden2 = fully_connected(hidden1_drop, n_hidden2, scope=\"hidden2\")\n", + " hidden2_drop = dropout(hidden2, keep_prob, is_training=is_training)\n", + " logits = fully_connected(hidden2_drop, n_outputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + "with tf.name_scope(\"loss\"):\n", + " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "\n", + "with tf.name_scope(\"train\"):\n", + " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", + " training_op = optimizer.minimize(loss, global_step=global_step) \n", + "\n", + "with tf.name_scope(\"eval\"):\n", + " correct = tf.nn.in_top_k(logits, y, 1)\n", + " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 20\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + "\n", + " save_path = saver.save(sess, \"my_model_final.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,\n", + " scope=\"hidden[2]|outputs\")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "training_op2 = optimizer.minimize(loss, var_list=train_vars)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for i in tf.all_variables():\n", + " print(i.name)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for i in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):\n", + " print(i.name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for i in train_vars:\n", + " print(i.name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_train = mnist.train.images\n", + "y_train = mnist.train.labels.astype(\"int\")\n", + "X_val = mnist.test.images[8000:]\n", + "y_val = mnist.test.labels[8000:].astype(\"int\")\n", + "\n", + "feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)\n", + "dnn_clf = tf.contrib.learn.DNNClassifier(\n", + " feature_columns = feature_columns,\n", + " hidden_units=[300, 100],\n", + " n_classes=10,\n", + " model_dir=\"/tmp/my_model\",\n", + " config=tf.contrib.learn.RunConfig(save_checkpoints_secs=60)\n", + " )\n", + "\n", + "validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(\n", + " 
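# Note: this monitor evaluates on (X_val, y_val) every 50 steps, and interrupts\n", + " # training early once the \"loss\" metric has stopped improving for 2000 steps:\n", + "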
X_val,\n", + " y_val,\n", + " every_n_steps=50,\n", + " early_stopping_metric=\"loss\",\n", + " early_stopping_metric_minimize=True,\n", + " early_stopping_rounds=2000\n", + " )\n", + "\n", + "dnn_clf.fit(x=X_train,\n", + " y=y_train,\n", + " steps=40000,\n", + " monitors=[validation_monitor]\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": { + "height": "360px", + "width": "416px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/12_distributed_tensorflow.ipynb b/12_distributed_tensorflow.ipynb new file mode 100644 index 0000000..c438d48 --- /dev/null +++ b/12_distributed_tensorflow.ipynb @@ -0,0 +1,494 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 12 – Distributed TensorFlow**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercises in chapter 12._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"distributed\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Local server" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import tensorflow as tf" + ] + }, + { + "cell_type": "code", + 
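"execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Note: create_local_server() starts an in-process, single-task cluster;\n", + "# any session created with server.target (next cells) executes against it\n" + ] + }, + { + "cell_type": "code", + 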
"execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "c = tf.constant(\"Hello distributed TensorFlow!\")\n", + "server = tf.train.Server.create_local_server()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session(server.target) as sess:\n", + " print(sess.run(c))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cluster" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "cluster_spec = tf.train.ClusterSpec({\n", + " \"ps\": [\n", + " \"127.0.0.1:2221\", # /job:ps/task:0\n", + " \"127.0.0.1:2222\", # /job:ps/task:1\n", + " ],\n", + " \"worker\": [\n", + " \"127.0.0.1:2223\", # /job:worker/task:0\n", + " \"127.0.0.1:2224\", # /job:worker/task:1\n", + " \"127.0.0.1:2225\", # /job:worker/task:2\n", + " ]})" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "task_ps0 = tf.train.Server(cluster_spec, job_name=\"ps\", task_index=0)\n", + "task_ps1 = tf.train.Server(cluster_spec, job_name=\"ps\", task_index=1)\n", + "task_worker0 = tf.train.Server(cluster_spec, job_name=\"worker\", task_index=0)\n", + "task_worker1 = tf.train.Server(cluster_spec, job_name=\"worker\", task_index=1)\n", + "task_worker2 = tf.train.Server(cluster_spec, job_name=\"worker\", task_index=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pinning operations across devices and servers" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "with tf.device(\"/job:ps\"):\n", + " a = tf.Variable(1.0, name=\"a\")\n", + "\n", + "with tf.device(\"/job:worker\"):\n", + " b = a + 2\n", + "\n", + "with tf.device(\"/job:worker/task:1\"):\n", + " c = a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session(\"grpc://127.0.0.1:2221\") as sess:\n", + " sess.run(a.initializer)\n", + " print(c.eval())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "with tf.device(tf.train.replica_device_setter(\n", + " ps_tasks=2,\n", + " ps_device=\"/job:ps\",\n", + " worker_device=\"/job:worker\")):\n", + " v1 = tf.Variable(1.0, name=\"v1\") # pinned to /job:ps/task:0 (defaults to /cpu:0)\n", + " v2 = tf.Variable(2.0, name=\"v2\") # pinned to /job:ps/task:1 (defaults to /cpu:0)\n", + " v3 = tf.Variable(3.0, name=\"v3\") # pinned to /job:ps/task:0 (defaults to /cpu:0)\n", + " s = v1 + v2 # pinned to /job:worker (defaults to task:0/cpu:0)\n", + " with tf.device(\"/task:1\"):\n", + " p1 = 2 * s # pinned to /job:worker/task:1 (defaults to /cpu:0)\n", + " with tf.device(\"/cpu:0\"):\n", + " p2 = 3 * s # pinned to /job:worker/task:1/cpu:0\n", + "\n", + "config = tf.ConfigProto()\n", + "config.log_device_placement = True\n", + "\n", + "with tf.Session(\"grpc://127.0.0.1:2221\", config=config) as sess:\n", + " v1.initializer.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Readers" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + 
"tf.reset_default_graph()\n", + "\n", + "test_csv = open(\"my_test.csv\", \"w\")\n", + "test_csv.write(\"x1, x2 , target\\n\")\n", + "test_csv.write(\"1., , 0\\n\")\n", + "test_csv.write(\"4., 5. , 1\\n\")\n", + "test_csv.write(\"7., 8. , 0\\n\")\n", + "test_csv.close()\n", + "\n", + "filename_queue = tf.FIFOQueue(capacity=10, dtypes=[tf.string], shapes=[()])\n", + "filename = tf.placeholder(tf.string)\n", + "enqueue_filename = filename_queue.enqueue([filename])\n", + "close_filename_queue = filename_queue.close()\n", + "\n", + "reader = tf.TextLineReader(skip_header_lines=1)\n", + "key, value = reader.read(filename_queue)\n", + "\n", + "x1, x2, target = tf.decode_csv(value, record_defaults=[[-1.], [-1.], [-1]])\n", + "features = tf.pack([x1, x2])\n", + "\n", + "instance_queue = tf.RandomShuffleQueue(\n", + " capacity=10, min_after_dequeue=2,\n", + " dtypes=[tf.float32, tf.int32], shapes=[[2],[]],\n", + " name=\"instance_q\", shared_name=\"shared_instance_q\")\n", + "enqueue_instance = instance_queue.enqueue([features, target])\n", + "close_instance_queue = instance_queue.close()\n", + "\n", + "minibatch_instances, minibatch_targets = instance_queue.dequeue_up_to(2)\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(enqueue_filename, feed_dict={filename: \"my_test.csv\"})\n", + " sess.run(close_filename_queue)\n", + " try:\n", + " while True:\n", + " sess.run(enqueue_instance)\n", + " except tf.errors.OutOfRangeError as ex:\n", + " print(\"No more files to read\")\n", + " sess.run(close_instance_queue)\n", + " try:\n", + " while True:\n", + " print(sess.run([minibatch_instances, minibatch_targets]))\n", + " except tf.errors.OutOfRangeError as ex:\n", + " print(\"No more training instances\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#coord = tf.train.Coordinator()\n", + "#threads = tf.train.start_queue_runners(coord=coord)\n", + "#filename_queue = tf.train.string_input_producer([\"test.csv\"])\n", + "#coord.request_stop()\n", + "#coord.join(threads)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Queue runners and coordinators" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "filename_queue = tf.FIFOQueue(capacity=10, dtypes=[tf.string], shapes=[()])\n", + "filename = tf.placeholder(tf.string)\n", + "enqueue_filename = filename_queue.enqueue([filename])\n", + "close_filename_queue = filename_queue.close()\n", + "\n", + "reader = tf.TextLineReader(skip_header_lines=1)\n", + "key, value = reader.read(filename_queue)\n", + "\n", + "x1, x2, target = tf.decode_csv(value, record_defaults=[[-1.], [-1.], [-1]])\n", + "features = tf.pack([x1, x2])\n", + "\n", + "instance_queue = tf.RandomShuffleQueue(\n", + " capacity=10, min_after_dequeue=2,\n", + " dtypes=[tf.float32, tf.int32], shapes=[[2],[]],\n", + " name=\"instance_q\", shared_name=\"shared_instance_q\")\n", + "enqueue_instance = instance_queue.enqueue([features, target])\n", + "close_instance_queue = instance_queue.close()\n", + "\n", + "minibatch_instances, minibatch_targets = instance_queue.dequeue_up_to(2)\n", + "\n", + "n_threads = 5\n", + "queue_runner = tf.train.QueueRunner(instance_queue, [enqueue_instance] * n_threads)\n", + "coord = tf.train.Coordinator()\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(enqueue_filename, feed_dict={filename: \"my_test.csv\"})\n", + 
" sess.run(close_filename_queue)\n", + " enqueue_threads = queue_runner.create_threads(sess, coord=coord, start=True)\n", + " try:\n", + " while True:\n", + " print(sess.run([minibatch_instances, minibatch_targets]))\n", + " except tf.errors.OutOfRangeError as ex:\n", + " print(\"No more training instances\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "def read_and_push_instance(filename_queue, instance_queue):\n", + " reader = tf.TextLineReader(skip_header_lines=1)\n", + " key, value = reader.read(filename_queue)\n", + " x1, x2, target = tf.decode_csv(value, record_defaults=[[-1.], [-1.], [-1]])\n", + " features = tf.pack([x1, x2])\n", + " enqueue_instance = instance_queue.enqueue([features, target])\n", + " return enqueue_instance\n", + "\n", + "filename_queue = tf.FIFOQueue(capacity=10, dtypes=[tf.string], shapes=[()])\n", + "filename = tf.placeholder(tf.string)\n", + "enqueue_filename = filename_queue.enqueue([filename])\n", + "close_filename_queue = filename_queue.close()\n", + "\n", + "instance_queue = tf.RandomShuffleQueue(\n", + " capacity=10, min_after_dequeue=2,\n", + " dtypes=[tf.float32, tf.int32], shapes=[[2],[]],\n", + " name=\"instance_q\", shared_name=\"shared_instance_q\")\n", + "\n", + "minibatch_instances, minibatch_targets = instance_queue.dequeue_up_to(2)\n", + "\n", + "read_and_enqueue_ops = [read_and_push_instance(filename_queue, instance_queue) for i in range(5)]\n", + "queue_runner = tf.train.QueueRunner(instance_queue, read_and_enqueue_ops)\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(enqueue_filename, feed_dict={filename: \"my_test.csv\"})\n", + " sess.run(close_filename_queue)\n", + " coord = tf.train.Coordinator()\n", + " enqueue_threads = queue_runner.create_threads(sess, coord=coord, start=True)\n", + " try:\n", + " while True:\n", + " print(sess.run([minibatch_instances, minibatch_targets]))\n", + " except tf.errors.OutOfRangeError as ex:\n", + " print(\"No more training instances\")\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setting a timeout" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "q = tf.FIFOQueue(capacity=10, dtypes=[tf.float32], shapes=[()])\n", + "v = tf.placeholder(tf.float32)\n", + "enqueue = q.enqueue([v])\n", + "dequeue = q.dequeue()\n", + "output = dequeue + 1\n", + "\n", + "config = tf.ConfigProto()\n", + "config.operation_timeout_in_ms = 1000\n", + "\n", + "with tf.Session(config=config) as sess:\n", + " sess.run(enqueue, feed_dict={v: 1.0})\n", + " sess.run(enqueue, feed_dict={v: 2.0})\n", + " sess.run(enqueue, feed_dict={v: 3.0})\n", + " print(sess.run(output))\n", + " print(sess.run(output, feed_dict={dequeue: 5}))\n", + " print(sess.run(output))\n", + " print(sess.run(output))\n", + " try:\n", + " print(sess.run(output))\n", + " except tf.errors.DeadlineExceededError as ex:\n", + " print(\"Timed out while dequeuing\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + 
"display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": {}, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/13_convolutional_neural_networks.ipynb b/13_convolutional_neural_networks.ipynb new file mode 100644 index 0000000..b4ea324 --- /dev/null +++ b/13_convolutional_neural_networks.ipynb @@ -0,0 +1,613 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 13 – Convolutional Neural Networks**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercices in chapter 13._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"cnn\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A couple utility functions to plot grayscale and RGB images:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def plot_image(image):\n", + " plt.imshow(image, cmap=\"gray\", interpolation=\"nearest\")\n", + " plt.axis(\"off\")\n", + "\n", + "def plot_color_image(image):\n", + " plt.imshow(image.astype(np.uint8),interpolation=\"nearest\")\n", + " plt.axis(\"off\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And of course we will need TensorFlow:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import tensorflow as tf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convolutional layer" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": 
[], + "source": [ + "from sklearn.datasets import load_sample_images\n", + "dataset = load_sample_images()\n", + "china, flower = dataset.images\n", + "image = china[150:220, 130:250]\n", + "height, width, channels = image.shape\n", + "image_grayscale = image.mean(axis=2).astype(np.float32)\n", + "images = image_grayscale.reshape(1, height, width, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fmap = np.zeros(shape=(7, 7, 1, 2), dtype=np.float32)\n", + "fmap[:, 3, 0, 0] = 1\n", + "fmap[3, :, 0, 1] = 1\n", + "fmap[:, :, 0, 0]\n", + "plot_image(fmap[:, :, 0, 0])\n", + "plt.show()\n", + "plot_image(fmap[:, :, 0, 1])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, height, width, 1))\n", + "feature_maps = tf.constant(fmap)\n", + "convolution = tf.nn.conv2d(X, feature_maps, strides=[1,1,1,1], padding=\"SAME\", use_cudnn_on_gpu=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " output = convolution.eval(feed_dict={X: images})" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plot_image(images[0, :, :, 0])\n", + "save_fig(\"china_original\", tight_layout=False)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plot_image(output[0, :, :, 0])\n", + "save_fig(\"china_vertical\", tight_layout=False)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plot_image(output[0, :, :, 1])\n", + "save_fig(\"china_horizontal\", tight_layout=False)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Simple example" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import load_sample_images\n", + "dataset = np.array(load_sample_images().images, dtype=np.float32)\n", + "batch_size, height, width, channels = dataset.shape\n", + "\n", + "filters = np.zeros(shape=(7, 7, channels, 2), dtype=np.float32)\n", + "filters[:, 3, :, 0] = 1 # vertical line\n", + "filters[3, :, :, 1] = 1 # horizontal line\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, height, width, channels))\n", + "convolution = tf.nn.conv2d(X, filters, strides=[1,2,2,1], padding=\"SAME\")\n", + "\n", + "with tf.Session() as sess:\n", + " output = sess.run(convolution, feed_dict={X: dataset})\n", + "\n", + "for image_index in (0, 1):\n", + " for feature_map_index in (0, 1):\n", + " plot_image(output[image_index, :, :, feature_map_index])\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## VALID vs SAME padding" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "filter_primes = np.array([2., 3., 5., 7., 11., 13.], dtype=np.float32)\n", + "x = tf.constant(np.arange(1, 13+1, dtype=np.float32).reshape([1, 1, 13, 1]))\n", + "filters = 
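tf.constant(filter_primes.reshape(1, 6, 1, 1))\n", + "\n", + "# Note (worked arithmetic): with a width-6 filter and stride 5 over 13 inputs,\n", + "# VALID keeps full windows only: floor((13 - 6) / 5) + 1 = 2 outputs, while\n", + "# SAME zero-pads to reach ceil(13 / 5) = 3 outputs; the manual dot products in\n", + "# the next cell reproduce exactly these values\n", + "filters = 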
tf.constant(filter_primes.reshape(1, 6, 1, 1))\n", + "\n", + "valid_conv = tf.nn.conv2d(x, filters, strides=[1, 1, 5, 1], padding='VALID')\n", + "same_conv = tf.nn.conv2d(x, filters, strides=[1, 1, 5, 1], padding='SAME')\n", + "\n", + "with tf.Session() as sess:\n", + " print(\"VALID:\\n\", valid_conv.eval())\n", + " print(\"SAME:\\n\", same_conv.eval())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(\"VALID:\")\n", + "print(np.array([1,2,3,4,5,6]).T.dot(filter_primes))\n", + "print(np.array([6,7,8,9,10,11]).T.dot(filter_primes))\n", + "print(\"SAME:\")\n", + "print(np.array([0,1,2,3,4,5]).T.dot(filter_primes))\n", + "print(np.array([5,6,7,8,9,10]).T.dot(filter_primes))\n", + "print(np.array([10,11,12,13,0,0]).T.dot(filter_primes))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pooling layer" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import load_sample_images\n", + "dataset = np.array(load_sample_images().images, dtype=np.float32)\n", + "batch_size, height, width, channels = dataset.shape\n", + "\n", + "filters = np.zeros(shape=(7, 7, channels, 2), dtype=np.float32)\n", + "filters[:, 3, :, 0] = 1 # vertical line\n", + "filters[3, :, :, 1] = 1 # horizontal line\n", + "\n", + "X = tf.placeholder(tf.float32, shape=(None, height, width, channels))\n", + "max_pool = tf.nn.max_pool(X, ksize=[1, 2, 2, 1], strides=[1,2,2,1], padding=\"VALID\")\n", + "\n", + "with tf.Session() as sess:\n", + " output = sess.run(max_pool, feed_dict={X: dataset})\n", + "\n", + "plot_color_image(dataset[0])\n", + "save_fig(\"china_original\")\n", + "plt.show()\n", + " \n", + "plot_color_image(output[0])\n", + "save_fig(\"china_max_pool\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MNIST" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.datasets import fetch_mldata\n", + "\n", + "mnist = fetch_mldata('MNIST original')\n", + "X_train, X_test = mnist[\"data\"][:60000].astype(np.float64), mnist[\"data\"][60000:].astype(np.float64)\n", + "y_train, y_test = mnist[\"target\"][:60000].astype(np.int64), mnist[\"target\"][60000:].astype(np.int64)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "height, width = 28, 28\n", + "images = X_test[5000].reshape(1, height, width, 1)\n", + "plot_image(images[0, :, :, 0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inception v3" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import tarfile\n", + "import urllib.request\n", + "\n", + "TF_MODELS_URL = \"http://download.tensorflow.org/models\"\n", + "INCEPTION_V3_URL = TF_MODELS_URL + \"/inception_v3_2016_08_28.tar.gz\"\n", + "INCEPTION_PATH = os.path.join(\"datasets\", \"inception\")\n", + "INCEPTION_V3_CHECKPOINT_PATH = os.path.join(INCEPTION_PATH, \"inception_v3.ckpt\")\n", + "\n", + "def download_progress(count, block_size, total_size):\n", + " percent = count * block_size * 100 // total_size\n", + " sys.stdout.write(\"\\rDownloading: {}%\".format(percent))\n", + " sys.stdout.flush()\n", + "\n", + "def 
fetch_pretrained_inception_v3(url=INCEPTION_V3_URL, path=INCEPTION_PATH):\n", + " if os.path.exists(INCEPTION_V3_CHECKPOINT_PATH):\n", + " return\n", + " os.makedirs(path, exist_ok=True)\n", + " tgz_path = os.path.join(path, \"inception_v3.tgz\")\n", + " urllib.request.urlretrieve(url, tgz_path, reporthook=download_progress)\n", + " inception_tgz = tarfile.open(tgz_path)\n", + " inception_tgz.extractall(path=path)\n", + " inception_tgz.close()\n", + " os.remove(tgz_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fetch_pretrained_inception_v3()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import re\n", + "\n", + "CLASS_NAME_REGEX = re.compile(r\"^n\\d+\\s+(.*)\\s*$\", re.M | re.U)\n", + "\n", + "def load_class_names():\n", + " with open(os.path.join(\"datasets\",\"inception\",\"imagenet_class_names.txt\"), \"rb\") as f:\n", + " content = f.read().decode(\"utf-8\")\n", + " return CLASS_NAME_REGEX.findall(content)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "class_names = load_class_names()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "width = 299\n", + "height = 299\n", + "channels = 3" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import matplotlib.image as mpimg\n", + "test_image = mpimg.imread(os.path.join(\"images\",\"cnn\",\"test_image.png\"))[:, :, :channels]\n", + "plt.imshow(test_image)\n", + "plt.axis(\"off\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from nets.inception_v3 import inception_v3, inception_v3_arg_scope\n", + "import tensorflow.contrib.slim as slim\n", + "\n", + "tf.reset_default_graph()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=[None, height, width, channels], name=\"X\")\n", + "with slim.arg_scope(inception_v3_arg_scope()):\n", + " logits, end_points = inception_v3(X, num_classes=1001, is_training=False)\n", + "predictions = end_points[\"Predictions\"]\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_test = test_image.reshape(-1, height, width, channels)\n", + "\n", + "with tf.Session() as sess:\n", + " saver.restore(sess, INCEPTION_V3_CHECKPOINT_PATH)\n", + " predictions_val = predictions.eval(feed_dict={X: X_test})" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "class_names[np.argmax(predictions_val[0])]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.argmax(predictions_val, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "top_5 = np.argpartition(predictions_val[0], -5)[-5:]\n", + "top_5 = top_5[np.argsort(predictions_val[0][top_5])]\n", + "for i in top_5:\n", + " print(\"{0}: {1:.2f}%\".format(class_names[i], 100*predictions_val[0][i]))" + ] + }, + { + 
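"cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Quick check (not in the book): np.argpartition finds the 5 largest\n", + "# probabilities in O(n) without sorting all 1001 classes; a plain argsort\n", + "# returns the same five classes, already in ascending order:\n", + "probas = predictions_val[0]\n", + "top_5_slow = np.argsort(probas)[-5:]\n", + "print([class_names[i] for i in top_5_slow])" + ] + }, + { + 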
"cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": {}, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/14_recurrent_neural_networks.ipynb b/14_recurrent_neural_networks.ipynb new file mode 100644 index 0000000..7f873cd --- /dev/null +++ b/14_recurrent_neural_networks.ipynb @@ -0,0 +1,1326 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 14 – Recurrent Neural Networks**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercices in chapter 14._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"rnn\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + " print(\"Saving figure\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then of course we will need TensorFlow:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import tensorflow as tf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Basic RNNs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Manual RNN" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 3\n", + 
"n_neurons = 5\n", + "\n", + "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n", + "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n", + "\n", + "Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons], dtype=tf.float32))\n", + "Wy = tf.Variable(tf.random_normal(shape=[n_neurons, n_neurons], dtype=tf.float32))\n", + "b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))\n", + "\n", + "Y0 = tf.tanh(tf.matmul(X0, Wx) + b)\n", + "Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0\n", + "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(Y0_val)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(Y1_val)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using `rnn()`" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 3\n", + "n_neurons = 5\n", + "\n", + "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n", + "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n", + "\n", + "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n", + "output_seqs, states = tf.nn.rnn(basic_cell, [X0, X1], dtype=tf.float32)\n", + "Y0, Y1 = output_seqs\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n", + "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "Y0_val" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "Y1_val" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output, Image, display, HTML\n", + "\n", + "def strip_consts(graph_def, max_const_size=32):\n", + " \"\"\"Strip large constant values from graph_def.\"\"\"\n", + " strip_def = tf.GraphDef()\n", + " for n0 in graph_def.node:\n", + " n = strip_def.node.add() \n", + " n.MergeFrom(n0)\n", + " if n.op == 'Const':\n", + " tensor = n.attr['value'].tensor\n", + " size = len(tensor.tensor_content)\n", + " if size > max_const_size:\n", + " tensor.tensor_content = \"b\"%size\n", + " return strip_def\n", + "\n", + "def show_graph(graph_def, max_const_size=32):\n", + " \"\"\"Visualize TensorFlow graph.\"\"\"\n", + " if hasattr(graph_def, 'as_graph_def'):\n", + " graph_def = graph_def.as_graph_def()\n", + " strip_def = 
strip_consts(graph_def, max_const_size=max_const_size)\n", + " code = \"\"\"\n", + " \n", + " \n", + "
\n", + " \n", + "
\n", + " \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n", + "\n", + " iframe = \"\"\"\n", + " \n", + " \"\"\".format(code.replace('\"', '"'))\n", + " display(HTML(iframe))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_graph(tf.get_default_graph())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Packing sequences" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_steps = 2\n", + "n_inputs = 3\n", + "n_neurons = 5\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "X_seqs = tf.unpack(tf.transpose(X, perm=[1, 0, 2]))\n", + "\n", + "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n", + "output_seqs, states = tf.nn.rnn(basic_cell, X_seqs, dtype=tf.float32)\n", + "outputs = tf.transpose(tf.pack(output_seqs), perm=[1, 0, 2])\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_batch = np.array([\n", + " # t = 0 t = 1 \n", + " [[0, 1, 2], [9, 8, 7]], # instance 1\n", + " [[3, 4, 5], [0, 0, 0]], # instance 2\n", + " [[6, 7, 8], [6, 5, 4]], # instance 3\n", + " [[9, 0, 1], [3, 2, 1]], # instance 4\n", + " ])\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " outputs_val = outputs.eval(feed_dict={X: X_batch})" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(np.transpose(outputs_val, axes=[1, 0, 2])[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using `dynamic_rnn()`" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_steps = 2\n", + "n_inputs = 3\n", + "n_neurons = 5\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "\n", + "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n", + "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_batch = np.array([\n", + " [[0, 1, 2], [9, 8, 7]], # instance 1\n", + " [[3, 4, 5], [0, 0, 0]], # instance 2\n", + " [[6, 7, 8], [6, 5, 4]], # instance 3\n", + " [[9, 0, 1], [3, 2, 1]], # instance 4\n", + " ])\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " print(\"outputs =\", outputs.eval(feed_dict={X: X_batch}))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_graph(tf.get_default_graph())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setting the sequence lengths" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_steps = 2\n", + "n_inputs = 3\n", + "n_neurons = 5\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "seq_length = tf.placeholder(tf.int32, [None])\n", + "\n", + "basic_cell = 
tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n", + "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, sequence_length=seq_length, dtype=tf.float32)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_batch = np.array([\n", + " # step 0 step 1\n", + " [[0, 1, 2], [9, 8, 7]], # instance 1\n", + " [[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)\n", + " [[6, 7, 8], [6, 5, 4]], # instance 3\n", + " [[9, 0, 1], [3, 2, 1]], # instance 4\n", + " ])\n", + "seq_length_batch = np.array([2, 1, 2, 2])\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " outputs_val, states_val = sess.run(\n", + " [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(outputs_val)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(states_val)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training a sequence classifier" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_steps = 28\n", + "n_inputs = 28\n", + "n_neurons = 150\n", + "n_outputs = 10\n", + "\n", + "learning_rate = 0.001\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.int32, [None])\n", + "\n", + "with tf.variable_scope(\"\", initializer=tf.contrib.layers.variance_scaling_initializer()):\n", + " basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n", + " outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", + "\n", + "logits = fully_connected(states, n_outputs, activation_fn=None)\n", + "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + "loss = tf.reduce_mean(xentropy)\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "correct = tf.nn.in_top_k(logits, y, 1)\n", + "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from tensorflow.examples.tutorials.mnist import input_data\n", + "mnist = input_data.read_data_sets(\"/tmp/data/\")\n", + "X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))\n", + "y_test = mnist.test.labels" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 100\n", + "batch_size = 150\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n", + " print(epoch, \"Train 
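accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + " # Note: each 28x28 image is fed as a sequence of 28 rows, one row per time\n", + " # step, so the final state has to summarize the whole image for the softmax\n", + " # layer; every epoch the line above reports: Train 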
accuracy:\", acc_train, \"Test accuracy:\", acc_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Multi-layer RNN" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_steps = 28\n", + "n_inputs = 28\n", + "n_neurons1 = 150\n", + "n_neurons2 = 100\n", + "n_outputs = 10\n", + "\n", + "learning_rate = 0.001\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.int32, [None])\n", + "\n", + "hidden1 = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons1, activation=tf.nn.relu)\n", + "hidden2 = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons2, activation=tf.nn.relu)\n", + "multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell([hidden1, hidden2])\n", + "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", + "\n", + "logits = fully_connected(states, n_outputs, activation_fn=None)\n", + "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + "loss = tf.reduce_mean(xentropy)\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "correct = tf.nn.in_top_k(logits, y, 1)\n", + "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 100\n", + "batch_size = 150\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Time series" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "t_min, t_max = 0, 30\n", + "resolution = 0.1\n", + "\n", + "def time_series(t):\n", + " return t * np.sin(t) / 3 + 2 * np.sin(t*5)\n", + "\n", + "def next_batch(batch_size, n_steps):\n", + " t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)\n", + " Ts = t0 + np.arange(0., n_steps + 1) * resolution\n", + " ys = time_series(Ts)\n", + " return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "t = np.linspace(t_min, t_max, (t_max - t_min) // resolution)\n", + "\n", + "n_steps = 20\n", + "t_instance = np.linspace(12.2, 12.2 + resolution * (n_steps + 1), n_steps + 1)\n", + "\n", + "plt.figure(figsize=(11,4))\n", + "plt.subplot(121)\n", + "plt.title(\"A time series (generated)\", fontsize=14)\n", + "plt.plot(t, time_series(t), label=r\"$t . \\sin(t) / 3 + 2 . 
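\\sin(5t)$\")\n", + "# Note: the training targets are just the same series shifted one step ahead,\n", + "# as the right-hand plot shows (dots = inputs, stars = targets)\n", + 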
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"b-\", linewidth=3, label=\"A training instance\")\n", + "plt.legend(loc=\"lower left\", fontsize=14)\n", + "plt.axis([0, 30, -17, 13])\n", + "plt.xlabel(\"Time\")\n", + "plt.ylabel(\"Value\")\n", + "\n", + "plt.subplot(122)\n", + "plt.title(\"A training instance\", fontsize=14)\n", + "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", + "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", + "plt.legend(loc=\"upper left\")\n", + "plt.xlabel(\"Time\")\n", + "\n", + "\n", + "save_fig(\"time_series_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_batch, y_batch = next_batch(1, n_steps)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.c_[X_batch[0], y_batch[0]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using an \`OutputProjectionWrapper\`" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_steps = 20\n", + "n_inputs = 1\n", + "n_neurons = 100\n", + "n_outputs = 1\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", + "\n", + "cell = tf.nn.rnn_cell.OutputProjectionWrapper(\n", + " tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),\n", + " output_size=n_outputs)\n", + "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)\n", + "\n", + "learning_rate = 0.001\n", + "\n", + "loss = tf.reduce_sum(tf.square(outputs - y))\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_iterations = 1000\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for iteration in range(n_iterations):\n", + " X_batch, y_batch = next_batch(batch_size, n_steps)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " if iteration % 100 == 0:\n", + " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", + " print(iteration, \"\\tMSE:\", mse)\n", + " \n", + " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", + " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", + " print(y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.title(\"Testing the model\", fontsize=14)\n", + "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", + "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", + "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n", + "plt.legend(loc=\"upper left\")\n", + "plt.xlabel(\"Time\")\n", + "\n", + "save_fig(\"time_series_pred_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + 
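"metadata": {}, + "source": [ + "_Note (added): \`OutputProjectionWrapper\` applies one shared, activation-free linear layer to the output of every time step. The quick check below (a sketch, not in the book) computes the MSE of the prediction plotted above; the next section then builds the same projection manually, which is more efficient since it projects all time steps in a single matrix multiplication._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# quick check: MSE of the prediction plotted above\n", + "print(np.mean((y_pred[0, :, 0] - time_series(t_instance[1:]))**2))" + ] + }, + { + "cell_type": "markdown", + 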
"metadata": {}, + "source": [ + "## Without using an `OutputProjectionWrapper`" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_steps = 20\n", + "n_inputs = 1\n", + "n_neurons = 100\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", + "\n", + "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n", + "rnn_outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", + "\n", + "n_outputs = 1\n", + "learning_rate = 0.001\n", + "\n", + "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", + "stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n", + "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", + "\n", + "loss = tf.reduce_sum(tf.square(outputs - y))\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_iterations = 1000\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for iteration in range(n_iterations):\n", + " X_batch, y_batch = next_batch(batch_size, n_steps)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " if iteration % 100 == 0:\n", + " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", + " print(iteration, \"\\tMSE:\", mse)\n", + " \n", + " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", + " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", + " print(y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.title(\"Testing the model\", fontsize=14)\n", + "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", + "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", + "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n", + "plt.legend(loc=\"upper left\")\n", + "plt.xlabel(\"Time\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generating a creative new sequence" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_iterations = 2000\n", + "batch_size = 50\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for iteration in range(n_iterations):\n", + " X_batch, y_batch = next_batch(batch_size, n_steps)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " if iteration % 100 == 0:\n", + " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", + " print(iteration, \"\\tMSE:\", mse)\n", + "\n", + " sequence1 = [0. 
 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generating a creative new sequence" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_iterations = 2000\n", + "batch_size = 50\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for iteration in range(n_iterations):\n", + " X_batch, y_batch = next_batch(batch_size, n_steps)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " if iteration % 100 == 0:\n", + " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", + " print(iteration, \"\\tMSE:\", mse)\n", + "\n", + " sequence1 = [0. for i in range(n_steps)]\n", + " for iteration in range(len(t) - n_steps):\n", + " X_batch = np.array(sequence1[-n_steps:]).reshape(1, n_steps, 1)\n", + " y_pred = sess.run(outputs, feed_dict={X: X_batch})\n", + " sequence1.append(y_pred[0, -1, 0])\n", + "\n", + " sequence2 = [time_series(i * resolution + t_min + (t_max - t_min) / 3) for i in range(n_steps)]\n", + " for iteration in range(len(t) - n_steps):\n", + " X_batch = np.array(sequence2[-n_steps:]).reshape(1, n_steps, 1)\n", + " y_pred = sess.run(outputs, feed_dict={X: X_batch})\n", + " sequence2.append(y_pred[0, -1, 0])\n", + "\n", + "plt.figure(figsize=(11,4))\n", + "plt.subplot(121)\n", + "plt.plot(t, sequence1, \"b-\")\n", + "plt.plot(t[:n_steps], sequence1[:n_steps], \"b-\", linewidth=3)\n", + "plt.xlabel(\"Time\")\n", + "plt.ylabel(\"Value\")\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(t, sequence2, \"b-\")\n", + "plt.plot(t[:n_steps], sequence2[:n_steps], \"b-\", linewidth=3)\n", + "plt.xlabel(\"Time\")\n", + "#save_fig(\"creative_sequence_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Deep RNN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## MultiRNNCell" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 2\n", + "n_neurons = 100\n", + "n_layers = 3\n", + "n_steps = 5\n", + "keep_prob = 0.5\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n", + "multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell([basic_cell] * n_layers)\n", + "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "X_batch = rnd.rand(2, n_steps, n_inputs)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " init.run()\n", + " outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch})" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "outputs_val.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dropout" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_inputs = 1\n", + "n_neurons = 100\n", + "n_layers = 3\n", + "n_steps = 20\n", + "n_outputs = 1\n", + "\n", + "keep_prob = 0.5\n", + "learning_rate = 0.001\n", + "\n", + "is_training = True\n", + "\n", + "def deep_rnn_with_dropout(X, y, is_training):\n", + " cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n", + " if is_training:\n", + " cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=keep_prob)\n", + " multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell([cell] * n_layers)\n", + " rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", + "\n", + " stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", + " stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n", + " outputs = 
tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", + "\n", + " loss = tf.reduce_sum(tf.square(outputs - y))\n", + " optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + " return outputs, loss, training_op\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", + "outputs, loss, training_op = deep_rnn_with_dropout(X, y, is_training)\n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_iterations = 2000\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " if is_training:\n", + " init.run()\n", + " for iteration in range(n_iterations):\n", + " X_batch, y_batch = next_batch(batch_size, n_steps)\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " if iteration % 100 == 0:\n", + " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", + " print(iteration, \"\\tMSE:\", mse)\n", + " save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n", + " else:\n", + " saver.restore(sess, \"/tmp/my_model.ckpt\")\n", + " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", + " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", + " \n", + " plt.title(\"Testing the model\", fontsize=14)\n", + " plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", + " plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", + " plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n", + " plt.legend(loc=\"upper left\")\n", + " plt.xlabel(\"Time\")\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LSTM" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_steps = 28\n", + "n_inputs = 28\n", + "n_neurons = 150\n", + "n_outputs = 10\n", + "\n", + "learning_rate = 0.001\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.int32, [None])\n", + "\n", + "lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=n_neurons, state_is_tuple=True)\n", + "multi_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell]*3, state_is_tuple=True)\n", + "outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n", + "top_layer_h_state = states[-1][1]\n", + "logits = fully_connected(top_layer_h_state, n_outputs, activation_fn=None, scope=\"softmax\")\n", + "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n", + "loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "correct = tf.nn.in_top_k(logits, y, 1)\n", + "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "states" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "top_layer_h_state" + ] + }, 
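 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With `state_is_tuple=True`, `states` holds one `(c, h)` state pair per layer, so `states[-1][1]` is the short-term state `h` of the top LSTM layer. As a quick sanity check (a sketch, assuming the LSTM graph above has just been built), the tensor fed to the softmax layer should have one row per instance and `n_neurons` columns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Sanity check (sketch): static shapes of the top state and the logits.\n", + "print(top_layer_h_state.get_shape()) # expected (?, 150): unknown batch size, n_neurons\n", + "print(logits.get_shape()) # expected (?, 10): one logit per class" + ] + },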
+ { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 10\n", + "batch_size = 150\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " for iteration in range(len(mnist.test.labels)//batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " X_batch = X_batch.reshape((batch_size, n_steps, n_inputs))\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n", + " print(\"Epoch\", epoch, \"Train accuracy =\", acc_train, \"Test accuracy =\", acc_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Distributing layers across devices" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "\n", + "class DeviceCellWrapper(tf.nn.rnn_cell.RNNCell):\n", + " def __init__(self, device, cell):\n", + " self._cell = cell\n", + " self._device = device\n", + "\n", + " @property\n", + " def state_size(self):\n", + " return self._cell.state_size\n", + "\n", + " @property\n", + " def output_size(self):\n", + " return self._cell.output_size\n", + "\n", + " def __call__(self, inputs, state, scope=None):\n", + " with tf.device(self._device):\n", + " return self._cell(inputs, state, scope)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 5\n", + "n_neurons = 100\n", + "devices = [\"/cpu:0\"]*5\n", + "n_steps = 20\n", + "X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])\n", + "lstm_cells = [DeviceCellWrapper(device, tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons))\n", + " for device in devices]\n", + "multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)\n", + "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " init.run()\n", + " print(sess.run(outputs, feed_dict={X: rnd.rand(2, n_steps, n_inputs)}))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": {}, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/classification.ipynb b/classification.ipynb index 4c7cc98..ea7f79b 100644 --- a/classification.ipynb +++ 
b/classification.ipynb @@ -4,7 +4,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Classification**" + "**Chapter 3 – Classification**\n", + "\n", + "_This notebook contains all the sample code and solutions to the exercises in chapter 3._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" ] }, { @@ -15,14 +31,18 @@ }, "outputs": [], "source": [ + "# To support both python 2 and python 3\n", "from __future__ import division, print_function, unicode_literals\n", "\n", + "# Common imports\n", "import numpy as np\n", "import numpy.random as rnd\n", - "rnd.seed(42) # to make this notebook's output stable across runs\n", - "\n", "import os\n", "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", "%matplotlib inline\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", @@ -30,6 +50,7 @@ "plt.rcParams['xtick.labelsize'] = 12\n", "plt.rcParams['ytick.labelsize'] = 12\n", "\n", + "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", "CHAPTER_ID = \"classification\"\n", "\n", @@ -122,7 +143,7 @@ "some_digit_index = 36000\n", "some_digit = X[some_digit_index]\n", "plot_digit(some_digit)\n", - "save_fig(\"some_digit\")\n", + "save_fig(\"some_digit_plot\")\n", "plt.show()" ] }, @@ -153,7 +174,7 @@ "plt.figure(figsize=(9,9))\n", "example_images = np.r_[X[:12000:600], X[13000:30600:600], X[30600:60000:590]]\n", "plot_digits(example_images, images_per_row=10)\n", - "save_fig(\"more_digits\")\n", + "save_fig(\"more_digits_plot\")\n", "plt.show()" ] }, @@ -980,7 +1001,7 @@ "some_index = 5500\n", "plt.subplot(121); plot_digit(X_test_mod[some_index])\n", "plt.subplot(122); plot_digit(y_test_mod[some_index])\n", - "save_fig(\"noisy_digit_example\")\n", + "save_fig(\"noisy_digit_example_plot\")\n", "plt.show()" ] }, @@ -1005,7 +1026,7 @@ "source": [ "clean_digit = knn_clf.predict([X_test_mod[some_index]])\n", "plot_digit(clean_digit)\n", - "save_fig(\"cleaned_digit_example\")\n", + "save_fig(\"cleaned_digit_example_plot\")\n", "plt.show()" ] }, @@ -1183,6 +1204,31 @@ "source": [ "plot_digit(ambiguous_digit)" ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Coming soon**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] } ], "metadata": { @@ -1203,10 +1249,14 @@ "pygments_lexer": "ipython3", "version": "3.5.1" }, + "nav_menu": {}, "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, "toc_cell": false, - "toc_number_sections": true, - "toc_threshold": 6, + "toc_section_display": "block", "toc_window_display": false } }, diff --git a/images/ann/README b/images/ann/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/ann/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/autoencoders/README b/images/autoencoders/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/autoencoders/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/cnn/README 
b/images/cnn/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/cnn/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/cnn/test_image.png b/images/cnn/test_image.png new file mode 100644 index 0000000..2d53756 Binary files /dev/null and b/images/cnn/test_image.png differ diff --git a/images/decision_trees/README b/images/decision_trees/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/decision_trees/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/deep/README b/images/deep/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/deep/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/dim_reduction/README b/images/dim_reduction/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/dim_reduction/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/distributed/README b/images/distributed/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/distributed/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/ensembles/README b/images/ensembles/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/ensembles/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/rl/README b/images/rl/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/rl/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/rnn/README b/images/rnn/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/rnn/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/svm/README b/images/svm/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/svm/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/images/tensorflow/README b/images/tensorflow/README new file mode 100644 index 0000000..1c4283b --- /dev/null +++ b/images/tensorflow/README @@ -0,0 +1 @@ +Images generated by the notebooks diff --git a/index.ipynb b/index.ipynb index c9878a6..a1c72fe 100644 --- a/index.ipynb +++ b/index.ipynb @@ -16,19 +16,29 @@ "\n", "### To run the examples\n", "* **Jupyter** – These notebooks are based on Jupyter. If you just plan to read without running any code, there's really nothing more to know, just keep reading! But if you want to experiment with the code examples you need to:\n", - " * open these notebooks in Jupyter. If you clicked on the \"launch binder\" button in github or followed the Installation instructions, then you are good to go. If not you will need to go back to the project [home page](https://github.com/ageron/ml-notebooks/) and click on \"launch binder\" or follow the installation instructions.\n", + " * open these notebooks in Jupyter. If you clicked on the \"launch binder\" button in github or followed the Installation instructions, then you are good to go. If not you will need to go back to the project [home page](https://github.com/ageron/handson-ml/) and click on \"launch binder\" or follow the installation instructions.\n", " * learn how to use Jupyter. Start the User Interface Tour from the Help menu.\n", "\n", "### To activate extensions\n", - "* If this is an interactive session (see above), you may want to turn on a few Jupyter extensions by going to the [Extension Configuration](../nbextensions/) page. 
In particular the \"*table of contents (2)*\" extension is quite useful.\n", + "* If this is an interactive session (see above), you may want to turn on a few Jupyter extensions by going to the [Extension Configuration](../nbextensions/) page. In particular the \"*Table of Contents (2)*\" extension is quite useful.\n", "\n", - "## Chapters\n", - "1. [Fundamentals](fundamentals.ipynb)\n", - "2. [End-to-end project](end_to_end_project.ipynb)\n", - "3. [Classification](classification.ipynb)\n", - "4. [Training Linear Models](training_linear_models.ipynb)\n", - "\n", - "More explanations and chapters coming soon.\n", + "## Notebooks\n", + "1. [The Machine Learning landscape](01_the_machine_learning_landscape.ipynb)\n", + "2. [End-to-end Machine Learning project](02_end_to_end_machine_learning_project.ipynb)\n", + "3. [Classification](03_classification.ipynb)\n", + "4. [Training Linear Models](04_training_linear_models.ipynb)\n", + "5. [Support Vector Machines](05_support_vector_machines.ipynb)\n", + "6. [Decision Trees](06_decision_trees.ipynb)\n", + "7. [Ensemble Learning and Random Forests](07_ensemble_learning_and_random_forests.ipynb)\n", + "8. [Dimensionality Reduction](08_dimensionality_reduction.ipynb)\n", + "9. [Up and running with TensorFlow](09_up_and_running_with_tensorflow.ipynb)\n", + "10. [Introduction to Artificial Neural Networks](10_introduction_to_artificial_neural_networks.ipynb)\n", + "11. [Deep Learning](11_deep_learning.ipynb)\n", + "12. [Distributed TensorFlow](12_distributed_tensorflow.ipynb)\n", + "13. [Convolutional Neural Networks](13_convolutional_neural_networks.ipynb)\n", + "14. [Recurrent Neural Networks](14_recurrent_neural_networks.ipynb)\n", + "15. Autoencoders (coming soon)\n", + "16. Reinforcement Learning (coming soon)\n", "\n", "## Scientific Python tutorials\n", "* [NumPy](tools_numpy.ipynb)\n", @@ -39,6 +49,15 @@ "* [Linear Algebra](math_linear_algebra.ipynb)\n", "* Calculus (coming soon)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] } ], "metadata": { @@ -59,10 +78,14 @@ "pygments_lexer": "ipython2", "version": "2.7.11" }, + "nav_menu": {}, "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, "toc_cell": false, - "toc_number_sections": true, - "toc_threshold": 6, + "toc_section_display": "block", "toc_window_display": false } }, diff --git a/nets/inception_v3.py b/nets/inception_v3.py index d5a1fe3..2897a4b 100644 --- a/nets/inception_v3.py +++ b/nets/inception_v3.py @@ -94,7 +94,9 @@ def inception_v3_base(inputs, raise ValueError('depth_multiplier is not greater than zero.') depth = lambda d: max(int(d * depth_multiplier), min_depth) - with tf.variable_scope(scope, 'InceptionV3', [inputs]): + #Backported to 0.10.0 + #with tf.variable_scope(scope, 'InceptionV3', [inputs]): + with tf.variable_scope(scope or 'InceptionV3'): with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='VALID'): # 299 x 299 x 3 @@ -470,8 +472,10 @@ def inception_v3(inputs, raise ValueError('depth_multiplier is not greater than zero.') depth = lambda d: max(int(d * depth_multiplier), min_depth) - with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes], - reuse=reuse) as scope: + #Backported to 0.10.0 + #with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes], + # reuse=reuse) as scope: + with tf.variable_scope(scope or 'InceptionV3', reuse=reuse) as scope: with slim.arg_scope([slim.batch_norm, 
slim.dropout], is_training=is_training): net, end_points = inception_v3_base(