Add exercise solutions for chapter 10, and improve code sync between book and notebook
parent 3680f6a27c
commit 6281e153cd
@@ -44,7 +44,7 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"collapsed": false,
"deletable": true,
"editable": true
},
@@ -101,10 +101,18 @@
},
"outputs": [],
"source": [
"import numpy as np\n",
"from sklearn.datasets import load_iris\n",
"from sklearn.linear_model import Perceptron\n",
"\n",
"iris = load_iris()\n",
"X = iris.data[:, (2, 3)] # petal length, petal width\n",
"y = (iris.target == 0).astype(np.int)"
"y = (iris.target == 0).astype(np.int)\n",
"\n",
"per_clf = Perceptron(random_state=42)\n",
"per_clf.fit(X, y)\n",
"\n",
"y_pred = per_clf.predict([[2, 0.5]])"
]
},
{
@@ -117,12 +125,6 @@
},
"outputs": [],
"source": [
"from sklearn.linear_model import Perceptron\n",
"\n",
"per_clf = Perceptron(random_state=42)\n",
"per_clf.fit(X, y)\n",
"\n",
"y_pred = per_clf.predict([[2, 0.5]])\n",
"y_pred"
]
},
@@ -322,29 +324,24 @@
"outputs": [],
"source": [
"from tensorflow.examples.tutorials.mnist import input_data\n",
"mnist = input_data.read_data_sets(\"/tmp/data/\")\n",
"X_train = mnist.train.images\n",
"X_test = mnist.test.images\n",
"y_train = mnist.train.labels.astype(\"int\")\n",
"y_test = mnist.test.labels.astype(\"int\")"
"\n",
"mnist = input_data.read_data_sets(\"/tmp/data/\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)\n",
"dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[300, 100], n_classes=10,\n",
"                                         feature_columns=feature_columns)\n",
"dnn_clf.fit(x=X_train, y=y_train, batch_size=50, steps=40000)"
"X_train = mnist.train.images\n",
"X_test = mnist.test.images\n",
"y_train = mnist.train.labels.astype(\"int\")\n",
"y_test = mnist.test.labels.astype(\"int\")"
]
},
{
@@ -357,11 +354,13 @@
},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score\n",
"import tensorflow as tf\n",
"\n",
"y_pred = list(dnn_clf.predict(X_test))\n",
"accuracy = accuracy_score(y_test, y_pred)\n",
"accuracy"
"feature_cols = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)\n",
"dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[300,100], n_classes=10,\n",
"                                         feature_columns=feature_cols)\n",
"dnn_clf = tf.contrib.learn.SKCompat(dnn_clf) # if TensorFlow >= 1.1\n",
"dnn_clf.fit(X_train, y_train, batch_size=50, steps=40000)"
]
},
{
@@ -374,10 +373,10 @@
},
"outputs": [],
"source": [
"from sklearn.metrics import log_loss\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"y_pred_proba = list(dnn_clf.predict_proba(X_test))\n",
"log_loss(y_test, y_pred_proba)"
"y_pred = dnn_clf.predict(X_test)\n",
"accuracy_score(y_test, y_pred['classes'])"
]
},
{
@@ -390,7 +389,10 @@
},
"outputs": [],
"source": [
"dnn_clf.evaluate(X_test, y_test)"
"from sklearn.metrics import log_loss\n",
"\n",
"y_pred_proba = y_pred['probabilities']\n",
"log_loss(y_test, y_pred_proba)"
]
},
{
@@ -408,7 +410,7 @@
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true,
"collapsed": false,
"deletable": true,
"editable": true
},
@@ -416,18 +418,10 @@
"source": [
"import tensorflow as tf\n",
"\n",
"def neuron_layer(X, n_neurons, name, activation=None):\n",
"    with tf.name_scope(name):\n",
"        n_inputs = int(X.get_shape()[1])\n",
"        stddev = 1 / np.sqrt(n_inputs)\n",
"        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)\n",
"        W = tf.Variable(init, name=\"weights\")\n",
"        b = tf.Variable(tf.zeros([n_neurons]), name=\"biases\")\n",
"        Z = tf.matmul(X, W) + b\n",
"        if activation==\"relu\":\n",
"            return tf.nn.relu(Z)\n",
"        else:\n",
"            return Z"
"n_inputs = 28*28 # MNIST\n",
"n_hidden1 = 300\n",
"n_hidden2 = 100\n",
"n_outputs = 10"
]
},
{
@@ -442,60 +436,32 @@
"source": [
"tf.reset_default_graph()\n",
"\n",
"n_inputs = 28*28 # MNIST\n",
"n_hidden1 = 300\n",
"n_hidden2 = 100\n",
"n_outputs = 10\n",
"learning_rate = 0.01\n",
"\n",
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
"\n",
"with tf.name_scope(\"dnn\"):\n",
"    hidden1 = neuron_layer(X, n_hidden1, \"hidden1\", activation=\"relu\")\n",
"    hidden2 = neuron_layer(hidden1, n_hidden2, \"hidden2\", activation=\"relu\")\n",
"    logits = neuron_layer(hidden2, n_outputs, \"output\")\n",
"\n",
"with tf.name_scope(\"loss\"):\n",
"    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
"    loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
"\n",
"with tf.name_scope(\"train\"):\n",
"    optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n",
"    training_op = optimizer.minimize(loss)\n",
"\n",
"with tf.name_scope(\"eval\"):\n",
"    correct = tf.nn.in_top_k(logits, y, 1)\n",
"    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
" \n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false,
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"n_epochs = 20\n",
"batch_size = 50\n",
"\n",
"with tf.Session() as sess:\n",
"    init.run()\n",
"    for epoch in range(n_epochs):\n",
"        for iteration in range(mnist.train.num_examples // batch_size):\n",
"            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
"            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
"        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
"        acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n",
"        print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
"\n",
"    save_path = saver.save(sess, \"./my_model_final.ckpt\")"
"def neuron_layer(X, n_neurons, name, activation=None):\n",
"    with tf.name_scope(name):\n",
"        n_inputs = int(X.get_shape()[1])\n",
"        stddev = 2 / np.sqrt(n_inputs)\n",
"        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)\n",
"        W = tf.Variable(init, name=\"kernel\")\n",
"        b = tf.Variable(tf.zeros([n_neurons]), name=\"bias\")\n",
"        Z = tf.matmul(X, W) + b\n",
"        if activation is not None:\n",
"            return activation(Z)\n",
"        else:\n",
"            return Z"
]
},
{
@@ -508,12 +474,12 @@
},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
"    saver.restore(sess, save_path) #\"my_model_final.ckpt\")\n",
"    X_new_scaled = mnist.test.images[:20]\n",
"    Z = logits.eval(feed_dict={X: X_new_scaled})\n",
"    print(np.argmax(Z, axis=1))\n",
"    print(mnist.test.labels[:20])"
"with tf.name_scope(\"dnn\"):\n",
"    hidden1 = neuron_layer(X, n_hidden1, name=\"hidden1\",\n",
"                           activation=tf.nn.relu)\n",
"    hidden2 = neuron_layer(hidden1, n_hidden2, name=\"hidden2\",\n",
"                           activation=tf.nn.relu)\n",
"    logits = neuron_layer(hidden2, n_outputs, name=\"outputs\")"
]
},
{
@@ -525,6 +491,137 @@
"editable": true
},
"outputs": [],
"source": [
"with tf.name_scope(\"loss\"):\n",
"    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,\n",
"                                                              logits=logits)\n",
"    loss = tf.reduce_mean(xentropy, name=\"loss\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"learning_rate = 0.01\n",
"\n",
"with tf.name_scope(\"train\"):\n",
"    optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n",
"    training_op = optimizer.minimize(loss)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.name_scope(\"eval\"):\n",
"    correct = tf.nn.in_top_k(logits, y, 1)\n",
"    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"n_epochs = 40\n",
"batch_size = 50"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
"    init.run()\n",
"    for epoch in range(n_epochs):\n",
"        for iteration in range(mnist.train.num_examples // batch_size):\n",
"            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
"            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
"        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
"        acc_test = accuracy.eval(feed_dict={X: mnist.test.images,\n",
"                                            y: mnist.test.labels})\n",
"        print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
"\n",
"    save_path = saver.save(sess, \"./my_model_final.ckpt\")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
"    saver.restore(sess, \"./my_model_final.ckpt\") # or better, use save_path\n",
"    X_new_scaled = mnist.test.images[:20]\n",
"    Z = logits.eval(feed_dict={X: X_new_scaled})\n",
"    y_pred = np.argmax(Z, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"print(\"Predicted classes:\", y_pred)\n",
"print(\"Actual classes: \", mnist.test.labels[:20])"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from IPython.display import clear_output, Image, display, HTML\n",
"\n",
@@ -566,7 +663,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 27,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -589,7 +686,10 @@
},
{
"cell_type": "markdown",
"metadata": {},
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function, except for a few minor differences:\n",
"* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n",
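To make the parameter renaming concrete, here is a minimal sketch of the same hidden layer written both ways. This example is an editorial illustration, not part of the commit above; it assumes a TensorFlow 1.x environment where both APIs are available, and the layer names are arbitrary placeholders.

import tensorflow as tf
from tensorflow.contrib.layers import fully_connected

X = tf.placeholder(tf.float32, shape=(None, 28 * 28), name="X")

# contrib version: scope / activation_fn (weights_initializer, etc.)
hidden_contrib = fully_connected(X, 300, scope="hidden1_contrib",
                                 activation_fn=tf.nn.relu)

# tf.layers version: name / activation (kernel_initializer, etc.)
hidden_dense = tf.layers.dense(X, 300, name="hidden1_dense",
                               activation=tf.nn.relu)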
@@ -599,7 +699,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 28,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -607,41 +707,102 @@
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"n_inputs = 28*28 # MNIST\n",
"n_hidden1 = 300\n",
"n_hidden2 = 100\n",
"n_outputs = 10\n",
"learning_rate = 0.01\n",
"n_outputs = 10"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
"\n",
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\") "
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.name_scope(\"dnn\"):\n",
"    hidden1 = tf.layers.dense(X, n_hidden1, name=\"hidden1\", activation=tf.nn.relu)\n",
"    hidden2 = tf.layers.dense(hidden1, n_hidden2, name=\"hidden2\", activation=tf.nn.relu)\n",
"    logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")\n",
"\n",
"    hidden1 = tf.layers.dense(X, n_hidden1, name=\"hidden1\",\n",
"                              activation=tf.nn.relu)\n",
"    hidden2 = tf.layers.dense(hidden1, n_hidden2, name=\"hidden2\",\n",
"                              activation=tf.nn.relu)\n",
"    logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"with tf.name_scope(\"loss\"):\n",
"    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
"    loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
"    loss = tf.reduce_mean(xentropy, name=\"loss\")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"learning_rate = 0.01\n",
"\n",
"with tf.name_scope(\"train\"):\n",
"    optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n",
"    training_op = optimizer.minimize(loss)\n",
"\n",
"    training_op = optimizer.minimize(loss)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"with tf.name_scope(\"eval\"):\n",
"    correct = tf.nn.in_top_k(logits, y, 1)\n",
"    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
" \n",
"    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 35,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -667,7 +828,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 36,
"metadata": {
"collapsed": false,
"deletable": true,
@@ -696,16 +857,334 @@
"editable": true
},
"source": [
"**Coming soon**"
"## 1. to 8."
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"source": [
"See appendix A."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 9."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"_Train a deep MLP on the MNIST dataset and see if you can get over 98% precision. Just like in the last exercise of chapter 9, try adding all the bells and whistles (i.e., save checkpoints, restore the last checkpoint in case of an interruption, add summaries, plot learning curves using TensorBoard, and so on)._"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First let's create the deep net. It's exactly the same as earlier, with just one addition: we add a `tf.summary.scalar()` to track the loss and the accuracy during training, so we can view nice learning curves using TensorBoard."
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"n_inputs = 28*28 # MNIST\n",
"n_hidden1 = 300\n",
"n_hidden2 = 100\n",
"n_outputs = 10"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\") "
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.name_scope(\"dnn\"):\n",
"    hidden1 = tf.layers.dense(X, n_hidden1, name=\"hidden1\",\n",
"                              activation=tf.nn.relu)\n",
"    hidden2 = tf.layers.dense(hidden1, n_hidden2, name=\"hidden2\",\n",
"                              activation=tf.nn.relu)\n",
"    logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.name_scope(\"loss\"):\n",
"    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
"    loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
"    loss_summary = tf.summary.scalar('log_loss', loss)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"learning_rate = 0.01\n",
"\n",
"with tf.name_scope(\"train\"):\n",
"    optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n",
"    training_op = optimizer.minimize(loss)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.name_scope(\"eval\"):\n",
"    correct = tf.nn.in_top_k(logits, y, 1)\n",
"    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
"    accuracy_summary = tf.summary.scalar('accuracy', accuracy)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we need to define the directory to write the TensorBoard logs to:"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from datetime import datetime\n",
"\n",
"def log_dir(prefix=\"\"):\n",
"    now = datetime.utcnow().strftime(\"%Y%m%d%H%M%S\")\n",
"    root_logdir = \"tf_logs\"\n",
"    if prefix:\n",
"        prefix += \"-\"\n",
"    name = prefix + \"run-\" + now\n",
"    return \"{}/{}/\".format(root_logdir, name)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"logdir = log_dir(\"mnist_dnn\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we can create the `FileWriter` that we will use to write the TensorBoard logs:"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Hey! Why don't we implement early stopping? For this, we are going to need a validation set. Luckily, the dataset returned by TensorFlow's `input_data()` function (see above) is already split into a training set (55,000 instances, already shuffled for us), a validation set (5,000 instances) and a test set (10,000 instances). So we can easily define `X_valid` and `y_valid`:"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_valid = mnist.validation.images\n",
"y_valid = mnist.validation.labels"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"m, n = X_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n_epochs = 10001\n",
"batch_size = 50\n",
"n_batches = int(np.ceil(m / batch_size))\n",
"\n",
"checkpoint_path = \"/tmp/my_deep_mnist_model.ckpt\"\n",
"checkpoint_epoch_path = checkpoint_path + \".epoch\"\n",
"final_model_path = \"./my_deep_mnist_model\"\n",
"\n",
"best_loss = np.infty\n",
"epochs_without_progress = 0\n",
"max_epochs_without_progress = 50\n",
"\n",
"with tf.Session() as sess:\n",
"    if os.path.isfile(checkpoint_epoch_path):\n",
"        # if the checkpoint file exists, restore the model and load the epoch number\n",
"        with open(checkpoint_epoch_path, \"rb\") as f:\n",
"            start_epoch = int(f.read())\n",
"        print(\"Training was interrupted. Continuing at epoch\", start_epoch)\n",
"        saver.restore(sess, checkpoint_path)\n",
"    else:\n",
"        start_epoch = 0\n",
"        sess.run(init)\n",
"\n",
"    for epoch in range(start_epoch, n_epochs):\n",
"        for iteration in range(mnist.train.num_examples // batch_size):\n",
"            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
"            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
"        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], feed_dict={X: X_valid, y: y_valid})\n",
"        file_writer.add_summary(accuracy_summary_str, epoch)\n",
"        file_writer.add_summary(loss_summary_str, epoch)\n",
"        if epoch % 5 == 0:\n",
"            print(\"Epoch:\", epoch,\n",
"                  \"\\tValidation accuracy: {:.3f}%\".format(accuracy_val * 100),\n",
"                  \"\\tLoss: {:.5f}\".format(loss_val))\n",
"            saver.save(sess, checkpoint_path)\n",
"            with open(checkpoint_epoch_path, \"wb\") as f:\n",
"                f.write(b\"%d\" % (epoch + 1))\n",
"            if loss_val < best_loss:\n",
"                saver.save(sess, final_model_path)\n",
"                best_loss = loss_val\n",
"            else:\n",
"                epochs_without_progress += 5\n",
"                if epochs_without_progress > max_epochs_without_progress:\n",
"                    print(\"Early stopping\")\n",
"                    break"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"os.remove(checkpoint_epoch_path)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
"    saver.restore(sess, final_model_path)\n",
"    accuracy_val = accuracy.eval(feed_dict={X: X_test, y: y_test})"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"accuracy_val"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
"collapsed": true
},
"outputs": [],
"source": []
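Usage note (an editorial addition, not part of the commit): once the training cell above has written its summaries under tf_logs/, the learning curves can be viewed by launching TensorBoard from the notebook's directory and opening http://localhost:6006 in a browser:

tensorboard --logdir tf_logs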
|
|
Loading…
Reference in New Issue