{ "cells": [ { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "**Chapter 11 – Deep Learning**" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "_This notebook contains all the sample code and solutions to the exercices in chapter 11._" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Setup" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "# To support both python 2 and python 3\n", "from __future__ import division, print_function, unicode_literals\n", "\n", "# Common imports\n", "import numpy as np\n", "import numpy.random as rnd\n", "import os\n", "\n", "# to make this notebook's output stable across runs\n", "rnd.seed(42)\n", "\n", "# To plot pretty figures\n", "%matplotlib inline\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "plt.rcParams['axes.labelsize'] = 14\n", "plt.rcParams['xtick.labelsize'] = 12\n", "plt.rcParams['ytick.labelsize'] = 12\n", "\n", "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", "CHAPTER_ID = \"deep\"\n", "\n", "def save_fig(fig_id, tight_layout=True):\n", " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", " print(\"Saving figure\", fig_id)\n", " if tight_layout:\n", " plt.tight_layout()\n", " plt.savefig(path, format='png', dpi=300)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Activation functions" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "def logit(z):\n", " return 1 / (1 + np.exp(-z))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "z = np.linspace(-5, 5, 200)\n", "\n", "plt.plot([-5, 5], [0, 0], 'k-')\n", "plt.plot([-5, 5], [1, 1], 'k--')\n", "plt.plot([0, 0], [-0.2, 1.2], 'k-')\n", "plt.plot([-5, 5], [-3/4, 7/4], 'g--')\n", "plt.plot(z, logit(z), \"b-\", linewidth=2)\n", "props = dict(facecolor='black', shrink=0.1)\n", "plt.annotate('Saturating', xytext=(3.5, 0.7), xy=(5, 1), arrowprops=props, fontsize=14, ha=\"center\")\n", "plt.annotate('Saturating', xytext=(-3.5, 0.3), xy=(-5, 0), arrowprops=props, fontsize=14, ha=\"center\")\n", "plt.annotate('Linear', xytext=(2, 0.2), xy=(0, 0.5), arrowprops=props, fontsize=14, ha=\"center\")\n", "plt.grid(True)\n", "plt.title(\"Sigmoid activation function\", fontsize=14)\n", "plt.axis([-5, 5, -0.2, 1.2])\n", "\n", "save_fig(\"sigmoid_saturation_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "def leaky_relu(z, alpha=0.01):\n", " return np.maximum(alpha*z, z)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.plot(z, leaky_relu(z, 0.05), \"b-\", linewidth=2)\n", "plt.plot([-5, 5], [0, 0], 'k-')\n", "plt.plot([0, 0], [-0.5, 4.2], 'k-')\n", 
"plt.grid(True)\n", "props = dict(facecolor='black', shrink=0.1)\n", "plt.annotate('Leak', xytext=(-3.5, 0.5), xy=(-5, -0.2), arrowprops=props, fontsize=14, ha=\"center\")\n", "plt.title(\"Leaky ReLU activation function\", fontsize=14)\n", "plt.axis([-5, 5, -0.5, 4.2])\n", "\n", "save_fig(\"leaky_relu_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "def elu(z, alpha=1):\n", " return np.where(z<0, alpha*(np.exp(z)-1), z)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.plot(z, elu(z), \"b-\", linewidth=2)\n", "plt.plot([-5, 5], [0, 0], 'k-')\n", "plt.plot([-5, 5], [-1, -1], 'k--')\n", "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n", "plt.grid(True)\n", "props = dict(facecolor='black', shrink=0.1)\n", "plt.title(r\"ELU activation function ($\\alpha=1$)\", fontsize=14)\n", "plt.axis([-5, 5, -2.2, 3.2])\n", "\n", "save_fig(\"elu_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from tensorflow.examples.tutorials.mnist import input_data\n", "mnist = input_data.read_data_sets(\"/tmp/data/\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "def leaky_relu(z, name=None):\n", " return tf.maximum(0.01 * z, z, name=name)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "import tensorflow as tf" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "from IPython.display import clear_output, Image, display, HTML\n", "\n", "def strip_consts(graph_def, max_const_size=32):\n", " \"\"\"Strip large constant values from graph_def.\"\"\"\n", " strip_def = tf.GraphDef()\n", " for n0 in graph_def.node:\n", " n = strip_def.node.add() \n", " n.MergeFrom(n0)\n", " if n.op == 'Const':\n", " tensor = n.attr['value'].tensor\n", " size = len(tensor.tensor_content)\n", " if size > max_const_size:\n", " tensor.tensor_content = b\"\"%size\n", " return strip_def\n", "\n", "def show_graph(graph_def, max_const_size=32):\n", " \"\"\"Visualize TensorFlow graph.\"\"\"\n", " if hasattr(graph_def, 'as_graph_def'):\n", " graph_def = graph_def.as_graph_def()\n", " strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n", " code = \"\"\"\n", " \n", " \n", "
\n", " \n", "
\n", " \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n", "\n", " iframe = \"\"\"\n", " \n", " \"\"\".format(code.replace('\"', '"'))\n", " display(HTML(iframe))" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function. The main differences relevant to this chapter are:\n", "* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n", "* the default `activation` is now `None` rather than `tf.nn.relu`.\n", "* it does not support `tensorflow.contrib.framework.arg_scope()` (introduced later in chapter 11).\n", "* it does not support regularizer params (introduced later in chapter 11)." ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_inputs = 28*28 # MNIST\n", "n_hidden1 = 300\n", "n_hidden2 = 100\n", "n_outputs = 10\n", "learning_rate = 0.01\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "\n", "with tf.name_scope(\"dnn\"):\n", " hidden1 = tf.layers.dense(X, n_hidden1, activation=leaky_relu, name=\"hidden1\")\n", " hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=leaky_relu, name=\"hidden2\")\n", " logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")\n", "\n", "with tf.name_scope(\"loss\"):\n", " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", "\n", "with tf.name_scope(\"train\"):\n", " optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n", " training_op = optimizer.minimize(loss)\n", "\n", "with tf.name_scope(\"eval\"):\n", " correct = tf.nn.in_top_k(logits, y, 1)\n", " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", " \n", "init = tf.global_variables_initializer()\n", "saver = tf.train.Saver()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "n_epochs = 20\n", "batch_size = 100\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for epoch in range(n_epochs):\n", " for iteration in range(len(mnist.test.labels)//batch_size):\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", " acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n", " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", "\n", " save_path = saver.save(sess, \"my_model_final.ckpt\")" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Batch Normalization" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "Note: the book uses `tensorflow.contrib.layers.batch_norm()` 
{ "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "from functools import partial\n", "\n", "n_inputs = 28 * 28  # MNIST\n", "n_hidden1 = 300\n", "n_hidden2 = 100\n", "n_outputs = 10\n", "learning_rate = 0.01\n", "momentum = 0.25\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", "\n", "with tf.name_scope(\"dnn\"):\n", "    he_init = tf.contrib.layers.variance_scaling_initializer()\n", "\n", "    my_batch_norm_layer = partial(\n", "        tf.layers.batch_normalization,\n", "        training=is_training,\n", "        momentum=0.9)\n", "\n", "    my_dense_layer = partial(\n", "        tf.layers.dense,\n", "        kernel_initializer=he_init)\n", "\n", "    hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n", "    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))\n", "    hidden2 = my_dense_layer(bn1, n_hidden2, name=\"hidden2\")\n", "    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))\n", "    logits_before_bn = my_dense_layer(bn2, n_outputs, activation=None, name=\"outputs\")\n", "    logits = my_batch_norm_layer(logits_before_bn)\n", "    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n", "\n", "with tf.name_scope(\"loss\"):\n", "    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", "    loss = tf.reduce_mean(xentropy, name=\"loss\")\n", "\n", "with tf.name_scope(\"train\"):\n", "    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", "    training_op = optimizer.minimize(loss)\n", "\n", "with tf.name_scope(\"eval\"):\n", "    correct = tf.nn.in_top_k(logits, y, 1)\n", "    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", "\n", "init = tf.global_variables_initializer()\n", "saver = tf.train.Saver()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "Note: since we are using `tf.layers.batch_normalization()` rather than `tf.contrib.layers.batch_norm()` (as in the book), we need to explicitly run the extra update operations needed by batch normalization (`sess.run([training_op, extra_update_ops],...`). An equivalent alternative, sketched right after this note, is to make the training op depend on the update operations via `tf.control_dependencies()`." ] },
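{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "Alternatively, rather than fetching `extra_update_ops` and running them yourself, you can make the training op depend on the `UPDATE_OPS` collection, so that `sess.run(training_op, ...)` runs the moving-average updates automatically. A minimal standalone sketch (ours; `X_demo`, `training_flag`, etc. are illustrative names):" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "demo_graph = tf.Graph()\n", "with demo_graph.as_default():\n", "    X_demo = tf.placeholder(tf.float32, shape=(None, 8))\n", "    training_flag = tf.placeholder(tf.bool, shape=())\n", "    hidden = tf.layers.dense(X_demo, 4)\n", "    bn = tf.layers.batch_normalization(hidden, training=training_flag, momentum=0.9)\n", "    demo_loss = tf.reduce_mean(tf.square(bn))\n", "    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n", "    with tf.control_dependencies(update_ops):\n", "        # running demo_training_op now also runs the batch norm updates\n", "        demo_training_op = tf.train.GradientDescentOptimizer(0.01).minimize(demo_loss)" ] },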
{ "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "n_epochs = 20\n", "batch_size = 200\n", "\n", "with tf.Session() as sess:\n", "    init.run()\n", "    for epoch in range(n_epochs):\n", "        for iteration in range(mnist.train.num_examples // batch_size):\n", "            X_batch, y_batch = mnist.train.next_batch(batch_size)\n", "            sess.run([training_op, extra_update_ops], feed_dict={is_training: True, X: X_batch, y: y_batch})\n", "        acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", "        acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", "        print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", "\n", "    save_path = saver.save(sess, \"my_model_final.ckpt\")" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "Now the same model with $\\ell_1$ regularization:" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "from functools import partial\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", "\n", "with tf.name_scope(\"dnn\"):\n", "    he_init = tf.contrib.layers.variance_scaling_initializer()\n", "\n", "    my_batch_norm_layer = partial(\n", "        tf.layers.batch_normalization,\n", "        training=is_training,\n", "        momentum=0.9)\n", "\n", "    my_dense_layer = partial(\n", "        tf.layers.dense,\n", "        kernel_initializer=he_init,\n", "        kernel_regularizer=tf.contrib.layers.l1_regularizer(0.01))\n", "\n", "    hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n", "    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))\n", "    hidden2 = my_dense_layer(bn1, n_hidden2, name=\"hidden2\")\n", "    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))\n", "    logits_before_bn = my_dense_layer(bn2, n_outputs, activation=None, name=\"outputs\")\n", "    logits = my_batch_norm_layer(logits_before_bn)\n", "    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n", "\n", "with tf.name_scope(\"loss\"):\n", "    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", "    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n", "    base_loss = tf.reduce_mean(xentropy, name=\"base_loss\")\n", "    loss = tf.add_n([base_loss] + reg_losses, name=\"loss\")\n", "\n", "with tf.name_scope(\"train\"):\n", "    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", "    training_op = optimizer.minimize(loss)\n", "\n", "with tf.name_scope(\"eval\"):\n", "    correct = tf.nn.in_top_k(logits, y, 1)\n", "    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", "\n", "init = tf.global_variables_initializer()\n", "saver = tf.train.Saver()" ] },
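{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "We can check that the regularizer created one loss term per `kernel` (the `l1_regularizer` was passed only as `kernel_regularizer`, so the biases are not regularized). This inspection cell is ours, not from the book:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "# one scalar regularization loss per dense layer's kernel\n", "[reg_loss.op.name for reg_loss in tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)]" ] },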
{ "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "n_epochs = 20\n", "batch_size = 200\n", "\n", "with tf.Session() as sess:\n", "    init.run()\n", "    for epoch in range(n_epochs):\n", "        for iteration in range(mnist.train.num_examples // batch_size):\n", "            X_batch, y_batch = mnist.train.next_batch(batch_size)\n", "            sess.run([training_op, extra_update_ops], feed_dict={is_training: True, X: X_batch, y: y_batch})\n", "        acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", "        acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", "        print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", "\n", "    save_path = saver.save(sess, \"my_model_final.ckpt\")" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "[v.name for v in tf.global_variables()]" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "Note: the weights variable created by the `tf.layers.dense()` function is called `\"kernel\"` (instead of `\"weights\"`, as with the `tf.contrib.layers.fully_connected()` function used in the book):" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "with tf.variable_scope(\"\", default_name=\"\", reuse=True):  # root scope\n", "    weights1 = tf.get_variable(\"hidden1/kernel\")\n", "    weights2 = tf.get_variable(\"hidden2/kernel\")" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "The `tf.clip_by_norm()` function, demonstrated in the next few cells, rescales a tensor so that its overall (or per-axis) $\\ell_2$ norm does not exceed `clip_norm`:" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "x = tf.constant([0., 0., 3., 4., 30., 40., 300., 400.], shape=(4, 2))\n", "c = tf.clip_by_norm(x, clip_norm=10)\n", "c0 = tf.clip_by_norm(x, clip_norm=350, axes=0)\n", "c1 = tf.clip_by_norm(x, clip_norm=10, axes=1)\n", "\n", "with tf.Session() as sess:\n", "    xv = x.eval()\n", "    cv = c.eval()\n", "    c0v = c0.eval()\n", "    c1v = c1.eval()\n", "\n", "print(xv)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "print(cv)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "print(np.linalg.norm(cv))" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "print(c0v)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "print(np.linalg.norm(c0v, axis=0))" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "print(c1v)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "print(np.linalg.norm(c1v, axis=1))" ] },
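{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "Max-norm regularization: the next cell implements it with a custom `kernel_regularizer` function. The regularizer returns no loss term; instead it builds a clipping operation (a `tf.assign()` of the `tf.clip_by_norm()`-ed weights) and stores it in a `\"max_norm\"` collection, which is why the execution phase must run `clip_all_weights` after each training step. The same cell also demonstrates gradient clipping, capping every gradient with `tf.clip_by_value()` between `compute_gradients()` and `apply_gradients()`." ] },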
{ "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "from functools import partial\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", "\n", "def max_norm_regularizer(threshold, axes=1, name=\"max_norm\", collection=\"max_norm\"):\n", "    def max_norm(weights):\n", "        clip_weights = tf.assign(weights, tf.clip_by_norm(weights, clip_norm=threshold, axes=axes), name=name)\n", "        tf.add_to_collection(collection, clip_weights)\n", "        return None  # there is no regularization loss term\n", "    return max_norm\n", "\n", "with tf.name_scope(\"dnn\"):\n", "\n", "    my_dense_layer = partial(\n", "        tf.layers.dense,\n", "        activation=tf.nn.relu,\n", "        kernel_regularizer=max_norm_regularizer(1.5))\n", "\n", "    hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n", "    hidden2 = my_dense_layer(hidden1, n_hidden2, name=\"hidden2\")\n", "    logits = my_dense_layer(hidden2, n_outputs, activation=None, name=\"outputs\")\n", "\n", "clip_all_weights = tf.get_collection(\"max_norm\")\n", "\n", "with tf.name_scope(\"loss\"):\n", "    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", "    loss = tf.reduce_mean(xentropy, name=\"loss\")\n", "\n", "with tf.name_scope(\"train\"):\n", "    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", "    threshold = 1.0\n", "    grads_and_vars = optimizer.compute_gradients(loss)\n", "    capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var)\n", "                  for grad, var in grads_and_vars]\n", "    training_op = optimizer.apply_gradients(capped_gvs)\n", "\n", "with tf.name_scope(\"eval\"):\n", "    correct = tf.nn.in_top_k(logits, y, 1)\n", "    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", "\n", "init = tf.global_variables_initializer()\n", "saver = tf.train.Saver()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "n_epochs = 20\n", "batch_size = 50\n", "\n", "with tf.Session() as sess:\n", "    init.run()\n", "    for epoch in range(n_epochs):\n", "        for iteration in range(mnist.train.num_examples // batch_size):\n", "            X_batch, y_batch = mnist.train.next_batch(batch_size)\n", "            sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n", "            sess.run(clip_all_weights)\n", "        acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", "        acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", "        print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", "\n", "    save_path = saver.save(sess, \"my_model_final.ckpt\")" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "show_graph(tf.get_default_graph())" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "Note: the book uses `tf.contrib.layers.dropout()` rather than `tf.layers.dropout()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dropout()`, because anything in the contrib module may change or be deleted without notice. The `tf.layers.dropout()` function is almost identical to the `tf.contrib.layers.dropout()` function, except for a few minor differences. Most importantly:\n", "* you must specify the dropout rate (`rate`) rather than the keep probability (`keep_prob`), where `rate` is simply equal to `1 - keep_prob`,\n", "* the `is_training` parameter is renamed to `training`.\n", "\n", "The short demo right after this note shows the effect of these parameters at training versus test time." ] },
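{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "A quick demo of the new API (ours, not from the book): with `rate=0.5`, dropout zeroes about half of the inputs during training and scales the survivors by `1/(1-rate)`, while at test time it is the identity:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "dropout_demo_graph = tf.Graph()\n", "with dropout_demo_graph.as_default():\n", "    ones = tf.ones([1, 10])\n", "    training_flag = tf.placeholder(tf.bool, shape=())\n", "    dropped = tf.layers.dropout(ones, rate=0.5, training=training_flag)\n", "    with tf.Session() as sess:\n", "        print(sess.run(dropped, feed_dict={training_flag: True}))   # about half zeros, survivors scaled to 2.0\n", "        print(sess.run(dropped, feed_dict={training_flag: False}))  # identity: all ones" ] },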
] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from functools import partial\n", "\n", "tf.reset_default_graph()\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n", "\n", "initial_learning_rate = 0.1\n", "decay_steps = 10000\n", "decay_rate = 1/10\n", "global_step = tf.Variable(0, trainable=False)\n", "learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step,\n", " decay_steps, decay_rate)\n", "\n", "dropout_rate = 0.5\n", "\n", "with tf.name_scope(\"dnn\"):\n", " he_init = tf.contrib.layers.variance_scaling_initializer()\n", "\n", " my_dense_layer = partial(\n", " tf.layers.dense,\n", " activation=tf.nn.elu,\n", " kernel_initializer=he_init)\n", "\n", " X_drop = tf.layers.dropout(X, dropout_rate, training=is_training)\n", " hidden1 = my_dense_layer(X_drop, n_hidden1, name=\"hidden1\")\n", " hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training=is_training)\n", " hidden2 = my_dense_layer(hidden1_drop, n_hidden2, name=\"hidden2\")\n", " hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=is_training)\n", " logits = my_dense_layer(hidden2_drop, n_outputs, activation=None, name=\"outputs\")\n", "\n", "with tf.name_scope(\"loss\"):\n", " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", "\n", "with tf.name_scope(\"train\"):\n", " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", " training_op = optimizer.minimize(loss, global_step=global_step) \n", "\n", "with tf.name_scope(\"eval\"):\n", " correct = tf.nn.in_top_k(logits, y, 1)\n", " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", " \n", "init = tf.global_variables_initializer()\n", "saver = tf.train.Saver()" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "n_epochs = 20\n", "batch_size = 50\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for epoch in range(n_epochs):\n", " for iteration in range(len(mnist.test.labels)//batch_size):\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", " sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n", " acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n", " acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n", " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", "\n", " save_path = saver.save(sess, \"my_model_final.ckpt\")" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,\n", " scope=\"hidden[2]|outputs\")" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "training_op2 = optimizer.minimize(loss, var_list=train_vars)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "for i in tf.global_variables():\n", " print(i.name)" ] }, { "cell_type": "code", 
"execution_count": 35, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "for i in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):\n", " print(i.name)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "for i in train_vars:\n", " print(i.name)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true, "deletable": true, "editable": true }, "source": [ "# Exercise solutions" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "**Coming soon**" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.3" }, "nav_menu": { "height": "360px", "width": "416px" }, "toc": { "navigate_menu": true, "number_sections": true, "sideBar": true, "threshold": 6, "toc_cell": false, "toc_section_display": "block", "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 0 }