handson-ml/11_deep_learning.ipynb


{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"**Chapter 11 Deep Learning**"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"_This notebook contains all the sample code and solutions to the exercices in chapter 11._"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Setup"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# To support both python 2 and python 3\n",
"from __future__ import division, print_function, unicode_literals\n",
"\n",
"# Common imports\n",
"import numpy as np\n",
"import numpy.random as rnd\n",
"import os\n",
"\n",
"# to make this notebook's output stable across runs\n",
"rnd.seed(42)\n",
"\n",
"# To plot pretty figures\n",
"%matplotlib inline\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"plt.rcParams['axes.labelsize'] = 14\n",
"plt.rcParams['xtick.labelsize'] = 12\n",
"plt.rcParams['ytick.labelsize'] = 12\n",
"\n",
"# Where to save the figures\n",
"PROJECT_ROOT_DIR = \".\"\n",
"CHAPTER_ID = \"deep\"\n",
"\n",
"def save_fig(fig_id, tight_layout=True):\n",
" path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n",
" print(\"Saving figure\", fig_id)\n",
" if tight_layout:\n",
" plt.tight_layout()\n",
" plt.savefig(path, format='png', dpi=300)"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Activation functions"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def logit(z):\n",
" return 1 / (1 + np.exp(-z))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"z = np.linspace(-5, 5, 200)\n",
"\n",
"plt.plot([-5, 5], [0, 0], 'k-')\n",
"plt.plot([-5, 5], [1, 1], 'k--')\n",
"plt.plot([0, 0], [-0.2, 1.2], 'k-')\n",
"plt.plot([-5, 5], [-3/4, 7/4], 'g--')\n",
"plt.plot(z, logit(z), \"b-\", linewidth=2)\n",
"props = dict(facecolor='black', shrink=0.1)\n",
"plt.annotate('Saturating', xytext=(3.5, 0.7), xy=(5, 1), arrowprops=props, fontsize=14, ha=\"center\")\n",
"plt.annotate('Saturating', xytext=(-3.5, 0.3), xy=(-5, 0), arrowprops=props, fontsize=14, ha=\"center\")\n",
"plt.annotate('Linear', xytext=(2, 0.2), xy=(0, 0.5), arrowprops=props, fontsize=14, ha=\"center\")\n",
"plt.grid(True)\n",
"plt.title(\"Sigmoid activation function\", fontsize=14)\n",
"plt.axis([-5, 5, -0.2, 1.2])\n",
"\n",
"save_fig(\"sigmoid_saturation_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def leaky_relu(z, alpha=0.01):\n",
" return np.maximum(alpha*z, z)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"plt.plot(z, leaky_relu(z, 0.05), \"b-\", linewidth=2)\n",
"plt.plot([-5, 5], [0, 0], 'k-')\n",
"plt.plot([0, 0], [-0.5, 4.2], 'k-')\n",
"plt.grid(True)\n",
"props = dict(facecolor='black', shrink=0.1)\n",
"plt.annotate('Leak', xytext=(-3.5, 0.5), xy=(-5, -0.2), arrowprops=props, fontsize=14, ha=\"center\")\n",
"plt.title(\"Leaky ReLU activation function\", fontsize=14)\n",
"plt.axis([-5, 5, -0.5, 4.2])\n",
"\n",
"save_fig(\"leaky_relu_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def elu(z, alpha=1):\n",
" return np.where(z<0, alpha*(np.exp(z)-1), z)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"plt.plot(z, elu(z), \"b-\", linewidth=2)\n",
"plt.plot([-5, 5], [0, 0], 'k-')\n",
"plt.plot([-5, 5], [-1, -1], 'k--')\n",
"plt.plot([0, 0], [-2.2, 3.2], 'k-')\n",
"plt.grid(True)\n",
"props = dict(facecolor='black', shrink=0.1)\n",
"plt.title(r\"ELU activation function ($\\alpha=1$)\", fontsize=14)\n",
"plt.axis([-5, 5, -2.2, 3.2])\n",
"\n",
"save_fig(\"elu_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from tensorflow.examples.tutorials.mnist import input_data\n",
"mnist = input_data.read_data_sets(\"/tmp/data/\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def leaky_relu(z, name=None):\n",
" return tf.maximum(0.01 * z, z, name=name)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"import tensorflow as tf"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from IPython.display import clear_output, Image, display, HTML\n",
"\n",
"def strip_consts(graph_def, max_const_size=32):\n",
" \"\"\"Strip large constant values from graph_def.\"\"\"\n",
" strip_def = tf.GraphDef()\n",
" for n0 in graph_def.node:\n",
" n = strip_def.node.add() \n",
" n.MergeFrom(n0)\n",
" if n.op == 'Const':\n",
" tensor = n.attr['value'].tensor\n",
" size = len(tensor.tensor_content)\n",
" if size > max_const_size:\n",
" tensor.tensor_content = b\"<stripped %d bytes>\"%size\n",
" return strip_def\n",
"\n",
"def show_graph(graph_def, max_const_size=32):\n",
" \"\"\"Visualize TensorFlow graph.\"\"\"\n",
" if hasattr(graph_def, 'as_graph_def'):\n",
" graph_def = graph_def.as_graph_def()\n",
" strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n",
" code = \"\"\"\n",
" <script>\n",
" function load() {{\n",
" document.getElementById(\"{id}\").pbtxt = {data};\n",
" }}\n",
" </script>\n",
" <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n",
" <div style=\"height:600px\">\n",
" <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n",
" </div>\n",
" \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n",
"\n",
" iframe = \"\"\"\n",
" <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n",
" \"\"\".format(code.replace('\"', '&quot;'))\n",
" display(HTML(iframe))"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function. The main differences relevant to this chapter are:\n",
"* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n",
"* the default `activation` is now `None` rather than `tf.nn.relu`.\n",
"* it does not support `tensorflow.contrib.framework.arg_scope()` (introduced later in chapter 11).\n",
"* it does not support regularizer params (introduced later in chapter 11)."
]
},
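{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"For concreteness, here is a minimal side-by-side sketch (ours, not the book's) of the same layer in both APIs. It assumes TensorFlow 1.x; the layer size and names are arbitrary:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"tf.reset_default_graph()\n",
"X_demo = tf.placeholder(tf.float32, shape=(None, 28 * 28), name=\"X_demo\")\n",
"\n",
"# Book style (contrib): ReLU is the default activation, the layer is named via `scope`\n",
"h_contrib = tf.contrib.layers.fully_connected(X_demo, 300, scope=\"h_contrib\")\n",
"\n",
"# Current style: `activation` defaults to None so it must be passed explicitly,\n",
"# `scope` becomes `name` (and `weights_initializer` would become `kernel_initializer`)\n",
"h_dense = tf.layers.dense(X_demo, 300, activation=tf.nn.relu, name=\"h_dense\")"
]
},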
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"n_inputs = 28*28 # MNIST\n",
"n_hidden1 = 300\n",
"n_hidden2 = 100\n",
"n_outputs = 10\n",
"learning_rate = 0.01\n",
"\n",
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
"\n",
"with tf.name_scope(\"dnn\"):\n",
" hidden1 = tf.layers.dense(X, n_hidden1, activation=leaky_relu, name=\"hidden1\")\n",
" hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=leaky_relu, name=\"hidden2\")\n",
" logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")\n",
"\n",
"with tf.name_scope(\"loss\"):\n",
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
" loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
"\n",
"with tf.name_scope(\"train\"):\n",
" optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n",
" training_op = optimizer.minimize(loss)\n",
"\n",
"with tf.name_scope(\"eval\"):\n",
" correct = tf.nn.in_top_k(logits, y, 1)\n",
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
" \n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"n_epochs = 20\n",
"batch_size = 100\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for iteration in range(len(mnist.test.labels)//batch_size):\n",
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n",
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
"\n",
" save_path = saver.save(sess, \"my_model_final.ckpt\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Batch Normalization"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Note: the book uses `tensorflow.contrib.layers.batch_norm()` rather than `tf.layers.batch_normalization()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.batch_normalization()`, because anything in the contrib module may change or be deleted without notice. Instead of using the `batch_norm()` function as a regularizer parameter to the `fully_connected()` function, we now use `batch_normalization()` and we explicitly create a distinct layer. The parameters are a bit different, in particular:\n",
"* `decay` is renamed to `momentum`,\n",
"* `is_training` is renamed to `training`,\n",
"* `updates_collections` is removed: the update operations needed by batch normalization are added to the `UPDATE_OPS` collection and you need to explicity run these operations during training (see the execution phase below),\n",
"* we don't need to specify `scale=True`, as that is the default.\n",
"\n",
"Also note that in order to run batch norm just _before_ each hidden layer's activation function, we apply the ELU activation function manually, right after the batch norm layer.\n",
"\n",
"Note: since the `tf.layers.dense()` function is incompatible with `tf.contrib.layers.arg_scope()` (which is used in the book), we now use python's `functools.partial()` function instead. It makes it easy to create a `my_dense_layer()` function that just calls `tf.layers.dense()` with the desired parameters automatically set (unless they are overridden when calling `my_dense_layer()`). As you can see, the code remains very similar."
]
},
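{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Here is a tiny, self-contained demo (ours, not the book's) of how `functools.partial()` pre-sets keyword arguments; the toy function `f` is purely illustrative:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from functools import partial\n",
"\n",
"def f(a, b=1, c=2):\n",
"    return (a, b, c)\n",
"\n",
"g = partial(f, b=10) # f with b pre-set to 10\n",
"print(g(0))          # (0, 10, 2)\n",
"print(g(0, c=99))    # (0, 10, 99): other arguments work as usual\n",
"print(g(0, b=5))     # (0, 5, 2): pre-set values can still be overridden"
]
},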
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"from functools import partial\n",
"\n",
"n_inputs = 28 * 28 # MNIST\n",
"n_hidden1 = 300\n",
"n_hidden2 = 100\n",
"n_outputs = 10\n",
"learning_rate = 0.01\n",
"momentum = 0.25\n",
"\n",
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
"is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n",
"\n",
"with tf.name_scope(\"dnn\"):\n",
" he_init = tf.contrib.layers.variance_scaling_initializer()\n",
"\n",
" my_batch_norm_layer = partial(\n",
" tf.layers.batch_normalization,\n",
" training=is_training,\n",
" momentum=0.9)\n",
"\n",
" my_dense_layer = partial(\n",
" tf.layers.dense,\n",
" kernel_initializer=he_init)\n",
"\n",
" hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n",
" bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))\n",
" hidden2 = my_dense_layer(bn1, n_hidden2, name=\"hidden2\")\n",
" bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))\n",
" logits_before_bn = my_dense_layer(bn2, n_outputs, activation=None, name=\"outputs\")\n",
" logits = my_batch_norm_layer(logits_before_bn)\n",
" extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n",
"\n",
"with tf.name_scope(\"loss\"):\n",
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
" loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
"\n",
"with tf.name_scope(\"train\"):\n",
" optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n",
" training_op = optimizer.minimize(loss)\n",
"\n",
"with tf.name_scope(\"eval\"):\n",
" correct = tf.nn.in_top_k(logits, y, 1)\n",
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
" \n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Note: since we are using `tf.layers.batch_normalization()` rather than `tf.contrib.layers.batch_norm()` (as in the book), we need to explicitly run the extra update operations needed by batch normalization (`sess.run([training_op, extra_update_ops],...`)."
]
},
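{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Alternatively, a common TensorFlow 1.x idiom (not used in the book) attaches the update operations to the training op with `tf.control_dependencies()`, so that a single `sess.run(training_op, ...)` also runs them. A sketch, assuming `optimizer` and `loss` are defined as in the construction phase above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# Sketch only: an equivalent way to run the batch norm update ops (TF 1.x idiom)\n",
"update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n",
"with tf.control_dependencies(update_ops):\n",
"    training_op_with_updates = optimizer.minimize(loss)\n",
"# sess.run(training_op_with_updates, ...) would then also update the moving averages"
]
},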
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"n_epochs = 20\n",
"batch_size = 200\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for iteration in range(len(mnist.test.labels)//batch_size):\n",
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" sess.run([training_op, extra_update_ops], feed_dict={is_training: True, X: X_batch, y: y_batch})\n",
" acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n",
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
"\n",
" save_path = saver.save(sess, \"my_model_final.ckpt\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Now the same model with $\\ell_1$ regularization:"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"from functools import partial\n",
"\n",
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
"is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n",
"\n",
"with tf.name_scope(\"dnn\"):\n",
" he_init = tf.contrib.layers.variance_scaling_initializer()\n",
"\n",
" my_batch_norm_layer = partial(\n",
" tf.layers.batch_normalization,\n",
" training=is_training,\n",
" momentum=0.9)\n",
"\n",
" my_dense_layer = partial(\n",
" tf.layers.dense,\n",
" kernel_initializer=he_init,\n",
" kernel_regularizer=tf.contrib.layers.l1_regularizer(0.01))\n",
"\n",
" hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n",
" bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))\n",
" hidden2 = my_dense_layer(bn1, n_hidden2, name=\"hidden2\")\n",
" bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))\n",
" logits_before_bn = my_dense_layer(bn2, n_outputs, activation=None, name=\"outputs\")\n",
" logits = my_batch_norm_layer(logits_before_bn)\n",
" extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n",
"\n",
"with tf.name_scope(\"loss\"):\n",
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
" reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n",
" base_loss = tf.reduce_mean(xentropy, name=\"base_loss\")\n",
" loss = tf.add_n([base_loss] + reg_losses, name=\"loss\")\n",
"\n",
"with tf.name_scope(\"train\"):\n",
" optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n",
" training_op = optimizer.minimize(loss)\n",
"\n",
"with tf.name_scope(\"eval\"):\n",
" correct = tf.nn.in_top_k(logits, y, 1)\n",
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
"\n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"n_epochs = 20\n",
"batch_size = 200\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for iteration in range(len(mnist.test.labels)//batch_size):\n",
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" sess.run([training_op, extra_update_ops], feed_dict={is_training: True, X: X_batch, y: y_batch})\n",
" acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n",
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
"\n",
" save_path = saver.save(sess, \"my_model_final.ckpt\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"[v.name for v in tf.global_variables()]"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Note: the weights variable created by the `tf.layers.dense()` function is called `\"kernel\"` (instead of `\"weights\"` when using the `tf.contrib.layers.fully_connected()`, as in the book):"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.variable_scope(\"\", default_name=\"\", reuse=True): # root scope\n",
" weights1 = tf.get_variable(\"hidden1/kernel\")\n",
" weights2 = tf.get_variable(\"hidden2/kernel\")\n",
" "
]
},
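{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"As a quick cross-check (ours, not the book's code), you can also list the trainable variables and filter by name to confirm the `\"kernel\"` naming:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# The weight matrices created by tf.layers.dense() have names ending in \"kernel:0\"\n",
"kernels = [v for v in tf.trainable_variables() if v.name.endswith(\"kernel:0\")]\n",
"for v in kernels:\n",
"    print(v.name, v.get_shape())"
]
},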
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"x = tf.constant([0., 0., 3., 4., 30., 40., 300., 400.], shape=(4, 2))\n",
"c = tf.clip_by_norm(x, clip_norm=10)\n",
"c0 = tf.clip_by_norm(x, clip_norm=350, axes=0)\n",
"c1 = tf.clip_by_norm(x, clip_norm=10, axes=1)\n",
"\n",
"with tf.Session() as sess:\n",
" xv = x.eval()\n",
" cv = c.eval()\n",
" c0v = c0.eval()\n",
" c1v = c1.eval()\n",
"\n",
"print(xv)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"print(cv)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"print(np.linalg.norm(cv))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"print(c0v)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"print(np.linalg.norm(c0v, axis=0))"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"print(c1v)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"print(np.linalg.norm(c1v, axis=1))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"from functools import partial\n",
"\n",
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
"is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n",
"\n",
"def max_norm_regularizer(threshold, axes=1, name=\"max_norm\", collection=\"max_norm\"):\n",
" def max_norm(weights):\n",
" clip_weights = tf.assign(weights, tf.clip_by_norm(weights, clip_norm=threshold, axes=axes), name=name)\n",
" tf.add_to_collection(collection, clip_weights)\n",
" return None # there is no regularization loss term\n",
" return max_norm\n",
"\n",
"with tf.name_scope(\"dnn\"):\n",
" \n",
" my_dense_layer = partial(\n",
" tf.layers.dense,\n",
" activation=tf.nn.relu,\n",
" kernel_regularizer=max_norm_regularizer(1.5))\n",
"\n",
" hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n",
" hidden2 = my_dense_layer(hidden1, n_hidden2, name=\"hidden2\")\n",
" logits = my_dense_layer(hidden2, n_outputs, activation=None, name=\"outputs\")\n",
"\n",
"clip_all_weights = tf.get_collection(\"max_norm\")\n",
" \n",
"with tf.name_scope(\"loss\"):\n",
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
" loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
"\n",
"with tf.name_scope(\"train\"):\n",
" optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n",
" threshold = 1.0\n",
" grads_and_vars = optimizer.compute_gradients(loss)\n",
" capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var)\n",
" for grad, var in grads_and_vars]\n",
" training_op = optimizer.apply_gradients(capped_gvs)\n",
"\n",
"with tf.name_scope(\"eval\"):\n",
" correct = tf.nn.in_top_k(logits, y, 1)\n",
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
" \n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"n_epochs = 20\n",
"batch_size = 50\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for iteration in range(len(mnist.test.labels)//batch_size):\n",
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n",
" sess.run(clip_all_weights)\n",
" acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n",
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
"\n",
" save_path = saver.save(sess, \"my_model_final.ckpt\")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"show_graph(tf.get_default_graph())"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Note: the book uses `tf.contrib.layers.dropout()` rather than `tf.layers.dropout()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dropout()`, because anything in the contrib module may change or be deleted without notice. The `tf.layers.dropout()` function is almost identical to the `tf.contrib.layers.dropout()` function, except for a few minor differences. Most importantly:\n",
"* you must specify the dropout rate (`rate`) rather than the keep probability (`keep_prob`), where `rate` is simply equal to `1 - keep_prob`,\n",
"* the `is_training` parameter is renamed to `training`."
]
},
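{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"A minimal NumPy illustration (ours, not the book's) of the inverted dropout that `tf.layers.dropout()` applies during training: each unit is dropped with probability `rate`, and the survivors are scaled by `1 / (1 - rate)` so the expected activation is preserved:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"drop_rng = np.random.RandomState(42)\n",
"\n",
"rate = 0.25                             # dropout rate (= 1 - keep_prob)\n",
"activations = np.ones(8)                # toy activations\n",
"mask = drop_rng.rand(8) >= rate         # keep each unit with probability 1 - rate\n",
"print(activations * mask / (1 - rate))  # surviving units are scaled up by 1/(1 - rate)"
]
},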
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from functools import partial\n",
"\n",
"tf.reset_default_graph()\n",
"\n",
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
"is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n",
"\n",
"initial_learning_rate = 0.1\n",
"decay_steps = 10000\n",
"decay_rate = 1/10\n",
"global_step = tf.Variable(0, trainable=False)\n",
"learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step,\n",
" decay_steps, decay_rate)\n",
"\n",
"dropout_rate = 0.5\n",
"\n",
"with tf.name_scope(\"dnn\"):\n",
" he_init = tf.contrib.layers.variance_scaling_initializer()\n",
"\n",
" my_dense_layer = partial(\n",
" tf.layers.dense,\n",
" activation=tf.nn.elu,\n",
" kernel_initializer=he_init)\n",
"\n",
" X_drop = tf.layers.dropout(X, dropout_rate, training=is_training)\n",
" hidden1 = my_dense_layer(X_drop, n_hidden1, name=\"hidden1\")\n",
" hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training=is_training)\n",
" hidden2 = my_dense_layer(hidden1_drop, n_hidden2, name=\"hidden2\")\n",
" hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=is_training)\n",
" logits = my_dense_layer(hidden2_drop, n_outputs, activation=None, name=\"outputs\")\n",
"\n",
"with tf.name_scope(\"loss\"):\n",
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
" loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
"\n",
"with tf.name_scope(\"train\"):\n",
" optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n",
" training_op = optimizer.minimize(loss, global_step=global_step) \n",
"\n",
"with tf.name_scope(\"eval\"):\n",
" correct = tf.nn.in_top_k(logits, y, 1)\n",
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
" \n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"n_epochs = 20\n",
"batch_size = 50\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for iteration in range(len(mnist.test.labels)//batch_size):\n",
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n",
" acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n",
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
"\n",
" save_path = saver.save(sess, \"my_model_final.ckpt\")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,\n",
" scope=\"hidden[2]|outputs\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"training_op2 = optimizer.minimize(loss, var_list=train_vars)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"for i in tf.global_variables():\n",
" print(i.name)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"for i in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):\n",
" print(i.name)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"for i in train_vars:\n",
" print(i.name)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"source": [
"# Exercise solutions"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"**Coming soon**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
},
"nav_menu": {
"height": "360px",
"width": "416px"
},
"toc": {
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 6,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 0
}