932 lines
28 KiB
Plaintext
932 lines
28 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Chapter 11 – Deep Learning**"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"_This notebook contains all the sample code and solutions to the exercises in chapter 11._"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Setup"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"First, let's make sure this notebook works well in both Python 2 and 3, import a few common modules, ensure Matplotlib plots figures inline, and prepare a function to save the figures:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {
|
|||
|
"collapsed": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# To support both python 2 and python 3\n",
|
|||
|
"from __future__ import division, print_function, unicode_literals\n",
|
|||
|
"\n",
|
|||
|
"# Common imports\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import numpy.random as rnd\n",
|
|||
|
"import os\n",
|
|||
|
"\n",
|
|||
|
"# to make this notebook's output stable across runs\n",
|
|||
|
"rnd.seed(42)\n",
|
|||
|
"\n",
|
|||
|
"# To plot pretty figures\n",
|
|||
|
"%matplotlib inline\n",
|
|||
|
"import matplotlib\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"plt.rcParams['axes.labelsize'] = 14\n",
|
|||
|
"plt.rcParams['xtick.labelsize'] = 12\n",
|
|||
|
"plt.rcParams['ytick.labelsize'] = 12\n",
|
|||
|
"\n",
|
|||
|
"# Where to save the figures\n",
|
|||
|
"PROJECT_ROOT_DIR = \".\"\n",
|
|||
|
"CHAPTER_ID = \"deep\"\n",
|
|||
|
"\n",
|
|||
|
"def save_fig(fig_id, tight_layout=True):\n",
|
|||
|
"    \"\"\"Save the current Matplotlib figure as a 300 dpi PNG file.\n",
|
|||
|
"\n",
|
|||
|
"    The image is written to PROJECT_ROOT_DIR/images/CHAPTER_ID/<fig_id>.png.\n",
|
|||
|
"\n",
|
|||
|
"    Args:\n",
|
|||
|
"        fig_id: Base file name of the image, without the \".png\" extension.\n",
|
|||
|
"        tight_layout: When true, plt.tight_layout() is called before saving.\n",
|
|||
|
"    \"\"\"\n",
|
|||
|
"    fig_dir = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
|
|||
|
"    # plt.savefig() fails if the target folder is missing, so create it first\n",
|
|||
|
"    # (no exist_ok argument: this notebook also supports Python 2).\n",
|
|||
|
"    if not os.path.isdir(fig_dir):\n",
|
|||
|
"        os.makedirs(fig_dir)\n",
|
|||
|
"    path = os.path.join(fig_dir, fig_id + \".png\")\n",
|
|||
|
"    print(\"Saving figure\", fig_id)\n",
|
|||
|
"    if tight_layout:\n",
|
|||
|
"        plt.tight_layout()\n",
|
|||
|
"    plt.savefig(path, format='png', dpi=300)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Activation functions"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"metadata": {
|
|||
|
"collapsed": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def logit(z):\n",
|
|||
|
"    \"\"\"Return 1 / (1 + exp(-z)) element-wise (the sigmoid curve, despite the name).\"\"\"\n",
|
|||
|
"    exp_neg_z = np.exp(-z)\n",
|
|||
|
"    return 1 / (1 + exp_neg_z)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"z = np.linspace(-5, 5, 200)\n",
|
|||
|
"\n",
|
|||
|
"plt.plot([-5, 5], [0, 0], 'k-')\n",
|
|||
|
"plt.plot([-5, 5], [1, 1], 'k--')\n",
|
|||
|
"plt.plot([0, 0], [-0.2, 1.2], 'k-')\n",
|
|||
|
"plt.plot([-5, 5], [-3/4, 7/4], 'g--')\n",
|
|||
|
"plt.plot(z, logit(z), \"b-\", linewidth=2)\n",
|
|||
|
"props = dict(facecolor='black', shrink=0.1)\n",
|
|||
|
"plt.annotate('Saturating', xytext=(3.5, 0.7), xy=(5, 1), arrowprops=props, fontsize=14, ha=\"center\")\n",
|
|||
|
"plt.annotate('Saturating', xytext=(-3.5, 0.3), xy=(-5, 0), arrowprops=props, fontsize=14, ha=\"center\")\n",
|
|||
|
"plt.annotate('Linear', xytext=(2, 0.2), xy=(0, 0.5), arrowprops=props, fontsize=14, ha=\"center\")\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.title(\"Sigmoid activation function\", fontsize=14)\n",
|
|||
|
"plt.axis([-5, 5, -0.2, 1.2])\n",
|
|||
|
"\n",
|
|||
|
"save_fig(\"sigmoid_saturation_plot\")\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 4,
|
|||
|
"metadata": {
|
|||
|
"collapsed": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def leaky_relu(z, alpha=0.01):\n",
|
|||
|
"    \"\"\"Leaky ReLU: element-wise maximum of alpha * z and z (NumPy version).\"\"\"\n",
|
|||
|
"    leak = alpha * z\n",
|
|||
|
"    return np.maximum(leak, z)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 5,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"plt.plot(z, leaky_relu(z, 0.05), \"b-\", linewidth=2)\n",
|
|||
|
"plt.plot([-5, 5], [0, 0], 'k-')\n",
|
|||
|
"plt.plot([0, 0], [-0.5, 4.2], 'k-')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"props = dict(facecolor='black', shrink=0.1)\n",
|
|||
|
"plt.annotate('Leak', xytext=(-3.5, 0.5), xy=(-5, -0.2), arrowprops=props, fontsize=14, ha=\"center\")\n",
|
|||
|
"plt.title(\"Leaky ReLU activation function\", fontsize=14)\n",
|
|||
|
"plt.axis([-5, 5, -0.5, 4.2])\n",
|
|||
|
"\n",
|
|||
|
"save_fig(\"leaky_relu_plot\")\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 6,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def elu(z, alpha=1):\n",
|
|||
|
"    \"\"\"ELU: alpha * (exp(z) - 1) where z < 0, and z itself elsewhere.\"\"\"\n",
|
|||
|
"    negative_branch = alpha * (np.exp(z) - 1)\n",
|
|||
|
"    return np.where(z < 0, negative_branch, z)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 7,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"plt.plot(z, elu(z), \"b-\", linewidth=2)\n",
|
|||
|
"plt.plot([-5, 5], [0, 0], 'k-')\n",
|
|||
|
"plt.plot([-5, 5], [-1, -1], 'k--')\n",
|
|||
|
"plt.plot([0, 0], [-2.2, 3.2], 'k-')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"props = dict(facecolor='black', shrink=0.1)\n",
|
|||
|
"plt.title(r\"ELU activation function ($\\alpha=1$)\", fontsize=14)\n",
|
|||
|
"plt.axis([-5, 5, -2.2, 3.2])\n",
|
|||
|
"\n",
|
|||
|
"save_fig(\"elu_plot\")\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 8,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from tensorflow.examples.tutorials.mnist import input_data\n",
|
|||
|
"mnist = input_data.read_data_sets(\"/tmp/data/\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {
|
|||
|
"collapsed": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def leaky_relu(z, name=None, alpha=0.01):\n",
|
|||
|
"    \"\"\"TensorFlow leaky ReLU: element-wise maximum of alpha * z and z.\n",
|
|||
|
"\n",
|
|||
|
"    NOTE: this redefinition replaces the NumPy leaky_relu defined earlier in\n",
|
|||
|
"    the notebook, and it needs `tf` to be imported before it is first called.\n",
|
|||
|
"\n",
|
|||
|
"    Args:\n",
|
|||
|
"        z: Input tensor.\n",
|
|||
|
"        name: Optional name for the resulting tf.maximum op.\n",
|
|||
|
"        alpha: Slope applied to z in the leaky term (default 0.01, as before).\n",
|
|||
|
"    \"\"\"\n",
|
|||
|
"    return tf.maximum(alpha * z, z, name=name)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {
|
|||
|
"collapsed": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import tensorflow as tf"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {
|
|||
|
"collapsed": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from IPython.display import clear_output, Image, display, HTML\n",
|
|||
|
"\n",
|
|||
|
"def strip_consts(graph_def, max_const_size=32):\n",
|
|||
|
" \"\"\"Strip large constant values from graph_def.\"\"\"\n",
|
|||
|
" strip_def = tf.GraphDef()\n",
|
|||
|
" for n0 in graph_def.node:\n",
|
|||
|
" n = strip_def.node.add() \n",
|
|||
|
" n.MergeFrom(n0)\n",
|
|||
|
" if n.op == 'Const':\n",
|
|||
|
" tensor = n.attr['value'].tensor\n",
|
|||
|
" size = len(tensor.tensor_content)\n",
|
|||
|
" if size > max_const_size:\n",
|
|||
|
" tensor.tensor_content = b\"<stripped %d bytes>\"%size\n",
|
|||
|
" return strip_def\n",
|
|||
|
"\n",
|
|||
|
"def show_graph(graph_def, max_const_size=32):\n",
|
|||
|
"    \"\"\"Visualize a TensorFlow graph inline with the TensorBoard graph widget.\n",
|
|||
|
"\n",
|
|||
|
"    Args:\n",
|
|||
|
"        graph_def: A GraphDef, or any object exposing as_graph_def() (it is\n",
|
|||
|
"            converted by calling that method).\n",
|
|||
|
"        max_const_size: Forwarded to strip_consts(); constants whose\n",
|
|||
|
"            tensor_content is longer than this many bytes are stripped.\n",
|
|||
|
"    \"\"\"\n",
|
|||
|
"    if hasattr(graph_def, 'as_graph_def'):\n",
|
|||
|
"        graph_def = graph_def.as_graph_def()\n",
|
|||
|
"    strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n",
|
|||
|
"    # Doubled braces are literal braces for str.format(); {id} and {data} are filled in.\n",
|
|||
|
"    code = \"\"\"\n",
|
|||
|
"        <script>\n",
|
|||
|
"          function load() {{\n",
|
|||
|
"            document.getElementById(\"{id}\").pbtxt = {data};\n",
|
|||
|
"          }}\n",
|
|||
|
"        </script>\n",
|
|||
|
"        <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n",
|
|||
|
"        <div style=\"height:600px\">\n",
|
|||
|
"          <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n",
|
|||
|
"        </div>\n",
|
|||
|
"    \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n",
|
|||
|
"\n",
|
|||
|
"    # Escape double quotes as HTML entities: the markup is embedded inside the\n",
|
|||
|
"    # srcdoc=\"...\" attribute, which a raw double quote would terminate early.\n",
|
|||
|
"    iframe = \"\"\"\n",
|
|||
|
"        <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n",
|
|||
|
"    \"\"\".format(code.replace('\"', '&quot;'))\n",
|
|||
|
"    display(HTML(iframe))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 12,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from tensorflow.contrib.layers import fully_connected\n",
|
|||
|
"\n",
|
|||
|
"tf.reset_default_graph()\n",
|
|||
|
"\n",
|
|||
|
"n_inputs = 28*28 # MNIST\n",
|
|||
|
"n_hidden1 = 300\n",
|
|||
|
"n_hidden2 = 100\n",
|
|||
|
"n_outputs = 10\n",
|
|||
|
"learning_rate = 0.01\n",
|
|||
|
"\n",
|
|||
|
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
|
|||
|
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"dnn\"):\n",
|
|||
|
" hidden1 = fully_connected(X, n_hidden1, activation_fn=leaky_relu, scope=\"hidden1\")\n",
|
|||
|
" hidden2 = fully_connected(hidden1, n_hidden2, activation_fn=leaky_relu, scope=\"hidden2\")\n",
|
|||
|
" logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"loss\"):\n",
|
|||
|
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n",
|
|||
|
" loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"train\"):\n",
|
|||
|
" optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n",
|
|||
|
" training_op = optimizer.minimize(loss)\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"eval\"):\n",
|
|||
|
" correct = tf.nn.in_top_k(logits, y, 1)\n",
|
|||
|
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
|
|||
|
" \n",
|
|||
|
"init = tf.initialize_all_variables()\n",
|
|||
|
"saver = tf.train.Saver()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"n_epochs = 20\n",
|
|||
|
"batch_size = 100\n",
|
|||
|
"\n",
|
|||
|
"with tf.Session() as sess:\n",
|
|||
|
"    init.run()\n",
|
|||
|
"    # Batches are drawn from mnist.train, so size an epoch by the training set\n",
|
|||
|
"    # (not the test set) to go through about one pass of the data per epoch.\n",
|
|||
|
"    n_batches = len(mnist.train.labels) // batch_size\n",
|
|||
|
"    for epoch in range(n_epochs):\n",
|
|||
|
"        for iteration in range(n_batches):\n",
|
|||
|
"            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
|
|||
|
"            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
|
|||
|
"        # Report accuracy on the last training batch and on the full test set.\n",
|
|||
|
"        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
|
|||
|
"        acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n",
|
|||
|
"        print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
|
|||
|
"\n",
|
|||
|
"    save_path = saver.save(sess, \"my_model_final.ckpt\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Batch Normalization"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from tensorflow.contrib.layers import fully_connected, batch_norm\n",
|
|||
|
"from tensorflow.contrib.framework import arg_scope\n",
|
|||
|
"\n",
|
|||
|
"tf.reset_default_graph()\n",
|
|||
|
"\n",
|
|||
|
"n_inputs = 28 * 28 # MNIST\n",
|
|||
|
"n_hidden1 = 300\n",
|
|||
|
"n_hidden2 = 100\n",
|
|||
|
"n_outputs = 10\n",
|
|||
|
"learning_rate = 0.01\n",
|
|||
|
"momentum = 0.25\n",
|
|||
|
"\n",
|
|||
|
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
|
|||
|
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
|
|||
|
"is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"dnn\"):\n",
|
|||
|
" he_init = tf.contrib.layers.variance_scaling_initializer()\n",
|
|||
|
" batch_norm_params = {\n",
|
|||
|
" 'is_training': is_training,\n",
|
|||
|
" 'decay': 0.9,\n",
|
|||
|
" 'updates_collections': None,\n",
|
|||
|
" 'scale': True,\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" with arg_scope(\n",
|
|||
|
" [fully_connected],\n",
|
|||
|
" activation_fn=tf.nn.elu,\n",
|
|||
|
" weights_initializer=he_init,\n",
|
|||
|
" normalizer_fn=batch_norm,\n",
|
|||
|
" normalizer_params=batch_norm_params):\n",
|
|||
|
" hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n",
|
|||
|
" hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n",
|
|||
|
" logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"loss\"):\n",
|
|||
|
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n",
|
|||
|
" loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"train\"):\n",
|
|||
|
" optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n",
|
|||
|
" training_op = optimizer.minimize(loss)\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"eval\"):\n",
|
|||
|
" correct = tf.nn.in_top_k(logits, y, 1)\n",
|
|||
|
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
|
|||
|
" \n",
|
|||
|
"init = tf.initialize_all_variables()\n",
|
|||
|
"saver = tf.train.Saver()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 15,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"n_epochs = 20\n",
|
|||
|
"batch_size = 50\n",
|
|||
|
"\n",
|
|||
|
"with tf.Session() as sess:\n",
|
|||
|
"    init.run()\n",
|
|||
|
"    # Batches are drawn from mnist.train, so size an epoch by the training set\n",
|
|||
|
"    # (not the test set) to go through about one pass of the data per epoch.\n",
|
|||
|
"    n_batches = len(mnist.train.labels) // batch_size\n",
|
|||
|
"    for epoch in range(n_epochs):\n",
|
|||
|
"        for iteration in range(n_batches):\n",
|
|||
|
"            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
|
|||
|
"            sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n",
|
|||
|
"        # Evaluation feeds is_training=False; accuracy is reported on the last\n",
|
|||
|
"        # training batch and on the full test set.\n",
|
|||
|
"        acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n",
|
|||
|
"        acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n",
|
|||
|
"        print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
|
|||
|
"\n",
|
|||
|
"    save_path = saver.save(sess, \"my_model_final.ckpt\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 16,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"tf.reset_default_graph()\n",
|
|||
|
"\n",
|
|||
|
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
|
|||
|
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
|
|||
|
"is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"dnn\"):\n",
|
|||
|
" he_init = tf.contrib.layers.variance_scaling_initializer()\n",
|
|||
|
" batch_norm_params = {\n",
|
|||
|
" 'is_training': is_training,\n",
|
|||
|
" 'decay': 0.9,\n",
|
|||
|
" 'updates_collections': None,\n",
|
|||
|
" 'scale': True,\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" with arg_scope(\n",
|
|||
|
" [fully_connected],\n",
|
|||
|
" activation_fn=tf.nn.elu,\n",
|
|||
|
" weights_initializer=he_init,\n",
|
|||
|
" normalizer_fn=batch_norm,\n",
|
|||
|
" normalizer_params=batch_norm_params,\n",
|
|||
|
" weights_regularizer=tf.contrib.layers.l1_regularizer(0.01)):\n",
|
|||
|
" hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n",
|
|||
|
" hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n",
|
|||
|
" logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"loss\"):\n",
|
|||
|
"    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n",
|
|||
|
"    # tf.get_collection() returns a Python list of the regularization loss tensors.\n",
|
|||
|
"    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n",
|
|||
|
"    base_loss = tf.reduce_mean(xentropy, name=\"base_loss\")\n",
|
|||
|
"    # Sum everything into ONE scalar. tf.add(base_loss, reg_losses) would instead\n",
|
|||
|
"    # broadcast base_loss against the stacked list and yield a vector loss.\n",
|
|||
|
"    loss = tf.add_n([base_loss] + reg_losses, name=\"loss\")\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"train\"):\n",
|
|||
|
" optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n",
|
|||
|
" training_op = optimizer.minimize(loss)\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"eval\"):\n",
|
|||
|
" correct = tf.nn.in_top_k(logits, y, 1)\n",
|
|||
|
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
|
|||
|
" \n",
|
|||
|
"init = tf.initialize_all_variables()\n",
|
|||
|
"saver = tf.train.Saver()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 17,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"n_epochs = 20\n",
|
|||
|
"batch_size = 50\n",
|
|||
|
"\n",
|
|||
|
"with tf.Session() as sess:\n",
|
|||
|
"    init.run()\n",
|
|||
|
"    # Batches are drawn from mnist.train, so size an epoch by the training set\n",
|
|||
|
"    # (not the test set) to go through about one pass of the data per epoch.\n",
|
|||
|
"    n_batches = len(mnist.train.labels) // batch_size\n",
|
|||
|
"    for epoch in range(n_epochs):\n",
|
|||
|
"        for iteration in range(n_batches):\n",
|
|||
|
"            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
|
|||
|
"            sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n",
|
|||
|
"        # Evaluation feeds is_training=False; accuracy is reported on the last\n",
|
|||
|
"        # training batch and on the full test set.\n",
|
|||
|
"        acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n",
|
|||
|
"        acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n",
|
|||
|
"        print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
|
|||
|
"\n",
|
|||
|
"    save_path = saver.save(sess, \"my_model_final.ckpt\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 18,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"[v.name for v in tf.all_variables()]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"with tf.variable_scope(\"\", reuse=True):\n",
|
|||
|
" weights1 = tf.get_variable(\"hidden1/weights\")\n",
|
|||
|
" weights2 = tf.get_variable(\"hidden2/weights\")\n",
|
|||
|
" "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"tf.reset_default_graph()\n",
|
|||
|
"\n",
|
|||
|
"x = tf.constant([0., 0., 3., 4., 30., 40., 300., 400.], shape=(4, 2))\n",
|
|||
|
"c = tf.clip_by_norm(x, clip_norm=10)\n",
|
|||
|
"c0 = tf.clip_by_norm(x, clip_norm=350, axes=0)\n",
|
|||
|
"c1 = tf.clip_by_norm(x, clip_norm=10, axes=1)\n",
|
|||
|
"\n",
|
|||
|
"with tf.Session() as sess:\n",
|
|||
|
" xv = x.eval()\n",
|
|||
|
" cv = c.eval()\n",
|
|||
|
" c0v = c0.eval()\n",
|
|||
|
" c1v = c1.eval()\n",
|
|||
|
"\n",
|
|||
|
"print(xv)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"print(cv)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"print(np.linalg.norm(cv))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 23,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"print(c0v)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 24,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"print(np.linalg.norm(c0v, axis=0))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"print(c1v)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"print(np.linalg.norm(c1v, axis=1))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"tf.reset_default_graph()\n",
|
|||
|
"\n",
|
|||
|
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
|
|||
|
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
|
|||
|
"is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n",
|
|||
|
"\n",
|
|||
|
"def max_norm_regularizer(threshold, axes=1, name=\"max_norm\", collection=\"max_norm\"):\n",
|
|||
|
" def max_norm(weights):\n",
|
|||
|
" clip_weights = tf.assign(weights, tf.clip_by_norm(weights, clip_norm=threshold, axes=axes), name=name)\n",
|
|||
|
" tf.add_to_collection(collection, clip_weights)\n",
|
|||
|
" return None # there is no regularization loss term\n",
|
|||
|
" return max_norm\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"dnn\"):\n",
|
|||
|
" with arg_scope(\n",
|
|||
|
" [fully_connected],\n",
|
|||
|
" weights_regularizer=max_norm_regularizer(1.5)):\n",
|
|||
|
" hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n",
|
|||
|
" hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n",
|
|||
|
" logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n",
|
|||
|
"\n",
|
|||
|
"clip_all_weights = tf.get_collection(\"max_norm\")\n",
|
|||
|
" \n",
|
|||
|
"with tf.name_scope(\"loss\"):\n",
|
|||
|
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n",
|
|||
|
" loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"train\"):\n",
|
|||
|
" optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n",
|
|||
|
" threshold = 1.0\n",
|
|||
|
" grads_and_vars = optimizer.compute_gradients(loss)\n",
|
|||
|
" capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var)\n",
|
|||
|
" for grad, var in grads_and_vars]\n",
|
|||
|
" training_op = optimizer.apply_gradients(capped_gvs)\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"eval\"):\n",
|
|||
|
" correct = tf.nn.in_top_k(logits, y, 1)\n",
|
|||
|
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
|
|||
|
" \n",
|
|||
|
"init = tf.initialize_all_variables()\n",
|
|||
|
"saver = tf.train.Saver()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 28,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"n_epochs = 20\n",
|
|||
|
"batch_size = 50\n",
|
|||
|
"\n",
|
|||
|
"with tf.Session() as sess:\n",
|
|||
|
"    init.run()\n",
|
|||
|
"    # Batches are drawn from mnist.train, so size an epoch by the training set\n",
|
|||
|
"    # (not the test set) to go through about one pass of the data per epoch.\n",
|
|||
|
"    n_batches = len(mnist.train.labels) // batch_size\n",
|
|||
|
"    for epoch in range(n_epochs):\n",
|
|||
|
"        for iteration in range(n_batches):\n",
|
|||
|
"            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
|
|||
|
"            sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n",
|
|||
|
"            # The max-norm assign ops are only collected in \"max_norm\"; nothing\n",
|
|||
|
"            # runs them unless we do so explicitly after each training step.\n",
|
|||
|
"            sess.run(clip_all_weights)\n",
|
|||
|
"        # Report accuracy on the last training batch and on the full test set.\n",
|
|||
|
"        acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n",
|
|||
|
"        acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n",
|
|||
|
"        print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
|
|||
|
"\n",
|
|||
|
"    save_path = saver.save(sess, \"my_model_final.ckpt\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 29,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"show_graph(tf.get_default_graph())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 30,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from tensorflow.contrib.layers import dropout\n",
|
|||
|
"\n",
|
|||
|
"tf.reset_default_graph()\n",
|
|||
|
"\n",
|
|||
|
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
|
|||
|
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
|
|||
|
"is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n",
|
|||
|
"\n",
|
|||
|
"initial_learning_rate = 0.1\n",
|
|||
|
"decay_steps = 10000\n",
|
|||
|
"decay_rate = 1/10\n",
|
|||
|
"global_step = tf.Variable(0, trainable=False)\n",
|
|||
|
"learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step,\n",
|
|||
|
" decay_steps, decay_rate)\n",
|
|||
|
"\n",
|
|||
|
"keep_prob = 0.5\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"dnn\"):\n",
|
|||
|
" he_init = tf.contrib.layers.variance_scaling_initializer()\n",
|
|||
|
" with arg_scope(\n",
|
|||
|
" [fully_connected],\n",
|
|||
|
" activation_fn=tf.nn.elu,\n",
|
|||
|
" weights_initializer=he_init):\n",
|
|||
|
" X_drop = dropout(X, keep_prob, is_training=is_training)\n",
|
|||
|
" hidden1 = fully_connected(X_drop, n_hidden1, scope=\"hidden1\")\n",
|
|||
|
" hidden1_drop = dropout(hidden1, keep_prob, is_training=is_training)\n",
|
|||
|
" hidden2 = fully_connected(hidden1_drop, n_hidden2, scope=\"hidden2\")\n",
|
|||
|
" hidden2_drop = dropout(hidden2, keep_prob, is_training=is_training)\n",
|
|||
|
" logits = fully_connected(hidden2_drop, n_outputs, activation_fn=None, scope=\"outputs\")\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"loss\"):\n",
|
|||
|
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n",
|
|||
|
" loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"train\"):\n",
|
|||
|
" optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n",
|
|||
|
" training_op = optimizer.minimize(loss, global_step=global_step) \n",
|
|||
|
"\n",
|
|||
|
"with tf.name_scope(\"eval\"):\n",
|
|||
|
" correct = tf.nn.in_top_k(logits, y, 1)\n",
|
|||
|
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
|
|||
|
" \n",
|
|||
|
"init = tf.initialize_all_variables()\n",
|
|||
|
"saver = tf.train.Saver()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 31,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"n_epochs = 20\n",
|
|||
|
"batch_size = 50\n",
|
|||
|
"\n",
|
|||
|
"with tf.Session() as sess:\n",
|
|||
|
"    init.run()\n",
|
|||
|
"    # Batches are drawn from mnist.train, so size an epoch by the training set\n",
|
|||
|
"    # (not the test set) to go through about one pass of the data per epoch.\n",
|
|||
|
"    n_batches = len(mnist.train.labels) // batch_size\n",
|
|||
|
"    for epoch in range(n_epochs):\n",
|
|||
|
"        for iteration in range(n_batches):\n",
|
|||
|
"            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
|
|||
|
"            sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n",
|
|||
|
"        # Evaluation feeds is_training=False; accuracy is reported on the last\n",
|
|||
|
"        # training batch and on the full test set.\n",
|
|||
|
"        acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n",
|
|||
|
"        acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n",
|
|||
|
"        print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
|
|||
|
"\n",
|
|||
|
"    save_path = saver.save(sess, \"my_model_final.ckpt\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 32,
|
|||
|
"metadata": {
|
|||
|
"collapsed": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,\n",
|
|||
|
" scope=\"hidden[2]|outputs\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 33,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"training_op2 = optimizer.minimize(loss, var_list=train_vars)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 34,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"for i in tf.all_variables():\n",
|
|||
|
" print(i.name)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 35,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"for i in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):\n",
|
|||
|
" print(i.name)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"for i in train_vars:\n",
|
|||
|
" print(i.name)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"X_train = mnist.train.images\n",
|
|||
|
"y_train = mnist.train.labels.astype(\"int\")\n",
|
|||
|
"X_val = mnist.test.images[8000:]\n",
|
|||
|
"y_val = mnist.test.labels[8000:].astype(\"int\")\n",
|
|||
|
"\n",
|
|||
|
"feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)\n",
|
|||
|
"dnn_clf = tf.contrib.learn.DNNClassifier(\n",
|
|||
|
" feature_columns = feature_columns,\n",
|
|||
|
" hidden_units=[300, 100],\n",
|
|||
|
" n_classes=10,\n",
|
|||
|
" model_dir=\"/tmp/my_model\",\n",
|
|||
|
" config=tf.contrib.learn.RunConfig(save_checkpoints_secs=60)\n",
|
|||
|
" )\n",
|
|||
|
"\n",
|
|||
|
"validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(\n",
|
|||
|
" X_val,\n",
|
|||
|
" y_val,\n",
|
|||
|
" every_n_steps=50,\n",
|
|||
|
" early_stopping_metric=\"loss\",\n",
|
|||
|
" early_stopping_metric_minimize=True,\n",
|
|||
|
" early_stopping_rounds=2000\n",
|
|||
|
" )\n",
|
|||
|
"\n",
|
|||
|
"dnn_clf.fit(x=X_train,\n",
|
|||
|
" y=y_train,\n",
|
|||
|
" steps=40000,\n",
|
|||
|
" monitors=[validation_monitor]\n",
|
|||
|
" )\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"collapsed": true
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"# Exercise solutions"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Coming soon**"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {
|
|||
|
"collapsed": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": []
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.5.1"
|
|||
|
},
|
|||
|
"nav_menu": {
|
|||
|
"height": "360px",
|
|||
|
"width": "416px"
|
|||
|
},
|
|||
|
"toc": {
|
|||
|
"navigate_menu": true,
|
|||
|
"number_sections": true,
|
|||
|
"sideBar": true,
|
|||
|
"threshold": 6,
|
|||
|
"toc_cell": false,
|
|||
|
"toc_section_display": "block",
|
|||
|
"toc_window_display": false
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 0
|
|||
|
}
|