From fd0ce384f200820a25eca44a502a1d65d5dbe0ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Fri, 27 Oct 2017 16:19:15 +0200 Subject: [PATCH] Set DropoutWrapper's input_keep_prob parameter using a placeholder, fixes #111 --- 14_recurrent_neural_networks.ipynb | 364 +++++++---------------------- 1 file changed, 89 insertions(+), 275 deletions(-) diff --git a/14_recurrent_neural_networks.ipynb b/14_recurrent_neural_networks.ipynb index c4547b1..267d821 100644 --- a/14_recurrent_neural_networks.ipynb +++ b/14_recurrent_neural_networks.ipynb @@ -31,9 +31,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# To support both python 2 and python 3\n", @@ -79,9 +77,7 @@ { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf" @@ -104,9 +100,7 @@ { "cell_type": "code", "execution_count": 3, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -130,9 +124,7 @@ { "cell_type": "code", "execution_count": 4, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", @@ -173,9 +165,7 @@ { "cell_type": "code", "execution_count": 7, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_inputs = 3\n", @@ -185,9 +175,7 @@ { "cell_type": "code", "execution_count": 8, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -204,9 +192,7 @@ { "cell_type": "code", "execution_count": 9, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()" @@ -215,9 +201,7 @@ { "cell_type": "code", "execution_count": 10, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n", @@ -249,9 +233,7 @@ { "cell_type": "code", "execution_count": 13, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from IPython.display import clear_output, Image, display, HTML\n", @@ -311,9 +293,7 @@ { "cell_type": "code", "execution_count": 15, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_steps = 2\n", @@ -324,9 +304,7 @@ { "cell_type": "code", "execution_count": 16, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -343,9 +321,7 @@ { "cell_type": "code", "execution_count": 17, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()" @@ -354,9 +330,7 @@ { "cell_type": "code", "execution_count": 18, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "X_batch = np.array([\n", @@ -400,9 +374,7 @@ { "cell_type": "code", "execution_count": 21, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_steps = 2\n", @@ -413,9 +385,7 @@ { "cell_type": "code", "execution_count": 22, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -429,9 +399,7 @@ { "cell_type": "code", "execution_count": 23, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()" @@ -440,9 +408,7 @@ { "cell_type": "code", "execution_count": 24, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "X_batch = np.array([\n", @@ -485,9 +451,7 @@ { "cell_type": "code", "execution_count": 27, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_steps = 2\n", @@ -503,9 +467,7 @@ { "cell_type": "code", "execution_count": 28, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "seq_length = tf.placeholder(tf.int32, [None])\n", @@ -516,9 +478,7 @@ { "cell_type": "code", "execution_count": 29, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()" @@ -527,9 +487,7 @@ { "cell_type": "code", "execution_count": 30, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "X_batch = np.array([\n", @@ -545,9 +503,7 @@ { "cell_type": "code", "execution_count": 31, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "with tf.Session() as sess:\n", @@ -593,9 +549,7 @@ { "cell_type": "code", "execution_count": 34, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -668,9 +622,7 @@ { "cell_type": "code", "execution_count": 37, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -688,9 +640,7 @@ { "cell_type": "code", "execution_count": 38, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_neurons = 100\n", @@ -706,9 +656,7 @@ { "cell_type": "code", "execution_count": 39, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "states_concat = tf.concat(axis=1, values=states)\n", @@ -754,9 +702,7 @@ { "cell_type": "code", "execution_count": 41, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "t_min, t_max = 0, 30\n", @@ -808,9 +754,7 @@ { "cell_type": "code", "execution_count": 43, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "X_batch, y_batch = next_batch(1, n_steps)" @@ -842,9 +786,7 @@ { "cell_type": "code", "execution_count": 45, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -871,9 +813,7 @@ { "cell_type": "code", "execution_count": 46, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -890,9 +830,7 @@ { "cell_type": "code", "execution_count": 47, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "cell = tf.contrib.rnn.OutputProjectionWrapper(\n", @@ -903,9 +841,7 @@ { "cell_type": "code", "execution_count": 48, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)" @@ -914,9 +850,7 @@ { "cell_type": "code", "execution_count": 49, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "learning_rate = 0.001\n", @@ -931,9 +865,7 @@ { "cell_type": "code", "execution_count": 50, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "saver = tf.train.Saver()" @@ -1009,9 +941,7 @@ { "cell_type": "code", "execution_count": 55, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -1027,9 +957,7 @@ { "cell_type": "code", "execution_count": 56, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n", @@ -1039,9 +967,7 @@ { "cell_type": "code", "execution_count": 57, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_outputs = 1\n", @@ -1051,9 +977,7 @@ { "cell_type": "code", "execution_count": 58, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", @@ -1064,9 +988,7 @@ { "cell_type": "code", "execution_count": 59, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "loss = tf.reduce_mean(tf.square(outputs - y))\n", @@ -1216,9 +1138,7 @@ { "cell_type": "code", "execution_count": 66, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -1232,9 +1152,7 @@ { "cell_type": "code", "execution_count": 67, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_neurons = 100\n", @@ -1249,9 +1167,7 @@ { "cell_type": "code", "execution_count": 68, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()" @@ -1260,9 +1176,7 @@ { "cell_type": "code", "execution_count": 69, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "X_batch = np.random.rand(2, n_steps, n_inputs)" @@ -1271,9 +1185,7 @@ { "cell_type": "code", "execution_count": 70, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "with tf.Session() as sess:\n", @@ -1307,9 +1219,7 @@ { "cell_type": "code", "execution_count": 72, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "with tf.device(\"/gpu:0\"): # BAD! This is ignored.\n", @@ -1329,9 +1239,7 @@ { "cell_type": "code", "execution_count": 73, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", @@ -1357,9 +1265,7 @@ { "cell_type": "code", "execution_count": 74, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -1374,9 +1280,7 @@ { "cell_type": "code", "execution_count": 75, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "devices = [\"/cpu:0\", \"/cpu:0\", \"/cpu:0\"] # replace with [\"/gpu:0\", \"/gpu:1\", \"/gpu:2\"] if you have 3 GPUs\n", @@ -1396,9 +1300,7 @@ { "cell_type": "code", "execution_count": 76, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()" @@ -1427,9 +1329,7 @@ { "cell_type": "code", "execution_count": 78, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -1438,22 +1338,33 @@ "n_neurons = 100\n", "n_layers = 3\n", "n_steps = 20\n", - "n_outputs = 1\n", - "\n", - "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", - "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])" + "n_outputs = 1" ] }, { "cell_type": "code", "execution_count": 79, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ - "keep_prob = 0.5\n", - "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: the `input_keep_prob` parameter can be a placeholder, making it possible to set it to any value you want during training, and to 1.0 during testing (effectively turning dropout off). This is a much more elegant solution than what was recommended in earlier versions of the book (i.e., writing your own wrapper class or having a separate model for training and testing). Thanks to Shen Cheng for bringing this to my attention." + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "keep_prob = tf.placeholder_with_default(1.0, shape=())\n", "cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", " for layer in range(n_layers)]\n", "cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n", @@ -1464,10 +1375,8 @@ }, { "cell_type": "code", - "execution_count": 80, - "metadata": { - "collapsed": true - }, + "execution_count": 81, + "metadata": {}, "outputs": [], "source": [ "learning_rate = 0.01\n", @@ -1484,78 +1393,29 @@ "saver = tf.train.Saver()" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Unfortunately, this code is only usable for training, because the `DropoutWrapper` class has no `training` parameter, so it always applies dropout, even when the model is not being trained, so we must first train the model, then create a different model for testing, without the `DropoutWrapper`." - ] - }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 82, "metadata": {}, "outputs": [], "source": [ - "n_iterations = 1000\n", + "n_iterations = 1500\n", "batch_size = 50\n", + "train_keep_prob = 0.5\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for iteration in range(n_iterations):\n", " X_batch, y_batch = next_batch(batch_size, n_steps)\n", - " _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch})\n", - " if iteration % 100 == 0:\n", - " print(iteration, \"Training MSE:\", mse)\n", + " _, mse = sess.run([training_op, loss],\n", + " feed_dict={X: X_batch, y: y_batch,\n", + " keep_prob: train_keep_prob})\n", + " if iteration % 100 == 0: # not shown in the book\n", + " print(iteration, \"Training MSE:\", mse) # not shown\n", " \n", " saver.save(sess, \"./my_dropout_time_series_model\")" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now that the model is trained, we need to create the model again, but without the `DropoutWrapper` for testing:" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "n_inputs = 1\n", - "n_neurons = 100\n", - "n_layers = 3\n", - "n_steps = 20\n", - "n_outputs = 1\n", - "\n", - "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", - "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", - "\n", - "keep_prob = 0.5\n", - "\n", - "cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", - " for layer in range(n_layers)]\n", - "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n", - "rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", - "\n", - "learning_rate = 0.01\n", - "\n", - "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", - "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n", - "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", - "\n", - "loss = tf.reduce_mean(tf.square(outputs - y))\n", - "\n", - "init = tf.global_variables_initializer()\n", - "saver = tf.train.Saver()" - ] - }, { "cell_type": "code", "execution_count": 83, @@ -1566,8 +1426,15 @@ " saver.restore(sess, \"./my_dropout_time_series_model\")\n", "\n", " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", - " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", - "\n", + " y_pred = sess.run(outputs, feed_dict={X: X_new})" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [], + "source": [ "plt.title(\"Testing the model\", fontsize=14)\n", "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", @@ -1585,59 +1452,6 @@ "Oops, it seems that Dropout does not help at all in this particular case. :/" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Another option is to write a script with a command line argument to specify whether you want to train the mode or use it for making predictions:" - ] - }, - { - "cell_type": "code", - "execution_count": 84, - "metadata": {}, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "import sys\n", - "training = True # in a script, this would be (sys.argv[-1] == \"train\") instead\n", - "\n", - "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", - "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", - "\n", - "cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", - " for layer in range(n_layers)]\n", - "if training:\n", - " cells = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n", - " for cell in cells]\n", - "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n", - "rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", - "\n", - "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons]) # not shown in the book\n", - "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs) # not shown\n", - "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs]) # not shown\n", - "loss = tf.reduce_mean(tf.square(outputs - y)) # not shown\n", - "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) # not shown\n", - "training_op = optimizer.minimize(loss) # not shown\n", - "init = tf.global_variables_initializer() # not shown\n", - "saver = tf.train.Saver() # not shown\n", - "\n", - "with tf.Session() as sess:\n", - " if training:\n", - " init.run()\n", - " for iteration in range(n_iterations):\n", - " X_batch, y_batch = next_batch(batch_size, n_steps) # not shown\n", - " _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch}) # not shown\n", - " if iteration % 100 == 0: # not shown\n", - " print(iteration, \"Training MSE:\", mse) # not shown\n", - " save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n", - " else:\n", - " saver.restore(sess, \"/tmp/my_model.ckpt\")\n", - " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs))) # not shown\n", - " y_pred = sess.run(outputs, feed_dict={X: X_new}) # not shown" - ] - }, { "cell_type": "markdown", "metadata": {},