Set DropoutWrapper's input_keep_prob parameter using a placeholder, fixes #111

main
Aurélien Geron 2017-10-27 16:19:15 +02:00
parent b39d5366f7
commit fd0ce384f2
1 changed file with 89 additions and 275 deletions
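
The change in a nutshell: the fixed Python constant `keep_prob = 0.5` is replaced by `keep_prob = tf.placeholder_with_default(1.0, shape=())`, so a single graph serves both phases. You feed `keep_prob: 0.5` (or any value) during training, and simply omit it from the `feed_dict` during testing, which leaves the default of 1.0 and effectively turns dropout off. A minimal sketch of the pattern, assuming the TF 1.x `tf.contrib.rnn` API used in the notebook:

    import tensorflow as tf

    # Defaults to 1.0 (no dropout) whenever nothing is fed.
    keep_prob = tf.placeholder_with_default(1.0, shape=())

    cells = [tf.contrib.rnn.BasicRNNCell(num_units=100) for _ in range(3)]
    cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)
                  for cell in cells]
    multi_cell = tf.contrib.rnn.MultiRNNCell(cells_drop)

    # Training: feed the keep probability explicitly, e.g.
    #   sess.run(training_op, feed_dict={X: X_batch, y: y_batch, keep_prob: 0.5})
    # Testing: omit keep_prob from the feed_dict; the default 1.0 disables dropout.

This removes the old workaround of building a second, DropoutWrapper-free graph just for testing (deleted below).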


@@ -31,9 +31,7 @@
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"# To support both python 2 and python 3\n",
@@ -79,9 +77,7 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf"
@@ -104,9 +100,7 @@
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -130,9 +124,7 @@
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
@@ -173,9 +165,7 @@
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_inputs = 3\n",
@@ -185,9 +175,7 @@
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -204,9 +192,7 @@
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
@@ -215,9 +201,7 @@
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n",
@@ -249,9 +233,7 @@
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import clear_output, Image, display, HTML\n",
@@ -311,9 +293,7 @@
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_steps = 2\n",
@@ -324,9 +304,7 @@
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -343,9 +321,7 @@
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
@@ -354,9 +330,7 @@
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"X_batch = np.array([\n",
@@ -400,9 +374,7 @@
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_steps = 2\n",
@@ -413,9 +385,7 @@
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -429,9 +399,7 @@
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
@@ -440,9 +408,7 @@
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"X_batch = np.array([\n",
@@ -485,9 +451,7 @@
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_steps = 2\n",
@@ -503,9 +467,7 @@
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"seq_length = tf.placeholder(tf.int32, [None])\n",
@@ -516,9 +478,7 @@
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
@@ -527,9 +487,7 @@
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"X_batch = np.array([\n",
@@ -545,9 +503,7 @@
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
@@ -593,9 +549,7 @@
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -668,9 +622,7 @@
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -688,9 +640,7 @@
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_neurons = 100\n",
@@ -706,9 +656,7 @@
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"states_concat = tf.concat(axis=1, values=states)\n",
@@ -754,9 +702,7 @@
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"t_min, t_max = 0, 30\n",
@@ -808,9 +754,7 @@
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"X_batch, y_batch = next_batch(1, n_steps)"
@@ -842,9 +786,7 @@
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -871,9 +813,7 @@
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -890,9 +830,7 @@
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"cell = tf.contrib.rnn.OutputProjectionWrapper(\n",
@@ -903,9 +841,7 @@
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
@@ -914,9 +850,7 @@
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"learning_rate = 0.001\n",
@@ -931,9 +865,7 @@
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"saver = tf.train.Saver()"
@@ -1009,9 +941,7 @@
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -1027,9 +957,7 @@
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
@@ -1039,9 +967,7 @@
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_outputs = 1\n",
@@ -1051,9 +977,7 @@
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
@@ -1064,9 +988,7 @@
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"loss = tf.reduce_mean(tf.square(outputs - y))\n",
@@ -1216,9 +1138,7 @@
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -1232,9 +1152,7 @@
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_neurons = 100\n",
@@ -1249,9 +1167,7 @@
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
@@ -1260,9 +1176,7 @@
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"X_batch = np.random.rand(2, n_steps, n_inputs)"
@@ -1271,9 +1185,7 @@
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
@@ -1307,9 +1219,7 @@
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"with tf.device(\"/gpu:0\"): # BAD! This is ignored.\n",
@@ -1329,9 +1239,7 @@
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
@@ -1357,9 +1265,7 @@
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -1374,9 +1280,7 @@
{
"cell_type": "code",
"execution_count": 75,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"devices = [\"/cpu:0\", \"/cpu:0\", \"/cpu:0\"] # replace with [\"/gpu:0\", \"/gpu:1\", \"/gpu:2\"] if you have 3 GPUs\n",
@@ -1396,9 +1300,7 @@
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
@@ -1427,9 +1329,7 @@
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -1438,22 +1338,33 @@
"n_neurons = 100\n",
"n_layers = 3\n",
"n_steps = 20\n",
"n_outputs = 1\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
"n_outputs = 1"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"keep_prob = 0.5\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: the `input_keep_prob` parameter can be a placeholder, making it possible to set it to any value you want during training, and to 1.0 during testing (effectively turning dropout off). This is a much more elegant solution than what was recommended in earlier versions of the book (i.e., writing your own wrapper class or having a separate model for training and testing). Thanks to Shen Cheng for bringing this to my attention."
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [],
"source": [
"keep_prob = tf.placeholder_with_default(1.0, shape=())\n",
"cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
@@ -1464,10 +1375,8 @@
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {
"collapsed": true
},
"execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
"learning_rate = 0.01\n",
@@ -1484,78 +1393,29 @@
"saver = tf.train.Saver()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Unfortunately, this code is only usable for training, because the `DropoutWrapper` class has no `training` parameter, so it always applies dropout, even when the model is not being trained, so we must first train the model, then create a different model for testing, without the `DropoutWrapper`."
]
},
{
"cell_type": "code",
"execution_count": 81,
"execution_count": 82,
"metadata": {},
"outputs": [],
"source": [
"n_iterations = 1000\n",
"n_iterations = 1500\n",
"batch_size = 50\n",
"train_keep_prob = 0.5\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
" _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch})\n",
" if iteration % 100 == 0:\n",
" print(iteration, \"Training MSE:\", mse)\n",
" _, mse = sess.run([training_op, loss],\n",
" feed_dict={X: X_batch, y: y_batch,\n",
" keep_prob: train_keep_prob})\n",
" if iteration % 100 == 0: # not shown in the book\n",
" print(iteration, \"Training MSE:\", mse) # not shown\n",
" \n",
" saver.save(sess, \"./my_dropout_time_series_model\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now that the model is trained, we need to create the model again, but without the `DropoutWrapper` for testing:"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"n_inputs = 1\n",
"n_neurons = 100\n",
"n_layers = 3\n",
"n_steps = 20\n",
"n_outputs = 1\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
"\n",
"keep_prob = 0.5\n",
"\n",
"cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n",
"rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
"\n",
"learning_rate = 0.01\n",
"\n",
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
"\n",
"loss = tf.reduce_mean(tf.square(outputs - y))\n",
"\n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 83,
@@ -1566,8 +1426,15 @@
" saver.restore(sess, \"./my_dropout_time_series_model\")\n",
"\n",
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
"\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new})"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"plt.title(\"Testing the model\", fontsize=14)\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
@@ -1585,59 +1452,6 @@
"Oops, it seems that Dropout does not help at all in this particular case. :/"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Another option is to write a script with a command line argument to specify whether you want to train the mode or use it for making predictions:"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"import sys\n",
"training = True # in a script, this would be (sys.argv[-1] == \"train\") instead\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
"\n",
"cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"if training:\n",
" cells = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
" for cell in cells]\n",
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n",
"rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
"\n",
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons]) # not shown in the book\n",
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs) # not shown\n",
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs]) # not shown\n",
"loss = tf.reduce_mean(tf.square(outputs - y)) # not shown\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) # not shown\n",
"training_op = optimizer.minimize(loss) # not shown\n",
"init = tf.global_variables_initializer() # not shown\n",
"saver = tf.train.Saver() # not shown\n",
"\n",
"with tf.Session() as sess:\n",
" if training:\n",
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps) # not shown\n",
" _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch}) # not shown\n",
" if iteration % 100 == 0: # not shown\n",
" print(iteration, \"Training MSE:\", mse) # not shown\n",
" save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n",
" else:\n",
" saver.restore(sess, \"/tmp/my_model.ckpt\")\n",
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs))) # not shown\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new}) # not shown"
]
},
{
"cell_type": "markdown",
"metadata": {},