Set DropoutWrapper's input_keep_prob parameter using a placeholder, fixes #111

main
Aurélien Geron 2017-10-27 16:19:15 +02:00
parent b39d5366f7
commit fd0ce384f2
1 changed file with 89 additions and 275 deletions
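
The change in a nutshell: the fixed Python constant `keep_prob = 0.5` is replaced by `keep_prob = tf.placeholder_with_default(1.0, shape=())`, so a single graph serves both phases. You feed `keep_prob: 0.5` (or any value) during training, and simply omit it from the `feed_dict` during testing, which leaves the default of 1.0 and effectively turns dropout off. A minimal sketch of the pattern, assuming the TF 1.x `tf.contrib.rnn` API used in the notebook:

    import tensorflow as tf

    # Defaults to 1.0 (no dropout) whenever nothing is fed.
    keep_prob = tf.placeholder_with_default(1.0, shape=())

    cells = [tf.contrib.rnn.BasicRNNCell(num_units=100) for _ in range(3)]
    cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)
                  for cell in cells]
    multi_cell = tf.contrib.rnn.MultiRNNCell(cells_drop)

    # Training: feed the keep probability explicitly, e.g.
    #   sess.run(training_op, feed_dict={X: X_batch, y: y_batch, keep_prob: 0.5})
    # Testing: omit keep_prob from the feed_dict; the default 1.0 disables dropout.

This removes the old workaround of building a second, DropoutWrapper-free graph just for testing (deleted below).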


@@ -31,9 +31,7 @@
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"# To support both python 2 and python 3\n",
@@ -79,9 +77,7 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf"
@@ -104,9 +100,7 @@
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -130,9 +124,7 @@
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
@@ -173,9 +165,7 @@
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_inputs = 3\n",
@@ -185,9 +175,7 @@
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -204,9 +192,7 @@
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
@@ -215,9 +201,7 @@
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n",
@@ -249,9 +233,7 @@
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import clear_output, Image, display, HTML\n",
@@ -311,9 +293,7 @@
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_steps = 2\n",
@@ -324,9 +304,7 @@
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -343,9 +321,7 @@
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
@@ -354,9 +330,7 @@
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"X_batch = np.array([\n",
@@ -400,9 +374,7 @@
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_steps = 2\n",
@@ -413,9 +385,7 @@
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -429,9 +399,7 @@
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
@@ -440,9 +408,7 @@
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"X_batch = np.array([\n",
@@ -485,9 +451,7 @@
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_steps = 2\n",
@@ -503,9 +467,7 @@
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"seq_length = tf.placeholder(tf.int32, [None])\n",
@@ -516,9 +478,7 @@
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
@@ -527,9 +487,7 @@
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"X_batch = np.array([\n",
@@ -545,9 +503,7 @@
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
@@ -593,9 +549,7 @@
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -668,9 +622,7 @@
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -688,9 +640,7 @@
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_neurons = 100\n",
@@ -706,9 +656,7 @@
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"states_concat = tf.concat(axis=1, values=states)\n",
@@ -754,9 +702,7 @@
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"t_min, t_max = 0, 30\n",
@@ -808,9 +754,7 @@
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"X_batch, y_batch = next_batch(1, n_steps)"
@@ -842,9 +786,7 @@
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -871,9 +813,7 @@
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -890,9 +830,7 @@
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"cell = tf.contrib.rnn.OutputProjectionWrapper(\n",
@@ -903,9 +841,7 @@
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
@@ -914,9 +850,7 @@
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"learning_rate = 0.001\n",
@@ -931,9 +865,7 @@
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"saver = tf.train.Saver()"
@@ -1009,9 +941,7 @@
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -1027,9 +957,7 @@
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
@@ -1039,9 +967,7 @@
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_outputs = 1\n",
@@ -1051,9 +977,7 @@
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
@@ -1064,9 +988,7 @@
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"loss = tf.reduce_mean(tf.square(outputs - y))\n",
@@ -1216,9 +1138,7 @@
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -1232,9 +1152,7 @@
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_neurons = 100\n",
@@ -1249,9 +1167,7 @@
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
@@ -1260,9 +1176,7 @@
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"X_batch = np.random.rand(2, n_steps, n_inputs)"
@@ -1271,9 +1185,7 @@
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
@@ -1307,9 +1219,7 @@
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"with tf.device(\"/gpu:0\"): # BAD! This is ignored.\n",
@@ -1329,9 +1239,7 @@
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
@@ -1357,9 +1265,7 @@
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -1374,9 +1280,7 @@
{
"cell_type": "code",
"execution_count": 75,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"devices = [\"/cpu:0\", \"/cpu:0\", \"/cpu:0\"] # replace with [\"/gpu:0\", \"/gpu:1\", \"/gpu:2\"] if you have 3 GPUs\n",
@@ -1396,9 +1300,7 @@
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
@@ -1427,9 +1329,7 @@
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@@ -1438,22 +1338,33 @@
"n_neurons = 100\n",
"n_layers = 3\n",
"n_steps = 20\n",
"n_outputs = 1\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
"n_outputs = 1"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"keep_prob = 0.5\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: the `input_keep_prob` parameter can be a placeholder, making it possible to set it to any value you want during training, and to 1.0 during testing (effectively turning dropout off). This is a much more elegant solution than what was recommended in earlier versions of the book (i.e., writing your own wrapper class or having a separate model for training and testing). Thanks to Shen Cheng for bringing this to my attention."
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [],
"source": [
"keep_prob = tf.placeholder_with_default(1.0, shape=())\n",
"cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
@@ -1464,10 +1375,8 @@
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {
"collapsed": true
},
"execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
"learning_rate = 0.01\n",
@@ -1484,78 +1393,29 @@
"saver = tf.train.Saver()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Unfortunately, this code is only usable for training, because the `DropoutWrapper` class has no `training` parameter, so it always applies dropout, even when the model is not being trained, so we must first train the model, then create a different model for testing, without the `DropoutWrapper`."
]
},
{
"cell_type": "code",
"execution_count": 81,
"execution_count": 82,
"metadata": {},
"outputs": [],
"source": [
"n_iterations = 1000\n",
"n_iterations = 1500\n",
"batch_size = 50\n",
"train_keep_prob = 0.5\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
" _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch})\n",
" if iteration % 100 == 0:\n",
" print(iteration, \"Training MSE:\", mse)\n",
" _, mse = sess.run([training_op, loss],\n",
" feed_dict={X: X_batch, y: y_batch,\n",
" keep_prob: train_keep_prob})\n",
" if iteration % 100 == 0: # not shown in the book\n",
" print(iteration, \"Training MSE:\", mse) # not shown\n",
" \n",
" saver.save(sess, \"./my_dropout_time_series_model\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now that the model is trained, we need to create the model again, but without the `DropoutWrapper` for testing:"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"n_inputs = 1\n",
"n_neurons = 100\n",
"n_layers = 3\n",
"n_steps = 20\n",
"n_outputs = 1\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
"\n",
"keep_prob = 0.5\n",
"\n",
"cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n",
"rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
"\n",
"learning_rate = 0.01\n",
"\n",
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
"\n",
"loss = tf.reduce_mean(tf.square(outputs - y))\n",
"\n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 83,
@@ -1566,8 +1426,15 @@
" saver.restore(sess, \"./my_dropout_time_series_model\")\n",
"\n",
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
"\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new})"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"plt.title(\"Testing the model\", fontsize=14)\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
@@ -1585,59 +1452,6 @@
"Oops, it seems that Dropout does not help at all in this particular case. :/"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Another option is to write a script with a command line argument to specify whether you want to train the mode or use it for making predictions:"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"import sys\n",
"training = True # in a script, this would be (sys.argv[-1] == \"train\") instead\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
"\n",
"cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"if training:\n",
" cells = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
" for cell in cells]\n",
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n",
"rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
"\n",
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons]) # not shown in the book\n",
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs) # not shown\n",
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs]) # not shown\n",
"loss = tf.reduce_mean(tf.square(outputs - y)) # not shown\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) # not shown\n",
"training_op = optimizer.minimize(loss) # not shown\n",
"init = tf.global_variables_initializer() # not shown\n",
"saver = tf.train.Saver() # not shown\n",
"\n",
"with tf.Session() as sess:\n",
" if training:\n",
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps) # not shown\n",
" _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch}) # not shown\n",
" if iteration % 100 == 0: # not shown\n",
" print(iteration, \"Training MSE:\", mse) # not shown\n",
" save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n",
" else:\n",
" saver.restore(sess, \"/tmp/my_model.ckpt\")\n",
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs))) # not shown\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new}) # not shown"
]
},
{
"cell_type": "markdown",
"metadata": {},