From 9910d31ec3688a0095f60a80e09af4485e65bdad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Mon, 5 Jun 2017 19:18:20 +0200 Subject: [PATCH] Upgrade notebook 14 to support TF 1.1+ (there were breaking changes with RNNs) --- 14_recurrent_neural_networks.ipynb | 1332 +++++++++++++++++++++------- 1 file changed, 998 insertions(+), 334 deletions(-) diff --git a/14_recurrent_neural_networks.ipynb b/14_recurrent_neural_networks.ipynb index 5d26a58..380ff29 100644 --- a/14_recurrent_neural_networks.ipynb +++ b/14_recurrent_neural_networks.ipynb @@ -142,8 +142,8 @@ "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n", "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n", "\n", - "Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons], dtype=tf.float32))\n", - "Wy = tf.Variable(tf.random_normal(shape=[n_neurons, n_neurons], dtype=tf.float32))\n", + "Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons],dtype=tf.float32))\n", + "Wy = tf.Variable(tf.random_normal(shape=[n_neurons,n_neurons],dtype=tf.float32))\n", "b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))\n", "\n", "Y0 = tf.tanh(tf.matmul(X0, Wx) + b)\n", @@ -162,6 +162,8 @@ }, "outputs": [], "source": [ + "import numpy as np\n", + "\n", "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0\n", "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1\n", "\n", @@ -210,7 +212,7 @@ "cell_type": "code", "execution_count": 7, "metadata": { - "collapsed": false, + "collapsed": true, "deletable": true, "editable": true }, @@ -219,16 +221,7 @@ "tf.reset_default_graph()\n", "\n", "n_inputs = 3\n", - "n_neurons = 5\n", - "\n", - "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n", - "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n", - "\n", - "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", - "output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, [X0, X1], dtype=tf.float32)\n", - "Y0, Y1 = output_seqs\n", - 
"\n", - "init = tf.global_variables_initializer()" + "n_neurons = 5" ] }, { @@ -240,6 +233,38 @@ "editable": true }, "outputs": [], + "source": [ + "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n", + "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n", + "\n", + "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", + "output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, [X0, X1],\n", + " dtype=tf.float32)\n", + "Y0, Y1 = output_seqs" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "init = tf.global_variables_initializer()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], "source": [ "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n", "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])\n", @@ -251,7 +276,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": { "collapsed": false, "deletable": true, @@ -264,7 +289,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": { "collapsed": false, "deletable": true, @@ -277,7 +302,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": { "collapsed": true, "deletable": true, @@ -325,7 +350,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": { "collapsed": false, "deletable": true, @@ -348,9 +373,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": { - "collapsed": false, + "collapsed": true, "deletable": true, "editable": true }, @@ -360,21 +385,44 @@ "\n", "n_steps = 2\n", "n_inputs = 3\n", - "n_neurons = 5\n", - "\n", + "n_neurons = 5" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + 
"deletable": true, + "editable": true + }, + "outputs": [], + "source": [ "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))\n", "\n", "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", - "output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, X_seqs, dtype=tf.float32)\n", - "outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])\n", - "\n", + "output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, X_seqs,\n", + " dtype=tf.float32)\n", + "outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 18, "metadata": { "collapsed": false, "deletable": true, @@ -397,7 +445,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 19, "metadata": { "collapsed": false, "deletable": true, @@ -420,9 +468,9 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 20, "metadata": { - "collapsed": false, + "collapsed": true, "deletable": true, "editable": true }, @@ -432,19 +480,41 @@ "\n", "n_steps = 2\n", "n_inputs = 3\n", - "n_neurons = 5\n", - "\n", + "n_neurons = 5" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "\n", "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", - "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", - "\n", + "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": true, + "deletable": true, + 
"editable": true + }, + "outputs": [], + "source": [ "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 23, "metadata": { "collapsed": false, "deletable": true, @@ -466,7 +536,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 24, "metadata": { "collapsed": false, "deletable": true, @@ -489,7 +559,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 25, "metadata": { "collapsed": true, "deletable": true, @@ -504,17 +574,40 @@ "n_neurons = 5\n", "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ "seq_length = tf.placeholder(tf.int32, [None])\n", - "\n", - "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", - "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, sequence_length=seq_length, dtype=tf.float32)\n", - "\n", + "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32,\n", + " sequence_length=seq_length)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 28, "metadata": { "collapsed": false, "deletable": true, @@ -529,8 +622,19 @@ " [[6, 7, 8], [6, 5, 4]], # instance 3\n", " [[9, 0, 1], [3, 2, 1]], # instance 4\n", " ])\n", - "seq_length_batch = np.array([2, 1, 2, 2])\n", - "\n", + "seq_length_batch = np.array([2, 1, 2, 2])" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ "with tf.Session() as sess:\n", " 
init.run()\n", " outputs_val, states_val = sess.run(\n", @@ -539,7 +643,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 30, "metadata": { "collapsed": false, "deletable": true, @@ -552,7 +656,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 31, "metadata": { "collapsed": false, "deletable": true, @@ -587,7 +691,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 32, "metadata": { "collapsed": false, "deletable": true, @@ -607,12 +711,12 @@ "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "y = tf.placeholder(tf.int32, [None])\n", "\n", - "with tf.variable_scope(\"rnn\", initializer=tf.contrib.layers.variance_scaling_initializer()):\n", - " basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n", - " outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", + "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", + "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", "\n", "logits = tf.layers.dense(states, n_outputs)\n", - "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", + "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,\n", + " logits=logits)\n", "loss = tf.reduce_mean(xentropy)\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(loss)\n", @@ -624,7 +728,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 33, "metadata": { "collapsed": false, "deletable": true, @@ -640,7 +744,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 34, "metadata": { "collapsed": false, "deletable": true, @@ -675,9 +779,9 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 35, "metadata": { - "collapsed": false, + "collapsed": true, "deletable": true, "editable": true }, @@ -687,21 +791,46 @@ "\n", "n_steps = 28\n", "n_inputs = 
28\n", - "n_neurons1 = 150\n", - "n_neurons2 = 100\n", "n_outputs = 10\n", "\n", "learning_rate = 0.001\n", "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", - "y = tf.placeholder(tf.int32, [None])\n", + "y = tf.placeholder(tf.int32, [None])" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "n_neurons = 100\n", + "n_layers = 3\n", "\n", - "hidden1 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons1, activation=tf.nn.relu)\n", - "hidden2 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons2, activation=tf.nn.relu)\n", - "multi_layer_cell = tf.contrib.rnn.MultiRNNCell([hidden1, hidden2])\n", - "outputs, states_tuple = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", - "states = tf.concat(axis=1, values=states_tuple)\n", - "logits = tf.layers.dense(states, n_outputs)\n", + "layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons,\n", + " activation=tf.nn.relu)\n", + " for layer in range(n_layers)]\n", + "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)\n", + "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "states_concat = tf.concat(axis=1, values=states)\n", + "logits = tf.layers.dense(states_concat, n_outputs)\n", "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", "loss = tf.reduce_mean(xentropy)\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", @@ -714,7 +843,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 38, "metadata": { "collapsed": false, "deletable": true, @@ -722,7 +851,7 @@ }, "outputs": [], "source": [ - "n_epochs = 100\n", + "n_epochs = 10\n", "batch_size = 150\n", "\n", "with tf.Session() as sess:\n", @@ 
-749,7 +878,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 39, "metadata": { "collapsed": false, "deletable": true, @@ -772,7 +901,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 40, "metadata": { "collapsed": false, "deletable": true, @@ -780,7 +909,7 @@ }, "outputs": [], "source": [ - "t = np.linspace(t_min, t_max, (t_max - t_min) // resolution)\n", + "t = np.linspace(t_min, t_max, int((t_max - t_min) / resolution))\n", "\n", "n_steps = 20\n", "t_instance = np.linspace(12.2, 12.2 + resolution * (n_steps + 1), n_steps + 1)\n", @@ -809,7 +938,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 41, "metadata": { "collapsed": false, "deletable": true, @@ -822,7 +951,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 42, "metadata": { "collapsed": false, "deletable": true, @@ -844,10 +973,20 @@ ] }, { - "cell_type": "code", - "execution_count": 32, + "cell_type": "markdown", "metadata": { - "collapsed": false, "deletable": true, "editable": true }, "source": [ + "Let's create the RNN. It will contain 100 recurrent neurons and we will unroll it over 20 time steps since each training instance will be 20 inputs long. Each input will contain only one feature (the value at that time). 
The targets are also sequences of 20 inputs, each containing a single value:" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "collapsed": true, "deletable": true, "editable": true }, @@ -863,15 +1002,82 @@ "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", "\n", + "cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n", + "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "At each time step we now have an output vector of size 100. But what we actually want is a single output value at each time step. The simplest solution is to wrap the cell in an `OutputProjectionWrapper`." + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_steps = 20\n", + "n_inputs = 1\n", + "n_neurons = 100\n", + "n_outputs = 1\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "cell = tf.contrib.rnn.OutputProjectionWrapper(\n", " tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),\n", - " output_size=n_outputs)\n", - "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)\n", - "\n", - "n_outputs = 1\n", + " output_size=n_outputs)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)" + ] + }, + { 
+ "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ "learning_rate = 0.001\n", "\n", - "loss = tf.reduce_sum(tf.square(outputs - y))\n", + "loss = tf.reduce_mean(tf.square(outputs - y)) # MSE\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(loss)\n", "\n", @@ -880,7 +1086,20 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 48, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, "metadata": { "collapsed": false, "deletable": true, @@ -888,7 +1107,7 @@ }, "outputs": [], "source": [ - "n_iterations = 1000\n", + "n_iterations = 1500\n", "batch_size = 50\n", "\n", "with tf.Session() as sess:\n", @@ -900,14 +1119,42 @@ " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", " print(iteration, \"\\tMSE:\", mse)\n", " \n", - " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", - " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", - " print(y_pred)" + " saver.save(sess, \"./my_time_series_model\") # not shown in the book" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 50, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "with tf.Session() as sess: # not shown in the book\n", + " saver.restore(sess, \"./my_time_series_model\") # not shown\n", + "\n", + " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", + " y_pred = sess.run(outputs, feed_dict={X: X_new})" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "y_pred" + ] + }, + { + "cell_type": "code", + 
"execution_count": 52, "metadata": { "collapsed": false, "deletable": true, @@ -938,9 +1185,9 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 53, "metadata": { - "collapsed": false, + "collapsed": true, "deletable": true, "editable": true }, @@ -953,28 +1200,12 @@ "n_neurons = 100\n", "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", - "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", - "\n", - "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n", - "rnn_outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", - "\n", - "n_outputs = 1\n", - "learning_rate = 0.001\n", - "\n", - "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", - "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n", - "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", - "\n", - "loss = tf.reduce_sum(tf.square(outputs - y))\n", - "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", - "training_op = optimizer.minimize(loss)\n", - "\n", - "init = tf.global_variables_initializer()" + "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 54, "metadata": { "collapsed": false, "deletable": true, @@ -982,7 +1213,68 @@ }, "outputs": [], "source": [ - "n_iterations = 1000\n", + "cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n", + "rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "n_outputs = 1\n", + "learning_rate = 0.001" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "stacked_rnn_outputs = 
tf.reshape(rnn_outputs, [-1, n_neurons])\n", + "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n", + "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "loss = tf.reduce_mean(tf.square(outputs - y))\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "\n", + "init = tf.global_variables_initializer()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "n_iterations = 1500\n", "batch_size = 50\n", "\n", "with tf.Session() as sess:\n", @@ -996,12 +1288,26 @@ " \n", " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", - " print(y_pred)" + " \n", + " saver.save(sess, \"./my_time_series_model\")" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 59, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 60, "metadata": { "collapsed": false, "deletable": true, @@ -1031,7 +1337,45 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 61, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "with tf.Session() as sess: # not shown in the book\n", + " saver.restore(sess, \"./my_time_series_model\") # not shown\n", + "\n", + " sequence = [0.] 
* n_steps\n", + " for iteration in range(300):\n", + " X_batch = np.array(sequence[-n_steps:]).reshape(1, n_steps, 1)\n", + " y_pred = sess.run(outputs, feed_dict={X: X_batch})\n", + " sequence.append(y_pred[0, -1, 0])" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(8,4))\n", + "plt.plot(np.arange(len(sequence)), sequence, \"b-\")\n", + "plt.plot(t[:n_steps], sequence[:n_steps], \"b-\", linewidth=3)\n", + "plt.xlabel(\"Time\")\n", + "plt.ylabel(\"Value\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, "metadata": { "collapsed": false, "deletable": true, @@ -1039,16 +1383,8 @@ }, "outputs": [], "source": [ - "n_iterations = 2000\n", - "batch_size = 50\n", "with tf.Session() as sess:\n", - " init.run()\n", - " for iteration in range(n_iterations):\n", - " X_batch, y_batch = next_batch(batch_size, n_steps)\n", - " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", - " if iteration % 100 == 0:\n", - " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", - " print(iteration, \"\\tMSE:\", mse)\n", + " saver.restore(sess, \"./my_time_series_model\")\n", "\n", " sequence1 = [0. 
for i in range(n_steps)]\n", " for iteration in range(len(t) - n_steps):\n", @@ -1073,7 +1409,7 @@ "plt.plot(t, sequence2, \"b-\")\n", "plt.plot(t[:n_steps], sequence2[:n_steps], \"b-\", linewidth=3)\n", "plt.xlabel(\"Time\")\n", - "#save_fig(\"creative_sequence_plot\")\n", + "save_fig(\"creative_sequence_plot\")\n", "plt.show()" ] }, @@ -1099,7 +1435,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 64, "metadata": { "collapsed": true, "deletable": true, @@ -1110,22 +1446,46 @@ "tf.reset_default_graph()\n", "\n", "n_inputs = 2\n", + "n_steps = 5\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ "n_neurons = 100\n", "n_layers = 3\n", - "n_steps = 5\n", - "keep_prob = 0.5\n", - "\n", - "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", - "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", - "multi_layer_cell = tf.contrib.rnn.MultiRNNCell([basic_cell] * n_layers)\n", - "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", "\n", + "layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", + " for layer in range(n_layers)]\n", + "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)\n", + "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 67, "metadata": { "collapsed": true, "deletable": true, @@ -1138,7 +1498,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 68, "metadata": { "collapsed": true, "deletable": true, @@ -1153,7 +1513,7 @@ }, { "cell_type": 
"code", - "execution_count": 42, + "execution_count": 69, "metadata": { "collapsed": false, "deletable": true, @@ -1171,91 +1531,7 @@ "editable": true }, "source": [ - "## Dropout" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "tf.reset_default_graph()\n", - "\n", - "n_inputs = 1\n", - "n_neurons = 100\n", - "n_layers = 3\n", - "n_steps = 20\n", - "n_outputs = 1\n", - "\n", - "keep_prob = 0.5\n", - "learning_rate = 0.001\n", - "\n", - "is_training = True\n", - "\n", - "def deep_rnn_with_dropout(X, y, is_training):\n", - " cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", - " if is_training:\n", - " cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n", - " multi_layer_cell = tf.contrib.rnn.MultiRNNCell([cell] * n_layers)\n", - " rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", - "\n", - " stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", - " stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n", - " outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", - "\n", - " loss = tf.reduce_sum(tf.square(outputs - y))\n", - " optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", - " training_op = optimizer.minimize(loss)\n", - "\n", - " return outputs, loss, training_op\n", - "\n", - "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", - "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", - "outputs, loss, training_op = deep_rnn_with_dropout(X, y, is_training)\n", - "init = tf.global_variables_initializer()\n", - "saver = tf.train.Saver()" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "n_iterations = 2000\n", - "batch_size = 50\n", - "\n", - "with tf.Session() as sess:\n", - 
" if is_training:\n", - " init.run()\n", - " for iteration in range(n_iterations):\n", - " X_batch, y_batch = next_batch(batch_size, n_steps)\n", - " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", - " if iteration % 100 == 0:\n", - " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", - " print(iteration, \"\\tMSE:\", mse)\n", - " save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n", - " else:\n", - " saver.restore(sess, \"/tmp/my_model.ckpt\")\n", - " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", - " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", - " \n", - " plt.title(\"Testing the model\", fontsize=14)\n", - " plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", - " plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", - " plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n", - " plt.legend(loc=\"upper left\")\n", - " plt.xlabel(\"Time\")\n", - " plt.show()" + "## Distributing a Deep RNN Across Multiple GPUs" ] }, { @@ -1265,96 +1541,24 @@ "editable": true }, "source": [ - "# LSTM" + "Do **NOT** do this:" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 70, "metadata": { - "collapsed": false, + "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "with tf.device(\"/gpu:0\"): # BAD! 
This is ignored.\n", + " layer1 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", "\n", - "n_steps = 28\n", - "n_inputs = 28\n", - "n_neurons = 150\n", - "n_outputs = 10\n", - "\n", - "learning_rate = 0.001\n", - "\n", - "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", - "y = tf.placeholder(tf.int32, [None])\n", - "\n", - "lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n", - "multi_cell = tf.contrib.rnn.MultiRNNCell([lstm_cell]*3)\n", - "outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n", - "top_layer_h_state = states[-1][1]\n", - "logits = tf.layers.dense(top_layer_h_state, n_outputs, name=\"softmax\")\n", - "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", - "loss = tf.reduce_mean(xentropy, name=\"loss\")\n", - "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", - "training_op = optimizer.minimize(loss)\n", - "correct = tf.nn.in_top_k(logits, y, 1)\n", - "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", - " \n", - "init = tf.global_variables_initializer()" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "states" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "top_layer_h_state" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, - "scrolled": true - }, - "outputs": [], - "source": [ - "n_epochs = 10\n", - "batch_size = 150\n", - "\n", - "with tf.Session() as sess:\n", - " init.run()\n", - " for epoch in range(n_epochs):\n", - " for iteration in range(mnist.train.num_examples // batch_size):\n", - " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", - " X_batch = X_batch.reshape((batch_size, 
n_steps, n_inputs))\n", - " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", - " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", - " acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n", - " print(\"Epoch\", epoch, \"Train accuracy =\", acc_train, \"Test accuracy =\", acc_test)" + "with tf.device(\"/gpu:1\"): # BAD! Ignored again.\n", + " layer2 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)" ] }, { @@ -1364,12 +1568,12 @@ "editable": true }, "source": [ - "# Distributing layers across devices" + "Instead, you need a `DeviceCellWrapper`:" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 71, "metadata": { "collapsed": false, "deletable": true, @@ -1399,7 +1603,278 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 72, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 5\n", + "n_steps = 20\n", + "n_neurons = 100\n", + "\n", + "X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "devices = [\"/cpu:0\", \"/cpu:0\", \"/cpu:0\"] # replace with [\"/gpu:0\", \"/gpu:1\", \"/gpu:2\"] if you have 3 GPUs\n", + "cells = [DeviceCellWrapper(dev,tf.contrib.rnn.BasicRNNCell(num_units=n_neurons))\n", + " for dev in devices]\n", + "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n", + "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "init = tf.global_variables_initializer()" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "collapsed": false, + 
"deletable": true, + "editable": true, + "scrolled": true + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " init.run()\n", + " print(sess.run(outputs, feed_dict={X: rnd.rand(2, n_steps, n_inputs)}))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Dropout" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 1\n", + "n_neurons = 100\n", + "n_layers = 3\n", + "n_steps = 20\n", + "n_outputs = 1\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "keep_prob = 0.5\n", + "\n", + "cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", + " for layer in range(n_layers)]\n", + "cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n", + " for cell in cells]\n", + "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells_drop)\n", + "rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "learning_rate = 0.01\n", + "\n", + "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", + "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n", + "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", + "\n", + "loss = tf.reduce_mean(tf.square(outputs - y))\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "\n", + "init = 
tf.global_variables_initializer()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Unfortunately, this code is only usable for training: the `DropoutWrapper` class has no `training` parameter, so it always applies dropout, even when the model is not being trained. We must therefore first train the model, then create a different model for testing, without the `DropoutWrapper`." + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "n_iterations = 1000\n", + "batch_size = 50\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for iteration in range(n_iterations):\n", + " X_batch, y_batch = next_batch(batch_size, n_steps)\n", + " _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch})\n", + " if iteration % 100 == 0:\n", + " print(iteration, \"Training MSE:\", mse)\n", + " \n", + " saver.save(sess, \"./my_dropout_time_series_model\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Now that the model is trained, we need to create the model again, but without the `DropoutWrapper` for testing:" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 1\n", + "n_neurons = 100\n", + "n_layers = 3\n", + "n_steps = 20\n", + "n_outputs = 1\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", + "\n", + "keep_prob = 0.5\n", + "\n", + "cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", + " for layer in range(n_layers)]\n", + "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n", +
"rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", + "\n", + "learning_rate = 0.01\n", + "\n", + "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", + "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n", + "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", + "\n", + "loss = tf.reduce_mean(tf.square(outputs - y))\n", + "\n", + "init = tf.global_variables_initializer()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " saver.restore(sess, \"./my_dropout_time_series_model\")\n", + "\n", + " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", + " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", + "\n", + "plt.title(\"Testing the model\", fontsize=14)\n", + "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", + "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", + "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n", + "plt.legend(loc=\"upper left\")\n", + "plt.xlabel(\"Time\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Oops, it seems that Dropout does not help at all in this particular case. 
:/" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Another option is to write a script with a command line argument to specify whether you want to train the model or use it for making predictions:" + ] + }, + { + "cell_type": "code", + "execution_count": 82, "metadata": { "collapsed": false, "deletable": true, @@ -1409,21 +1884,109 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "n_inputs = 5\n", - "n_neurons = 100\n", - "devices = [\"/cpu:0\"]*5\n", - "n_steps = 20\n", - "X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])\n", - "lstm_cells = [DeviceCellWrapper(device, tf.contrib.rnn.BasicRNNCell(num_units=n_neurons))\n", - " for device in devices]\n", - "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)\n", - "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", + "import sys\n", + "training = True # in a script, this would be (sys.argv[-1] == \"train\") instead\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", + "\n", + "cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", + " for layer in range(n_layers)]\n", + "if training:\n", + " cells = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n", + " for cell in cells]\n", + "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n", + "rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", + "\n", + "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons]) # not shown in the book\n", + "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs) # not shown\n", + "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs]) # not shown\n", + "loss = tf.reduce_mean(tf.square(outputs - y)) # not shown\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) # not shown\n", + "training_op = optimizer.minimize(loss) # not
shown\n", + "init = tf.global_variables_initializer() # not shown\n", + "saver = tf.train.Saver() # not shown\n", + "\n", + "with tf.Session() as sess:\n", + " if training:\n", + " init.run()\n", + " for iteration in range(n_iterations):\n", + " X_batch, y_batch = next_batch(batch_size, n_steps) # not shown\n", + " _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch}) # not shown\n", + " if iteration % 100 == 0: # not shown\n", + " print(iteration, \"Training MSE:\", mse) # not shown\n", + " save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n", + " else:\n", + " saver.restore(sess, \"/tmp/my_model.ckpt\")\n", + " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs))) # not shown\n", + " y_pred = sess.run(outputs, feed_dict={X: X_new}) # not shown" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# LSTM" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_steps = 28\n", + "n_inputs = 28\n", + "n_neurons = 150\n", + "n_outputs = 10\n", + "n_layers = 3\n", + "\n", + "learning_rate = 0.001\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.int32, [None])\n", + "\n", + "lstm_cells = [tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n", + " for layer in range(n_layers)]\n", + "multi_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)\n", + "outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n", + "top_layer_h_state = states[-1][1]\n", + "logits = tf.layers.dense(top_layer_h_state, n_outputs, 
name=\"softmax\")\n", + "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", + "loss = tf.reduce_mean(xentropy, name=\"loss\")\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "correct = tf.nn.in_top_k(logits, y, 1)\n", + "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + " \n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 85, "metadata": { "collapsed": false, "deletable": true, @@ -1431,9 +1994,72 @@ }, "outputs": [], "source": [ + "states" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "top_layer_h_state" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true, + "scrolled": true + }, + "outputs": [], + "source": [ + "n_epochs = 10\n", + "batch_size = 150\n", + "\n", "with tf.Session() as sess:\n", " init.run()\n", - " print(sess.run(outputs, feed_dict={X: rnd.rand(2, n_steps, n_inputs)}))" + " for epoch in range(n_epochs):\n", + " for iteration in range(mnist.train.num_examples // batch_size):\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " X_batch = X_batch.reshape((batch_size, n_steps, n_inputs))\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n", + " print(\"Epoch\", epoch, \"Train accuracy =\", acc_train, \"Test accuracy =\", acc_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "lstm_cell = tf.contrib.rnn.LSTMCell(num_units=n_neurons, use_peepholes=True)" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "gru_cell = tf.contrib.rnn.GRUCell(num_units=n_neurons)" ] }, { @@ -1468,7 +2094,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 90, "metadata": { "collapsed": true, "deletable": true, @@ -1511,7 +2137,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 91, "metadata": { "collapsed": false, "deletable": true, @@ -1524,7 +2150,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 92, "metadata": { "collapsed": false, "deletable": true, @@ -1547,7 +2173,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 93, "metadata": { "collapsed": false, "deletable": true, @@ -1567,7 +2193,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 94, "metadata": { "collapsed": false, "deletable": true, @@ -1580,7 +2206,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 95, "metadata": { "collapsed": false, "deletable": true, @@ -1593,7 +2219,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 96, "metadata": { "collapsed": false, "deletable": true, @@ -1616,7 +2242,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 97, "metadata": { "collapsed": true, "deletable": true, @@ -1654,7 +2280,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 98, "metadata": { "collapsed": false, "deletable": true, @@ -1668,7 +2294,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 99, "metadata": { "collapsed": false, "deletable": true, @@ -1681,7 +2307,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 100, "metadata": { "collapsed": false, "deletable": true, @@ -1704,7 +2330,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 101, 
"metadata": { "collapsed": true, "deletable": true, @@ -1730,9 +2356,9 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 102, "metadata": { - "collapsed": false, + "collapsed": true, "deletable": true, "editable": true }, @@ -1741,15 +2367,52 @@ "tf.reset_default_graph()\n", "\n", "# Input data.\n", - "train_inputs = tf.placeholder(tf.int32, shape=[batch_size])\n", "train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])\n", - "valid_dataset = tf.constant(valid_examples, dtype=tf.int32)\n", + "valid_dataset = tf.constant(valid_examples, dtype=tf.int32)" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "vocabulary_size = 50000\n", + "embedding_size = 150\n", "\n", "# Look up embeddings for inputs.\n", - "init_embeddings = tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)\n", - "embeddings = tf.Variable(init_embeddings)\n", - "embed = tf.nn.embedding_lookup(embeddings, train_inputs)\n", - "\n", + "init_embeds = tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)\n", + "embeddings = tf.Variable(init_embeds)" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "train_inputs = tf.placeholder(tf.int32, shape=[None])\n", + "embed = tf.nn.embedding_lookup(embeddings, train_inputs)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ "# Construct the variables for the NCE loss\n", "nce_weights = tf.Variable(\n", " tf.truncated_normal([vocabulary_size, embedding_size],\n", @@ -1789,7 +2452,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 106, "metadata": { "collapsed": false, "deletable": true, @@ -1848,7 +2511,7 @@ }, { 
"cell_type": "code", - "execution_count": 66, + "execution_count": 107, "metadata": { "collapsed": false, "deletable": true, @@ -1871,7 +2534,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 108, "metadata": { "collapsed": true, "deletable": true, @@ -1895,7 +2558,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 109, "metadata": { "collapsed": false, "deletable": true, @@ -1934,7 +2597,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 111, "metadata": { "collapsed": false, "deletable": true, @@ -1962,8 +2625,9 @@ "encoder_inputs = tf.unstack(tf.transpose(X)) # list of 1D tensors\n", "decoder_inputs = tf.unstack(tf.transpose(Y_input)) # list of 1D tensors\n", "\n", - "lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n", - "cell = tf.contrib.rnn.MultiRNNCell([lstm_cell] * n_layers)\n", + "lstm_cells = [tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n", + " for layer in range(n_layers)]\n", + "cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)\n", "\n", "output_seqs, states = tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq(\n", " encoder_inputs,\n", @@ -1978,7 +2642,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 112, "metadata": { "collapsed": false, "deletable": true,