From 9910d31ec3688a0095f60a80e09af4485e65bdad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= <ageron@users.noreply.github.com>
Date: Mon, 5 Jun 2017 19:18:20 +0200
Subject: [PATCH] Upgrade notebook 14 to support TF 1.1+ (there were breaking
 changes with RNNs)

---
 14_recurrent_neural_networks.ipynb | 1332 +++++++++++++++++++++-------
 1 file changed, 998 insertions(+), 334 deletions(-)

diff --git a/14_recurrent_neural_networks.ipynb b/14_recurrent_neural_networks.ipynb
index 5d26a58..380ff29 100644
--- a/14_recurrent_neural_networks.ipynb
+++ b/14_recurrent_neural_networks.ipynb
@@ -142,8 +142,8 @@
     "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
     "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
     "\n",
-    "Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons], dtype=tf.float32))\n",
-    "Wy = tf.Variable(tf.random_normal(shape=[n_neurons, n_neurons], dtype=tf.float32))\n",
+    "Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons],dtype=tf.float32))\n",
+    "Wy = tf.Variable(tf.random_normal(shape=[n_neurons,n_neurons],dtype=tf.float32))\n",
     "b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))\n",
     "\n",
     "Y0 = tf.tanh(tf.matmul(X0, Wx) + b)\n",
@@ -162,6 +162,8 @@
    },
    "outputs": [],
    "source": [
+    "import numpy as np\n",
+    "\n",
     "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0\n",
     "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1\n",
     "\n",
@@ -210,7 +212,7 @@
    "cell_type": "code",
    "execution_count": 7,
    "metadata": {
-    "collapsed": false,
+    "collapsed": true,
     "deletable": true,
     "editable": true
    },
@@ -219,16 +221,7 @@
     "tf.reset_default_graph()\n",
     "\n",
     "n_inputs = 3\n",
-    "n_neurons = 5\n",
-    "\n",
-    "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
-    "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
-    "\n",
-    "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
-    "output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, [X0, X1], dtype=tf.float32)\n",
-    "Y0, Y1 = output_seqs\n",
-    "\n",
-    "init = tf.global_variables_initializer()"
+    "n_neurons = 5"
    ]
   },
   {
@@ -240,6 +233,38 @@
     "editable": true
    },
    "outputs": [],
+   "source": [
+    "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
+    "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
+    "\n",
+    "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
+    "output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, [X0, X1],\n",
+    "                                                dtype=tf.float32)\n",
+    "Y0, Y1 = output_seqs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "init = tf.global_variables_initializer()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
    "source": [
     "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n",
     "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])\n",
@@ -251,7 +276,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 11,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -264,7 +289,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 12,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -277,7 +302,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 13,
    "metadata": {
     "collapsed": true,
     "deletable": true,
@@ -325,7 +350,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 14,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -348,9 +373,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 15,
    "metadata": {
-    "collapsed": false,
+    "collapsed": true,
     "deletable": true,
     "editable": true
    },
@@ -360,21 +385,44 @@
     "\n",
     "n_steps = 2\n",
     "n_inputs = 3\n",
-    "n_neurons = 5\n",
-    "\n",
+    "n_neurons = 5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
     "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
     "X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))\n",
     "\n",
     "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
-    "output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, X_seqs, dtype=tf.float32)\n",
-    "outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])\n",
-    "\n",
+    "output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, X_seqs,\n",
+    "                                                dtype=tf.float32)\n",
+    "outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
     "init = tf.global_variables_initializer()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 18,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -397,7 +445,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 19,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -420,9 +468,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 20,
    "metadata": {
-    "collapsed": false,
+    "collapsed": true,
     "deletable": true,
     "editable": true
    },
@@ -432,19 +480,41 @@
     "\n",
     "n_steps = 2\n",
     "n_inputs = 3\n",
-    "n_neurons = 5\n",
-    "\n",
+    "n_neurons = 5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
     "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
     "\n",
     "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
-    "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
-    "\n",
+    "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
     "init = tf.global_variables_initializer()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 23,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -466,7 +536,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 24,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -489,7 +559,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 25,
    "metadata": {
     "collapsed": true,
     "deletable": true,
@@ -504,17 +574,40 @@
     "n_neurons = 5\n",
     "\n",
     "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
+    "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
     "seq_length = tf.placeholder(tf.int32, [None])\n",
-    "\n",
-    "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
-    "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, sequence_length=seq_length, dtype=tf.float32)\n",
-    "\n",
+    "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32,\n",
+    "                                    sequence_length=seq_length)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
     "init = tf.global_variables_initializer()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 28,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -529,8 +622,19 @@
     "        [[6, 7, 8], [6, 5, 4]], # instance 3\n",
     "        [[9, 0, 1], [3, 2, 1]], # instance 4\n",
     "    ])\n",
-    "seq_length_batch = np.array([2, 1, 2, 2])\n",
-    "\n",
+    "seq_length_batch = np.array([2, 1, 2, 2])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
     "with tf.Session() as sess:\n",
     "    init.run()\n",
     "    outputs_val, states_val = sess.run(\n",
@@ -539,7 +643,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 30,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -552,7 +656,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 31,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -587,7 +691,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 32,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -607,12 +711,12 @@
     "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
     "y = tf.placeholder(tf.int32, [None])\n",
     "\n",
-    "with tf.variable_scope(\"rnn\", initializer=tf.contrib.layers.variance_scaling_initializer()):\n",
-    "    basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
-    "    outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
+    "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
+    "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
     "\n",
     "logits = tf.layers.dense(states, n_outputs)\n",
-    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
+    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,\n",
+    "                                                          logits=logits)\n",
     "loss = tf.reduce_mean(xentropy)\n",
     "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
     "training_op = optimizer.minimize(loss)\n",
@@ -624,7 +728,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 33,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -640,7 +744,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 34,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -675,9 +779,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 35,
    "metadata": {
-    "collapsed": false,
+    "collapsed": true,
     "deletable": true,
     "editable": true
    },
@@ -687,21 +791,46 @@
     "\n",
     "n_steps = 28\n",
     "n_inputs = 28\n",
-    "n_neurons1 = 150\n",
-    "n_neurons2 = 100\n",
     "n_outputs = 10\n",
     "\n",
     "learning_rate = 0.001\n",
     "\n",
     "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
-    "y = tf.placeholder(tf.int32, [None])\n",
+    "y = tf.placeholder(tf.int32, [None])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "n_neurons = 100\n",
+    "n_layers = 3\n",
     "\n",
-    "hidden1 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons1, activation=tf.nn.relu)\n",
-    "hidden2 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons2, activation=tf.nn.relu)\n",
-    "multi_layer_cell = tf.contrib.rnn.MultiRNNCell([hidden1, hidden2])\n",
-    "outputs, states_tuple = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
-    "states = tf.concat(axis=1, values=states_tuple)\n",
-    "logits = tf.layers.dense(states, n_outputs)\n",
+    "layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons,\n",
+    "                                      activation=tf.nn.relu)\n",
+    "          for layer in range(n_layers)]\n",
+    "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)\n",
+    "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "states_concat = tf.concat(axis=1, values=states)\n",
+    "logits = tf.layers.dense(states_concat, n_outputs)\n",
     "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
     "loss = tf.reduce_mean(xentropy)\n",
     "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
@@ -714,7 +843,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 38,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -722,7 +851,7 @@
    },
    "outputs": [],
    "source": [
-    "n_epochs = 100\n",
+    "n_epochs = 10\n",
     "batch_size = 150\n",
     "\n",
     "with tf.Session() as sess:\n",
@@ -749,7 +878,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 39,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -772,7 +901,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 40,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -780,7 +909,7 @@
    },
    "outputs": [],
    "source": [
-    "t = np.linspace(t_min, t_max, (t_max - t_min) // resolution)\n",
+    "t = np.linspace(t_min, t_max, int((t_max - t_min) / resolution))\n",
     "\n",
     "n_steps = 20\n",
     "t_instance = np.linspace(12.2, 12.2 + resolution * (n_steps + 1), n_steps + 1)\n",
@@ -809,7 +938,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 41,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -822,7 +951,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 42,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -844,10 +973,20 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 32,
+   "cell_type": "markdown",
    "metadata": {
-    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "source": [
+    "Let's create the RNN. It will contain 100 recurrent neurons and we will unroll it over 20 time steps since each training instance will be 20 inputs long. Each input will contain only one feature (the value at that time). The targets are also sequences of 20 inputs, each containing a single value:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {
+    "collapsed": true,
     "deletable": true,
     "editable": true
    },
@@ -863,15 +1002,82 @@
     "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
     "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
     "\n",
+    "cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
+    "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
+   "source": [
+    "At each time step we now have an output vector of size 100. But what we actually want is a single output value at each time step. The simplest solution is to wrap the cell in an `OutputProjectionWrapper`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "tf.reset_default_graph()\n",
+    "\n",
+    "n_steps = 20\n",
+    "n_inputs = 1\n",
+    "n_neurons = 100\n",
+    "n_outputs = 1\n",
+    "\n",
+    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
+    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
     "cell = tf.contrib.rnn.OutputProjectionWrapper(\n",
     "    tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),\n",
-    "    output_size=n_outputs)\n",
-    "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)\n",
-    "\n",
-    "n_outputs = 1\n",
+    "    output_size=n_outputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
     "learning_rate = 0.001\n",
     "\n",
-    "loss = tf.reduce_sum(tf.square(outputs - y))\n",
+    "loss = tf.reduce_mean(tf.square(outputs - y)) # MSE\n",
     "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
     "training_op = optimizer.minimize(loss)\n",
     "\n",
@@ -880,7 +1086,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 48,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "saver = tf.train.Saver()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -888,7 +1107,7 @@
    },
    "outputs": [],
    "source": [
-    "n_iterations = 1000\n",
+    "n_iterations = 1500\n",
     "batch_size = 50\n",
     "\n",
     "with tf.Session() as sess:\n",
@@ -900,14 +1119,42 @@
     "            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
     "            print(iteration, \"\\tMSE:\", mse)\n",
     "    \n",
-    "    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
-    "    y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
-    "    print(y_pred)"
+    "    saver.save(sess, \"./my_time_series_model\") # not shown in the book"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 50,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "with tf.Session() as sess:                          # not shown in the book\n",
+    "    saver.restore(sess, \"./my_time_series_model\")   # not shown\n",
+    "\n",
+    "    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
+    "    y_pred = sess.run(outputs, feed_dict={X: X_new})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "y_pred"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -938,9 +1185,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 53,
    "metadata": {
-    "collapsed": false,
+    "collapsed": true,
     "deletable": true,
     "editable": true
    },
@@ -953,28 +1200,12 @@
     "n_neurons = 100\n",
     "\n",
     "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
-    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
-    "\n",
-    "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
-    "rnn_outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
-    "\n",
-    "n_outputs = 1\n",
-    "learning_rate = 0.001\n",
-    "\n",
-    "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
-    "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
-    "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
-    "\n",
-    "loss = tf.reduce_sum(tf.square(outputs - y))\n",
-    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
-    "training_op = optimizer.minimize(loss)\n",
-    "\n",
-    "init = tf.global_variables_initializer()"
+    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 54,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -982,7 +1213,68 @@
    },
    "outputs": [],
    "source": [
-    "n_iterations = 1000\n",
+    "cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
+    "rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "n_outputs = 1\n",
+    "learning_rate = 0.001"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
+    "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
+    "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "loss = tf.reduce_mean(tf.square(outputs - y))\n",
+    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
+    "training_op = optimizer.minimize(loss)\n",
+    "\n",
+    "init = tf.global_variables_initializer()\n",
+    "saver = tf.train.Saver()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "n_iterations = 1500\n",
     "batch_size = 50\n",
     "\n",
     "with tf.Session() as sess:\n",
@@ -996,12 +1288,26 @@
     "    \n",
     "    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
     "    y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
-    "    print(y_pred)"
+    "    \n",
+    "    saver.save(sess, \"./my_time_series_model\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 59,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "y_pred"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1031,7 +1337,45 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 61,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "with tf.Session() as sess:                        # not shown in the book\n",
+    "    saver.restore(sess, \"./my_time_series_model\") # not shown\n",
+    "\n",
+    "    sequence = [0.] * n_steps\n",
+    "    for iteration in range(300):\n",
+    "        X_batch = np.array(sequence[-n_steps:]).reshape(1, n_steps, 1)\n",
+    "        y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
+    "        sequence.append(y_pred[0, -1, 0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(8,4))\n",
+    "plt.plot(np.arange(len(sequence)), sequence, \"b-\")\n",
+    "plt.plot(t[:n_steps], sequence[:n_steps], \"b-\", linewidth=3)\n",
+    "plt.xlabel(\"Time\")\n",
+    "plt.ylabel(\"Value\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1039,16 +1383,8 @@
    },
    "outputs": [],
    "source": [
-    "n_iterations = 2000\n",
-    "batch_size = 50\n",
     "with tf.Session() as sess:\n",
-    "    init.run()\n",
-    "    for iteration in range(n_iterations):\n",
-    "        X_batch, y_batch = next_batch(batch_size, n_steps)\n",
-    "        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
-    "        if iteration % 100 == 0:\n",
-    "            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
-    "            print(iteration, \"\\tMSE:\", mse)\n",
+    "    saver.restore(sess, \"./my_time_series_model\")\n",
     "\n",
     "    sequence1 = [0. for i in range(n_steps)]\n",
     "    for iteration in range(len(t) - n_steps):\n",
@@ -1073,7 +1409,7 @@
     "plt.plot(t, sequence2, \"b-\")\n",
     "plt.plot(t[:n_steps], sequence2[:n_steps], \"b-\", linewidth=3)\n",
     "plt.xlabel(\"Time\")\n",
-    "#save_fig(\"creative_sequence_plot\")\n",
+    "save_fig(\"creative_sequence_plot\")\n",
     "plt.show()"
    ]
   },
@@ -1099,7 +1435,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 64,
    "metadata": {
     "collapsed": true,
     "deletable": true,
@@ -1110,22 +1446,46 @@
     "tf.reset_default_graph()\n",
     "\n",
     "n_inputs = 2\n",
+    "n_steps = 5\n",
+    "\n",
+    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
     "n_neurons = 100\n",
     "n_layers = 3\n",
-    "n_steps = 5\n",
-    "keep_prob = 0.5\n",
-    "\n",
-    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
-    "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
-    "multi_layer_cell = tf.contrib.rnn.MultiRNNCell([basic_cell] * n_layers)\n",
-    "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
     "\n",
+    "layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
+    "          for layer in range(n_layers)]\n",
+    "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)\n",
+    "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
     "init = tf.global_variables_initializer()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 67,
    "metadata": {
     "collapsed": true,
     "deletable": true,
@@ -1138,7 +1498,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 68,
    "metadata": {
     "collapsed": true,
     "deletable": true,
@@ -1153,7 +1513,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 69,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1171,91 +1531,7 @@
     "editable": true
    },
    "source": [
-    "## Dropout"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 43,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "tf.reset_default_graph()\n",
-    "\n",
-    "n_inputs = 1\n",
-    "n_neurons = 100\n",
-    "n_layers = 3\n",
-    "n_steps = 20\n",
-    "n_outputs = 1\n",
-    "\n",
-    "keep_prob = 0.5\n",
-    "learning_rate = 0.001\n",
-    "\n",
-    "is_training = True\n",
-    "\n",
-    "def deep_rnn_with_dropout(X, y, is_training):\n",
-    "    cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
-    "    if is_training:\n",
-    "        cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
-    "    multi_layer_cell = tf.contrib.rnn.MultiRNNCell([cell] * n_layers)\n",
-    "    rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
-    "\n",
-    "    stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
-    "    stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
-    "    outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
-    "\n",
-    "    loss = tf.reduce_sum(tf.square(outputs - y))\n",
-    "    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
-    "    training_op = optimizer.minimize(loss)\n",
-    "\n",
-    "    return outputs, loss, training_op\n",
-    "\n",
-    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
-    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
-    "outputs, loss, training_op = deep_rnn_with_dropout(X, y, is_training)\n",
-    "init = tf.global_variables_initializer()\n",
-    "saver = tf.train.Saver()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "n_iterations = 2000\n",
-    "batch_size = 50\n",
-    "\n",
-    "with tf.Session() as sess:\n",
-    "    if is_training:\n",
-    "        init.run()\n",
-    "        for iteration in range(n_iterations):\n",
-    "            X_batch, y_batch = next_batch(batch_size, n_steps)\n",
-    "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
-    "            if iteration % 100 == 0:\n",
-    "                mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
-    "                print(iteration, \"\\tMSE:\", mse)\n",
-    "        save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n",
-    "    else:\n",
-    "        saver.restore(sess, \"/tmp/my_model.ckpt\")\n",
-    "        X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
-    "        y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
-    "        \n",
-    "        plt.title(\"Testing the model\", fontsize=14)\n",
-    "        plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
-    "        plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
-    "        plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
-    "        plt.legend(loc=\"upper left\")\n",
-    "        plt.xlabel(\"Time\")\n",
-    "        plt.show()"
+    "## Distributing a Deep RNN Across Multiple GPUs"
    ]
   },
   {
@@ -1265,96 +1541,24 @@
     "editable": true
    },
    "source": [
-    "# LSTM"
+    "Do **NOT** do this:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 70,
    "metadata": {
-    "collapsed": false,
+    "collapsed": true,
     "deletable": true,
     "editable": true
    },
    "outputs": [],
    "source": [
-    "tf.reset_default_graph()\n",
+    "with tf.device(\"/gpu:0\"):  # BAD! This is ignored.\n",
+    "    layer1 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
     "\n",
-    "n_steps = 28\n",
-    "n_inputs = 28\n",
-    "n_neurons = 150\n",
-    "n_outputs = 10\n",
-    "\n",
-    "learning_rate = 0.001\n",
-    "\n",
-    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
-    "y = tf.placeholder(tf.int32, [None])\n",
-    "\n",
-    "lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n",
-    "multi_cell = tf.contrib.rnn.MultiRNNCell([lstm_cell]*3)\n",
-    "outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n",
-    "top_layer_h_state = states[-1][1]\n",
-    "logits = tf.layers.dense(top_layer_h_state, n_outputs, name=\"softmax\")\n",
-    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
-    "loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
-    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
-    "training_op = optimizer.minimize(loss)\n",
-    "correct = tf.nn.in_top_k(logits, y, 1)\n",
-    "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
-    "    \n",
-    "init = tf.global_variables_initializer()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 46,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "states"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 47,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "top_layer_h_state"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true,
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "n_epochs = 10\n",
-    "batch_size = 150\n",
-    "\n",
-    "with tf.Session() as sess:\n",
-    "    init.run()\n",
-    "    for epoch in range(n_epochs):\n",
-    "        for iteration in range(mnist.train.num_examples // batch_size):\n",
-    "            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
-    "            X_batch = X_batch.reshape((batch_size, n_steps, n_inputs))\n",
-    "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
-    "        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
-    "        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
-    "        print(\"Epoch\", epoch, \"Train accuracy =\", acc_train, \"Test accuracy =\", acc_test)"
+    "with tf.device(\"/gpu:1\"):  # BAD! Ignored again.\n",
+    "    layer2 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)"
    ]
   },
   {
@@ -1364,12 +1568,12 @@
     "editable": true
    },
    "source": [
-    "# Distributing layers across devices"
+    "Instead, you need a `DeviceCellWrapper`:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 71,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1399,7 +1603,278 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 72,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "tf.reset_default_graph()\n",
+    "\n",
+    "n_inputs = 5\n",
+    "n_steps = 20\n",
+    "n_neurons = 100\n",
+    "\n",
+    "X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "devices = [\"/cpu:0\", \"/cpu:0\", \"/cpu:0\"] # replace with [\"/gpu:0\", \"/gpu:1\", \"/gpu:2\"] if you have 3 GPUs\n",
+    "cells = [DeviceCellWrapper(dev,tf.contrib.rnn.BasicRNNCell(num_units=n_neurons))\n",
+    "         for dev in devices]\n",
+    "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n",
+    "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "init = tf.global_variables_initializer()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true,
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "with tf.Session() as sess:\n",
+    "    init.run()\n",
+    "    print(sess.run(outputs, feed_dict={X: rnd.rand(2, n_steps, n_inputs)}))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
+   "source": [
+    "## Dropout"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "tf.reset_default_graph()\n",
+    "\n",
+    "n_inputs = 1\n",
+    "n_neurons = 100\n",
+    "n_layers = 3\n",
+    "n_steps = 20\n",
+    "n_outputs = 1\n",
+    "\n",
+    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
+    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "keep_prob = 0.5\n",
+    "\n",
+    "cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
+    "         for layer in range(n_layers)]\n",
+    "cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
+    "              for cell in cells]\n",
+    "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells_drop)\n",
+    "rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "learning_rate = 0.01\n",
+    "\n",
+    "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
+    "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
+    "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
+    "\n",
+    "loss = tf.reduce_mean(tf.square(outputs - y))\n",
+    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
+    "training_op = optimizer.minimize(loss)\n",
+    "\n",
+    "init = tf.global_variables_initializer()\n",
+    "saver = tf.train.Saver()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
+   "source": [
+    "Unfortunately, this code is only usable for training: the `DropoutWrapper` class has no `training` parameter, so it always applies dropout, even when the model is not being trained. We must therefore first train the model, then create a different model for testing, without the `DropoutWrapper`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "n_iterations = 1000\n",
+    "batch_size = 50\n",
+    "\n",
+    "with tf.Session() as sess:\n",
+    "    init.run()\n",
+    "    for iteration in range(n_iterations):\n",
+    "        X_batch, y_batch = next_batch(batch_size, n_steps)\n",
+    "        _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch})\n",
+    "        if iteration % 100 == 0:\n",
+    "            print(iteration, \"Training MSE:\", mse)\n",
+    "    \n",
+    "    saver.save(sess, \"./my_dropout_time_series_model\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
+   "source": [
+    "Now that the model is trained, we need to create the model again, but without the `DropoutWrapper` for testing:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "tf.reset_default_graph()\n",
+    "\n",
+    "n_inputs = 1\n",
+    "n_neurons = 100\n",
+    "n_layers = 3\n",
+    "n_steps = 20\n",
+    "n_outputs = 1\n",
+    "\n",
+    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
+    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
+    "\n",
+    "keep_prob = 0.5\n",
+    "\n",
+    "cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
+    "         for layer in range(n_layers)]\n",
+    "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n",
+    "rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
+    "\n",
+    "learning_rate = 0.01\n",
+    "\n",
+    "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
+    "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
+    "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
+    "\n",
+    "loss = tf.reduce_mean(tf.square(outputs - y))\n",
+    "\n",
+    "init = tf.global_variables_initializer()\n",
+    "saver = tf.train.Saver()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "with tf.Session() as sess:\n",
+    "    saver.restore(sess, \"./my_dropout_time_series_model\")\n",
+    "\n",
+    "    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
+    "    y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
+    "\n",
+    "plt.title(\"Testing the model\", fontsize=14)\n",
+    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
+    "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
+    "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
+    "plt.legend(loc=\"upper left\")\n",
+    "plt.xlabel(\"Time\")\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
+   "source": [
+    "Oops, it seems that Dropout does not help at all in this particular case. :/"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
+   "source": [
+    "Another option is to write a script with a command line argument to specify whether you want to train the model or use it for making predictions:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1409,21 +1884,109 @@
    "source": [
     "tf.reset_default_graph()\n",
     "\n",
-    "n_inputs = 5\n",
-    "n_neurons = 100\n",
-    "devices = [\"/cpu:0\"]*5\n",
-    "n_steps = 20\n",
-    "X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])\n",
-    "lstm_cells = [DeviceCellWrapper(device, tf.contrib.rnn.BasicRNNCell(num_units=n_neurons))\n",
-    "              for device in devices]\n",
-    "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)\n",
-    "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
+    "import sys\n",
+    "training = True  # in a script, this would be (sys.argv[-1] == \"train\") instead\n",
+    "\n",
+    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
+    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
+    "\n",
+    "cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
+    "         for layer in range(n_layers)]\n",
+    "if training:\n",
+    "    cells = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
+    "             for cell in cells]\n",
+    "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n",
+    "rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
+    "\n",
+    "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])    # not shown in the book\n",
+    "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs) # not shown\n",
+    "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])   # not shown\n",
+    "loss = tf.reduce_mean(tf.square(outputs - y))                     # not shown\n",
+    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)   # not shown\n",
+    "training_op = optimizer.minimize(loss)                            # not shown\n",
+    "init = tf.global_variables_initializer()                          # not shown\n",
+    "saver = tf.train.Saver()                                          # not shown\n",
+    "\n",
+    "with tf.Session() as sess:\n",
+    "    if training:\n",
+    "        init.run()\n",
+    "        for iteration in range(n_iterations):\n",
+    "            X_batch, y_batch = next_batch(batch_size, n_steps)    # not shown\n",
+    "            _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch}) # not shown\n",
+    "            if iteration % 100 == 0:                              # not shown\n",
+    "                print(iteration, \"Training MSE:\", mse)            # not shown\n",
+    "        save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n",
+    "    else:\n",
+    "        saver.restore(sess, \"/tmp/my_model.ckpt\")\n",
+    "        X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs))) # not shown\n",
+    "        y_pred = sess.run(outputs, feed_dict={X: X_new})                              # not shown"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": true,
+    "editable": true
+   },
+   "source": [
+    "# LSTM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 83,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 84,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "tf.reset_default_graph()\n",
+    "\n",
+    "n_steps = 28\n",
+    "n_inputs = 28\n",
+    "n_neurons = 150\n",
+    "n_outputs = 10\n",
+    "n_layers = 3\n",
+    "\n",
+    "learning_rate = 0.001\n",
+    "\n",
+    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
+    "y = tf.placeholder(tf.int32, [None])\n",
+    "\n",
+    "lstm_cells = [tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n",
+    "              for layer in range(n_layers)]\n",
+    "multi_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)\n",
+    "outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n",
+    "top_layer_h_state = states[-1][1]\n",
+    "logits = tf.layers.dense(top_layer_h_state, n_outputs, name=\"softmax\")\n",
+    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
+    "loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
+    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
+    "training_op = optimizer.minimize(loss)\n",
+    "correct = tf.nn.in_top_k(logits, y, 1)\n",
+    "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
+    "    \n",
     "init = tf.global_variables_initializer()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 85,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1431,9 +1994,72 @@
    },
    "outputs": [],
    "source": [
+    "states"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "top_layer_h_state"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 87,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true,
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "n_epochs = 10\n",
+    "batch_size = 150\n",
+    "\n",
     "with tf.Session() as sess:\n",
     "    init.run()\n",
-    "    print(sess.run(outputs, feed_dict={X: rnd.rand(2, n_steps, n_inputs)}))"
+    "    for epoch in range(n_epochs):\n",
+    "        for iteration in range(mnist.train.num_examples // batch_size):\n",
+    "            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
+    "            X_batch = X_batch.reshape((batch_size, n_steps, n_inputs))\n",
+    "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
+    "        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
+    "        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
+    "        print(\"Epoch\", epoch, \"Train accuracy =\", acc_train, \"Test accuracy =\", acc_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 88,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "lstm_cell = tf.contrib.rnn.LSTMCell(num_units=n_neurons, use_peepholes=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 89,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "gru_cell = tf.contrib.rnn.GRUCell(num_units=n_neurons)"
    ]
   },
   {
@@ -1468,7 +2094,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 90,
    "metadata": {
     "collapsed": true,
     "deletable": true,
@@ -1511,7 +2137,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 91,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1524,7 +2150,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 92,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1547,7 +2173,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 93,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1567,7 +2193,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 94,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1580,7 +2206,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 95,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1593,7 +2219,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 96,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1616,7 +2242,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 97,
    "metadata": {
     "collapsed": true,
     "deletable": true,
@@ -1654,7 +2280,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 60,
+   "execution_count": 98,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1668,7 +2294,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 61,
+   "execution_count": 99,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1681,7 +2307,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 62,
+   "execution_count": 100,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1704,7 +2330,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 63,
+   "execution_count": 101,
    "metadata": {
     "collapsed": true,
     "deletable": true,
@@ -1730,9 +2356,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 64,
+   "execution_count": 102,
    "metadata": {
-    "collapsed": false,
+    "collapsed": true,
     "deletable": true,
     "editable": true
    },
@@ -1741,15 +2367,52 @@
     "tf.reset_default_graph()\n",
     "\n",
     "# Input data.\n",
-    "train_inputs = tf.placeholder(tf.int32, shape=[batch_size])\n",
     "train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])\n",
-    "valid_dataset = tf.constant(valid_examples, dtype=tf.int32)\n",
+    "valid_dataset = tf.constant(valid_examples, dtype=tf.int32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 103,
+   "metadata": {
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "vocabulary_size = 50000\n",
+    "embedding_size = 150\n",
     "\n",
     "# Look up embeddings for inputs.\n",
-    "init_embeddings = tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)\n",
-    "embeddings = tf.Variable(init_embeddings)\n",
-    "embed = tf.nn.embedding_lookup(embeddings, train_inputs)\n",
-    "\n",
+    "init_embeds = tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)\n",
+    "embeddings = tf.Variable(init_embeds)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 104,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
+    "train_inputs = tf.placeholder(tf.int32, shape=[None])\n",
+    "embed = tf.nn.embedding_lookup(embeddings, train_inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 105,
+   "metadata": {
+    "collapsed": true,
+    "deletable": true,
+    "editable": true
+   },
+   "outputs": [],
+   "source": [
     "# Construct the variables for the NCE loss\n",
     "nce_weights = tf.Variable(\n",
     "    tf.truncated_normal([vocabulary_size, embedding_size],\n",
@@ -1789,7 +2452,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 65,
+   "execution_count": 106,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1848,7 +2511,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 66,
+   "execution_count": 107,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1871,7 +2534,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 67,
+   "execution_count": 108,
    "metadata": {
     "collapsed": true,
     "deletable": true,
@@ -1895,7 +2558,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 68,
+   "execution_count": 109,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1934,7 +2597,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 69,
+   "execution_count": 111,
    "metadata": {
     "collapsed": false,
     "deletable": true,
@@ -1962,8 +2625,9 @@
     "encoder_inputs = tf.unstack(tf.transpose(X)) # list of 1D tensors\n",
     "decoder_inputs = tf.unstack(tf.transpose(Y_input)) # list of 1D tensors\n",
     "\n",
-    "lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n",
-    "cell = tf.contrib.rnn.MultiRNNCell([lstm_cell] * n_layers)\n",
+    "lstm_cells = [tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n",
+    "              for layer in range(n_layers)]\n",
+    "cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)\n",
     "\n",
     "output_seqs, states = tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq(\n",
     "    encoder_inputs,\n",
@@ -1978,7 +2642,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 70,
+   "execution_count": 112,
    "metadata": {
     "collapsed": false,
     "deletable": true,