handson-ml/14_recurrent_neural_network...

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Chapter 14 – Recurrent Neural Networks**"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "_This notebook contains all the sample code and solutions to the exercises in chapter 14._"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# To support both python 2 and python 3\n",
    "from __future__ import division, print_function, unicode_literals\n",
    "\n",
    "# Common imports\n",
    "import numpy as np\n",
    "import os\n",
    "\n",
    "# to make this notebook's output stable across runs\n",
    "def reset_graph(seed=42):\n",
    "    tf.reset_default_graph()\n",
    "    tf.set_random_seed(seed)\n",
    "    np.random.seed(seed)\n",
    "\n",
    "# To plot pretty figures\n",
    "%matplotlib inline\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "plt.rcParams['axes.labelsize'] = 14\n",
    "plt.rcParams['xtick.labelsize'] = 12\n",
    "plt.rcParams['ytick.labelsize'] = 12\n",
    "\n",
    "# Where to save the figures\n",
    "PROJECT_ROOT_DIR = \".\"\n",
    "CHAPTER_ID = \"rnn\"\n",
    "\n",
    "def save_fig(fig_id, tight_layout=True):\n",
    "    path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n",
    "    print(\"Saving figure\", fig_id)\n",
    "    if tight_layout:\n",
    "        plt.tight_layout()\n",
    "    plt.savefig(path, format='png', dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then of course we will need TensorFlow:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Basic RNNs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Manual RNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_inputs = 3\n",
    "n_neurons = 5\n",
    "\n",
    "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
    "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
    "\n",
    "Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons],dtype=tf.float32))\n",
    "Wy = tf.Variable(tf.random_normal(shape=[n_neurons,n_neurons],dtype=tf.float32))\n",
    "b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))\n",
    "\n",
    "Y0 = tf.tanh(tf.matmul(X0, Wx) + b)\n",
    "Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)\n",
    "\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0\n",
    "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(Y0_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(Y1_val)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using `static_rnn()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_inputs = 3\n",
    "n_neurons = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
    "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
    "\n",
    "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
    "output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, [X0, X1],\n",
    "                                                dtype=tf.float32)\n",
    "Y0, Y1 = output_seqs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n",
    "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "Y0_val"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "Y1_val"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import clear_output, Image, display, HTML\n",
    "\n",
    "def strip_consts(graph_def, max_const_size=32):\n",
    "    \"\"\"Strip large constant values from graph_def.\"\"\"\n",
    "    strip_def = tf.GraphDef()\n",
    "    for n0 in graph_def.node:\n",
    "        n = strip_def.node.add() \n",
    "        n.MergeFrom(n0)\n",
    "        if n.op == 'Const':\n",
    "            tensor = n.attr['value'].tensor\n",
    "            size = len(tensor.tensor_content)\n",
    "            if size > max_const_size:\n",
    "                tensor.tensor_content = \"b<stripped %d bytes>\"%size\n",
    "    return strip_def\n",
    "\n",
    "def show_graph(graph_def, max_const_size=32):\n",
    "    \"\"\"Visualize TensorFlow graph.\"\"\"\n",
    "    if hasattr(graph_def, 'as_graph_def'):\n",
    "        graph_def = graph_def.as_graph_def()\n",
    "    strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n",
    "    code = \"\"\"\n",
    "        <script>\n",
    "          function load() {{\n",
    "            document.getElementById(\"{id}\").pbtxt = {data};\n",
    "          }}\n",
    "        </script>\n",
    "        <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n",
    "        <div style=\"height:600px\">\n",
    "          <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n",
    "        </div>\n",
    "    \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n",
    "\n",
    "    iframe = \"\"\"\n",
    "        <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n",
    "    \"\"\".format(code.replace('\"', '&quot;'))\n",
    "    display(HTML(iframe))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "show_graph(tf.get_default_graph())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Packing sequences"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_steps = 2\n",
    "n_inputs = 3\n",
    "n_neurons = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))\n",
    "\n",
    "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
    "output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, X_seqs,\n",
    "                                                dtype=tf.float32)\n",
    "outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_batch = np.array([\n",
    "        # t = 0      t = 1 \n",
    "        [[0, 1, 2], [9, 8, 7]], # instance 1\n",
    "        [[3, 4, 5], [0, 0, 0]], # instance 2\n",
    "        [[6, 7, 8], [6, 5, 4]], # instance 3\n",
    "        [[9, 0, 1], [3, 2, 1]], # instance 4\n",
    "    ])\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    outputs_val = outputs.eval(feed_dict={X: X_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(outputs_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(np.transpose(outputs_val, axes=[1, 0, 2])[1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using `dynamic_rnn()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_steps = 2\n",
    "n_inputs = 3\n",
    "n_neurons = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "\n",
    "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
    "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_batch = np.array([\n",
    "        [[0, 1, 2], [9, 8, 7]], # instance 1\n",
    "        [[3, 4, 5], [0, 0, 0]], # instance 2\n",
    "        [[6, 7, 8], [6, 5, 4]], # instance 3\n",
    "        [[9, 0, 1], [3, 2, 1]], # instance 4\n",
    "    ])\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    outputs_val = outputs.eval(feed_dict={X: X_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(outputs_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "show_graph(tf.get_default_graph())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setting the sequence lengths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_steps = 2\n",
    "n_inputs = 3\n",
    "n_neurons = 5\n",
    "\n",
    "reset_graph()\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "seq_length = tf.placeholder(tf.int32, [None])\n",
    "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32,\n",
    "                                    sequence_length=seq_length)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_batch = np.array([\n",
    "        # step 0     step 1\n",
    "        [[0, 1, 2], [9, 8, 7]], # instance 1\n",
    "        [[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)\n",
    "        [[6, 7, 8], [6, 5, 4]], # instance 3\n",
    "        [[9, 0, 1], [3, 2, 1]], # instance 4\n",
    "    ])\n",
    "seq_length_batch = np.array([2, 1, 2, 2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    outputs_val, states_val = sess.run(\n",
    "        [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(outputs_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(states_val)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training a sequence classifier"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function. The main differences relevant to this chapter are:\n",
    "* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n",
    "* the default `activation` is now `None` rather than `tf.nn.relu`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_steps = 28\n",
    "n_inputs = 28\n",
    "n_neurons = 150\n",
    "n_outputs = 10\n",
    "\n",
    "learning_rate = 0.001\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.int32, [None])\n",
    "\n",
    "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
    "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
    "\n",
    "logits = tf.layers.dense(states, n_outputs)\n",
    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,\n",
    "                                                          logits=logits)\n",
    "loss = tf.reduce_mean(xentropy)\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "correct = tf.nn.in_top_k(logits, y, 1)\n",
    "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
    "\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tensorflow.examples.tutorials.mnist import input_data\n",
    "mnist = input_data.read_data_sets(\"/tmp/data/\")\n",
    "X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))\n",
    "y_test = mnist.test.labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_epochs = 100\n",
    "batch_size = 150\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for epoch in range(n_epochs):\n",
    "        for iteration in range(mnist.train.num_examples // batch_size):\n",
    "            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
    "            X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
    "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
    "        print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Multi-layer RNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_steps = 28\n",
    "n_inputs = 28\n",
    "n_outputs = 10\n",
    "\n",
    "learning_rate = 0.001\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.int32, [None])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_neurons = 100\n",
    "n_layers = 3\n",
    "\n",
    "layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons,\n",
    "                                      activation=tf.nn.relu)\n",
    "          for layer in range(n_layers)]\n",
    "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)\n",
    "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "states_concat = tf.concat(axis=1, values=states)\n",
    "logits = tf.layers.dense(states_concat, n_outputs)\n",
    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
    "loss = tf.reduce_mean(xentropy)\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "correct = tf.nn.in_top_k(logits, y, 1)\n",
    "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
    "\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_epochs = 10\n",
    "batch_size = 150\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for epoch in range(n_epochs):\n",
    "        for iteration in range(mnist.train.num_examples // batch_size):\n",
    "            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
    "            X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
    "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
    "        print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Time series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "t_min, t_max = 0, 30\n",
    "resolution = 0.1\n",
    "\n",
    "def time_series(t):\n",
    "    return t * np.sin(t) / 3 + 2 * np.sin(t*5)\n",
    "\n",
    "def next_batch(batch_size, n_steps):\n",
    "    t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)\n",
    "    Ts = t0 + np.arange(0., n_steps + 1) * resolution\n",
    "    ys = time_series(Ts)\n",
    "    return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "t = np.linspace(t_min, t_max, int((t_max - t_min) / resolution))\n",
    "\n",
    "n_steps = 20\n",
    "t_instance = np.linspace(12.2, 12.2 + resolution * (n_steps + 1), n_steps + 1)\n",
    "\n",
    "plt.figure(figsize=(11,4))\n",
    "plt.subplot(121)\n",
    "plt.title(\"A time series (generated)\", fontsize=14)\n",
    "plt.plot(t, time_series(t), label=r\"$t . \\sin(t) / 3 + 2 . \\sin(5t)$\")\n",
    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"b-\", linewidth=3, label=\"A training instance\")\n",
    "plt.legend(loc=\"lower left\", fontsize=14)\n",
    "plt.axis([0, 30, -17, 13])\n",
    "plt.xlabel(\"Time\")\n",
    "plt.ylabel(\"Value\")\n",
    "\n",
    "plt.subplot(122)\n",
    "plt.title(\"A training instance\", fontsize=14)\n",
    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
    "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
    "plt.legend(loc=\"upper left\")\n",
    "plt.xlabel(\"Time\")\n",
    "\n",
    "\n",
    "save_fig(\"time_series_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_batch, y_batch = next_batch(1, n_steps)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.c_[X_batch[0], y_batch[0]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using an `OuputProjectionWrapper`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's create the RNN. It will contain 100 recurrent neurons and we will unroll it over 20 time steps since each traiing instance will be 20 inputs long. Each input will contain only one feature (the value at that time). The targets are also sequences of 20 inputs, each containing a sigle value:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_steps = 20\n",
    "n_inputs = 1\n",
    "n_neurons = 100\n",
    "n_outputs = 1\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
    "\n",
    "cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
    "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "At each time step we now have an output vector of size 100. But what we actually want is a single output value at each time step. The simplest solution is to wrap the cell in an `OutputProjectionWrapper`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_steps = 20\n",
    "n_inputs = 1\n",
    "n_neurons = 100\n",
    "n_outputs = 1\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "cell = tf.contrib.rnn.OutputProjectionWrapper(\n",
    "    tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),\n",
    "    output_size=n_outputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "learning_rate = 0.001\n",
    "\n",
    "loss = tf.reduce_mean(tf.square(outputs - y)) # MSE\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "saver = tf.train.Saver()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_iterations = 1500\n",
    "batch_size = 50\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for iteration in range(n_iterations):\n",
    "        X_batch, y_batch = next_batch(batch_size, n_steps)\n",
    "        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        if iteration % 100 == 0:\n",
    "            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "            print(iteration, \"\\tMSE:\", mse)\n",
    "    \n",
    "    saver.save(sess, \"./my_time_series_model\") # not shown in the book"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "with tf.Session() as sess:                          # not shown in the book\n",
    "    saver.restore(sess, \"./my_time_series_model\")   # not shown\n",
    "\n",
    "    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
    "    y_pred = sess.run(outputs, feed_dict={X: X_new})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.title(\"Testing the model\", fontsize=14)\n",
    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
    "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
    "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
    "plt.legend(loc=\"upper left\")\n",
    "plt.xlabel(\"Time\")\n",
    "\n",
    "save_fig(\"time_series_pred_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Without using an `OutputProjectionWrapper`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_steps = 20\n",
    "n_inputs = 1\n",
    "n_neurons = 100\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
    "rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_outputs = 1\n",
    "learning_rate = 0.001"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
    "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
    "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "loss = tf.reduce_mean(tf.square(outputs - y))\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "\n",
    "init = tf.global_variables_initializer()\n",
    "saver = tf.train.Saver()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_iterations = 1500\n",
    "batch_size = 50\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for iteration in range(n_iterations):\n",
    "        X_batch, y_batch = next_batch(batch_size, n_steps)\n",
    "        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        if iteration % 100 == 0:\n",
    "            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "            print(iteration, \"\\tMSE:\", mse)\n",
    "    \n",
    "    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
    "    y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
    "    \n",
    "    saver.save(sess, \"./my_time_series_model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.title(\"Testing the model\", fontsize=14)\n",
    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
    "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
    "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
    "plt.legend(loc=\"upper left\")\n",
    "plt.xlabel(\"Time\")\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generating a creative new sequence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "with tf.Session() as sess:                        # not shown in the book\n",
    "    saver.restore(sess, \"./my_time_series_model\") # not shown\n",
    "\n",
    "    sequence = [0.] * n_steps\n",
    "    for iteration in range(300):\n",
    "        X_batch = np.array(sequence[-n_steps:]).reshape(1, n_steps, 1)\n",
    "        y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
    "        sequence.append(y_pred[0, -1, 0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(8,4))\n",
    "plt.plot(np.arange(len(sequence)), sequence, \"b-\")\n",
    "plt.plot(t[:n_steps], sequence[:n_steps], \"b-\", linewidth=3)\n",
    "plt.xlabel(\"Time\")\n",
    "plt.ylabel(\"Value\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "with tf.Session() as sess:\n",
    "    saver.restore(sess, \"./my_time_series_model\")\n",
    "\n",
    "    sequence1 = [0. for i in range(n_steps)]\n",
    "    for iteration in range(len(t) - n_steps):\n",
    "        X_batch = np.array(sequence1[-n_steps:]).reshape(1, n_steps, 1)\n",
    "        y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
    "        sequence1.append(y_pred[0, -1, 0])\n",
    "\n",
    "    sequence2 = [time_series(i * resolution + t_min + (t_max-t_min/3)) for i in range(n_steps)]\n",
    "    for iteration in range(len(t) - n_steps):\n",
    "        X_batch = np.array(sequence2[-n_steps:]).reshape(1, n_steps, 1)\n",
    "        y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
    "        sequence2.append(y_pred[0, -1, 0])\n",
    "\n",
    "plt.figure(figsize=(11,4))\n",
    "plt.subplot(121)\n",
    "plt.plot(t, sequence1, \"b-\")\n",
    "plt.plot(t[:n_steps], sequence1[:n_steps], \"b-\", linewidth=3)\n",
    "plt.xlabel(\"Time\")\n",
    "plt.ylabel(\"Value\")\n",
    "\n",
    "plt.subplot(122)\n",
    "plt.plot(t, sequence2, \"b-\")\n",
    "plt.plot(t[:n_steps], sequence2[:n_steps], \"b-\", linewidth=3)\n",
    "plt.xlabel(\"Time\")\n",
    "save_fig(\"creative_sequence_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Deep RNN"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MultiRNNCell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_inputs = 2\n",
    "n_steps = 5\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_neurons = 100\n",
    "n_layers = 3\n",
    "\n",
    "layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
    "          for layer in range(n_layers)]\n",
    "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)\n",
    "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_batch = np.random.rand(2, n_steps, n_inputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "outputs_val.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Distributing a Deep RNN Across Multiple GPUs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Do **NOT** do this:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "with tf.device(\"/gpu:0\"):  # BAD! This is ignored.\n",
    "    layer1 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
    "\n",
    "with tf.device(\"/gpu:1\"):  # BAD! Ignored again.\n",
    "    layer2 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Instead, you need a `DeviceCellWrapper`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "class DeviceCellWrapper(tf.contrib.rnn.RNNCell):\n",
    "  def __init__(self, device, cell):\n",
    "    self._cell = cell\n",
    "    self._device = device\n",
    "\n",
    "  @property\n",
    "  def state_size(self):\n",
    "    return self._cell.state_size\n",
    "\n",
    "  @property\n",
    "  def output_size(self):\n",
    "    return self._cell.output_size\n",
    "\n",
    "  def __call__(self, inputs, state, scope=None):\n",
    "    with tf.device(self._device):\n",
    "        return self._cell(inputs, state, scope)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_inputs = 5\n",
    "n_steps = 20\n",
    "n_neurons = 100\n",
    "\n",
    "X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "devices = [\"/cpu:0\", \"/cpu:0\", \"/cpu:0\"] # replace with [\"/gpu:0\", \"/gpu:1\", \"/gpu:2\"] if you have 3 GPUs\n",
    "cells = [DeviceCellWrapper(dev,tf.contrib.rnn.BasicRNNCell(num_units=n_neurons))\n",
    "         for dev in devices]\n",
    "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n",
    "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Alternatively, since TensorFlow 1.1, you can use the `tf.contrib.rnn.DeviceWrapper` class (alias `tf.nn.rnn_cell.DeviceWrapper` since TF 1.2)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    print(sess.run(outputs, feed_dict={X: np.random.rand(2, n_steps, n_inputs)}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dropout"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_inputs = 1\n",
    "n_neurons = 100\n",
    "n_layers = 3\n",
    "n_steps = 20\n",
    "n_outputs = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: the `input_keep_prob` parameter can be a placeholder, making it possible to set it to any value you want during training, and to 1.0 during testing (effectively turning dropout off). This is a much more elegant solution than what was recommended in earlier versions of the book (i.e., writing your own wrapper class or having a separate model for training and testing). Thanks to Shen Cheng for bringing this to my attention."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "keep_prob = tf.placeholder_with_default(1.0, shape=())\n",
    "cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
    "         for layer in range(n_layers)]\n",
    "cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
    "              for cell in cells]\n",
    "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells_drop)\n",
    "rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "learning_rate = 0.01\n",
    "\n",
    "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
    "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
    "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
    "\n",
    "loss = tf.reduce_mean(tf.square(outputs - y))\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "\n",
    "init = tf.global_variables_initializer()\n",
    "saver = tf.train.Saver()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_iterations = 1500\n",
    "batch_size = 50\n",
    "train_keep_prob = 0.5\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for iteration in range(n_iterations):\n",
    "        X_batch, y_batch = next_batch(batch_size, n_steps)\n",
    "        _, mse = sess.run([training_op, loss],\n",
    "                          feed_dict={X: X_batch, y: y_batch,\n",
    "                                     keep_prob: train_keep_prob})\n",
    "        if iteration % 100 == 0:                   # not shown in the book\n",
    "            print(iteration, \"Training MSE:\", mse) # not shown\n",
    "    \n",
    "    saver.save(sess, \"./my_dropout_time_series_model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "with tf.Session() as sess:\n",
    "    saver.restore(sess, \"./my_dropout_time_series_model\")\n",
    "\n",
    "    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
    "    y_pred = sess.run(outputs, feed_dict={X: X_new})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.title(\"Testing the model\", fontsize=14)\n",
    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
    "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
    "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
    "plt.legend(loc=\"upper left\")\n",
    "plt.xlabel(\"Time\")\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Oops, it seems that Dropout does not help at all in this particular case. :/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LSTM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "n_steps = 28\n",
    "n_inputs = 28\n",
    "n_neurons = 150\n",
    "n_outputs = 10\n",
    "n_layers = 3\n",
    "\n",
    "learning_rate = 0.001\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.int32, [None])\n",
    "\n",
    "lstm_cells = [tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n",
    "              for layer in range(n_layers)]\n",
    "multi_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)\n",
    "outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n",
    "top_layer_h_state = states[-1][1]\n",
    "logits = tf.layers.dense(top_layer_h_state, n_outputs, name=\"softmax\")\n",
    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
    "loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "correct = tf.nn.in_top_k(logits, y, 1)\n",
    "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
    "    \n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "states"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "top_layer_h_state"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "n_epochs = 10\n",
    "batch_size = 150\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for epoch in range(n_epochs):\n",
    "        for iteration in range(mnist.train.num_examples // batch_size):\n",
    "            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
    "            X_batch = X_batch.reshape((batch_size, n_steps, n_inputs))\n",
    "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
    "        print(\"Epoch\", epoch, \"Train accuracy =\", acc_train, \"Test accuracy =\", acc_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "lstm_cell = tf.contrib.rnn.LSTMCell(num_units=n_neurons, use_peepholes=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "gru_cell = tf.contrib.rnn.GRUCell(num_units=n_neurons)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Embeddings"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This section is based on TensorFlow's [Word2Vec tutorial](https://www.tensorflow.org/versions/r0.11/tutorials/word2vec/index.html)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fetch the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from six.moves import urllib\n",
    "\n",
    "import errno\n",
    "import os\n",
    "import zipfile\n",
    "\n",
    "WORDS_PATH = \"datasets/words\"\n",
    "WORDS_URL = 'http://mattmahoney.net/dc/text8.zip'\n",
    "\n",
    "def mkdir_p(path):\n",
    "    \"\"\"Create directories, ok if they already exist.\n",
    "    \n",
    "    This is for python 2 support. In python >=3.2, simply use:\n",
    "    >>> os.makedirs(path, exist_ok=True)\n",
    "    \"\"\"\n",
    "    try:\n",
    "        os.makedirs(path)\n",
    "    except OSError as exc:\n",
    "        if exc.errno == errno.EEXIST and os.path.isdir(path):\n",
    "            pass\n",
    "        else:\n",
    "            raise\n",
    "\n",
    "def fetch_words_data(words_url=WORDS_URL, words_path=WORDS_PATH):\n",
    "    os.makedirs(words_path, exist_ok=True)\n",
    "    zip_path = os.path.join(words_path, \"words.zip\")\n",
    "    if not os.path.exists(zip_path):\n",
    "        urllib.request.urlretrieve(words_url, zip_path)\n",
    "    with zipfile.ZipFile(zip_path) as f:\n",
    "        data = f.read(f.namelist()[0])\n",
    "    return data.decode(\"ascii\").split()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "words = fetch_words_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "words[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the dictionary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "\n",
    "vocabulary_size = 50000\n",
    "\n",
    "vocabulary = [(\"UNK\", None)] + Counter(words).most_common(vocabulary_size - 1)\n",
    "vocabulary = np.array([word for word, _ in vocabulary])\n",
    "dictionary = {word: code for code, word in enumerate(vocabulary)}\n",
    "data = np.array([dictionary.get(word, 0) for word in words])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "\" \".join(words[:9]), data[:9]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "\" \".join([vocabulary[word_index] for word_index in [5241, 3081, 12, 6, 195, 2, 3134, 46, 59]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "words[24], data[24]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate batches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import random\n",
    "from collections import deque\n",
    "\n",
    "def generate_batch(batch_size, num_skips, skip_window):\n",
    "    global data_index\n",
    "    assert batch_size % num_skips == 0\n",
    "    assert num_skips <= 2 * skip_window\n",
    "    batch = np.ndarray(shape=(batch_size), dtype=np.int32)\n",
    "    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)\n",
    "    span = 2 * skip_window + 1 # [ skip_window target skip_window ]\n",
    "    buffer = deque(maxlen=span)\n",
    "    for _ in range(span):\n",
    "        buffer.append(data[data_index])\n",
    "        data_index = (data_index + 1) % len(data)\n",
    "    for i in range(batch_size // num_skips):\n",
    "        target = skip_window  # target label at the center of the buffer\n",
    "        targets_to_avoid = [ skip_window ]\n",
    "        for j in range(num_skips):\n",
    "            while target in targets_to_avoid:\n",
    "                target = random.randint(0, span - 1)\n",
    "            targets_to_avoid.append(target)\n",
    "            batch[i * num_skips + j] = buffer[skip_window]\n",
    "            labels[i * num_skips + j, 0] = buffer[target]\n",
    "        buffer.append(data[data_index])\n",
    "        data_index = (data_index + 1) % len(data)\n",
    "    return batch, labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data_index=0\n",
    "batch, labels = generate_batch(8, 2, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch, [vocabulary[word] for word in batch]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels, [vocabulary[word] for word in labels[:, 0]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "batch_size = 128\n",
    "embedding_size = 128  # Dimension of the embedding vector.\n",
    "skip_window = 1       # How many words to consider left and right.\n",
    "num_skips = 2         # How many times to reuse an input to generate a label.\n",
    "\n",
    "# We pick a random validation set to sample nearest neighbors. Here we limit the\n",
    "# validation samples to the words that have a low numeric ID, which by\n",
    "# construction are also the most frequent.\n",
    "valid_size = 16     # Random set of words to evaluate similarity on.\n",
    "valid_window = 100  # Only pick dev samples in the head of the distribution.\n",
    "valid_examples = np.random.choice(valid_window, valid_size, replace=False)\n",
    "num_sampled = 64    # Number of negative examples to sample.\n",
    "\n",
    "learning_rate = 0.01"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "# Input data.\n",
    "train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])\n",
    "valid_dataset = tf.constant(valid_examples, dtype=tf.int32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "vocabulary_size = 50000\n",
    "embedding_size = 150\n",
    "\n",
    "# Look up embeddings for inputs.\n",
    "init_embeds = tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)\n",
    "embeddings = tf.Variable(init_embeds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train_inputs = tf.placeholder(tf.int32, shape=[None])\n",
    "embed = tf.nn.embedding_lookup(embeddings, train_inputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Construct the variables for the NCE loss\n",
    "nce_weights = tf.Variable(\n",
    "    tf.truncated_normal([vocabulary_size, embedding_size],\n",
    "                        stddev=1.0 / np.sqrt(embedding_size)))\n",
    "nce_biases = tf.Variable(tf.zeros([vocabulary_size]))\n",
    "\n",
    "# Compute the average NCE loss for the batch.\n",
    "# tf.nce_loss automatically draws a new sample of the negative labels each\n",
    "# time we evaluate the loss.\n",
    "loss = tf.reduce_mean(\n",
    "    tf.nn.nce_loss(nce_weights, nce_biases, train_labels, embed,\n",
    "                   num_sampled, vocabulary_size))\n",
    "\n",
    "# Construct the Adam optimizer\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "\n",
    "# Compute the cosine similarity between minibatch examples and all embeddings.\n",
    "norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), axis=1, keep_dims=True))\n",
    "normalized_embeddings = embeddings / norm\n",
    "valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)\n",
    "similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True)\n",
    "\n",
    "# Add variable initializer.\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_steps = 10001\n",
    "\n",
    "with tf.Session() as session:\n",
    "    init.run()\n",
    "\n",
    "    average_loss = 0\n",
    "    for step in range(num_steps):\n",
    "        print(\"\\rIteration: {}\".format(step), end=\"\\t\")\n",
    "        batch_inputs, batch_labels = generate_batch(batch_size, num_skips, skip_window)\n",
    "        feed_dict = {train_inputs : batch_inputs, train_labels : batch_labels}\n",
    "\n",
    "        # We perform one update step by evaluating the training op (including it\n",
    "        # in the list of returned values for session.run()\n",
    "        _, loss_val = session.run([training_op, loss], feed_dict=feed_dict)\n",
    "        average_loss += loss_val\n",
    "\n",
    "        if step % 2000 == 0:\n",
    "            if step > 0:\n",
    "                average_loss /= 2000\n",
    "            # The average loss is an estimate of the loss over the last 2000 batches.\n",
    "            print(\"Average loss at step \", step, \": \", average_loss)\n",
    "            average_loss = 0\n",
    "\n",
    "        # Note that this is expensive (~20% slowdown if computed every 500 steps)\n",
    "        if step % 10000 == 0:\n",
    "            sim = similarity.eval()\n",
    "            for i in range(valid_size):\n",
    "                valid_word = vocabulary[valid_examples[i]]\n",
    "                top_k = 8 # number of nearest neighbors\n",
    "                nearest = (-sim[i, :]).argsort()[1:top_k+1]\n",
    "                log_str = \"Nearest to %s:\" % valid_word\n",
    "                for k in range(top_k):\n",
    "                    close_word = vocabulary[nearest[k]]\n",
    "                    log_str = \"%s %s,\" % (log_str, close_word)\n",
    "                print(log_str)\n",
    "\n",
    "    final_embeddings = normalized_embeddings.eval()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's save the final embeddings (of course you can use a TensorFlow `Saver` if you prefer):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "np.save(\"./my_final_embeddings.npy\", final_embeddings)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plot the embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def plot_with_labels(low_dim_embs, labels):\n",
    "    assert low_dim_embs.shape[0] >= len(labels), \"More labels than embeddings\"\n",
    "    plt.figure(figsize=(18, 18))  #in inches\n",
    "    for i, label in enumerate(labels):\n",
    "        x, y = low_dim_embs[i,:]\n",
    "        plt.scatter(x, y)\n",
    "        plt.annotate(label,\n",
    "                     xy=(x, y),\n",
    "                     xytext=(5, 2),\n",
    "                     textcoords='offset points',\n",
    "                     ha='right',\n",
    "                     va='bottom')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.manifold import TSNE\n",
    "\n",
    "tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)\n",
    "plot_only = 500\n",
    "low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:])\n",
    "labels = [vocabulary[i] for i in range(plot_only)]\n",
    "plot_with_labels(low_dim_embs, labels)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Machine Translation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `basic_rnn_seq2seq()` function creates a simple Encoder/Decoder model: it first runs an RNN to encode `encoder_inputs` into a state vector, then runs a decoder initialized with the last encoder state on `decoder_inputs`. Encoder and decoder use the same RNN cell type but they don't share parameters."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "reset_graph()\n",
    "\n",
    "n_steps = 50\n",
    "n_neurons = 200\n",
    "n_layers = 3\n",
    "num_encoder_symbols = 20000\n",
    "num_decoder_symbols = 20000\n",
    "embedding_size = 150\n",
    "learning_rate = 0.01\n",
    "\n",
    "X = tf.placeholder(tf.int32, [None, n_steps]) # English sentences\n",
    "Y = tf.placeholder(tf.int32, [None, n_steps]) # French translations\n",
    "W = tf.placeholder(tf.float32, [None, n_steps - 1, 1])\n",
    "Y_input = Y[:, :-1]\n",
    "Y_target = Y[:, 1:]\n",
    "\n",
    "encoder_inputs = tf.unstack(tf.transpose(X)) # list of 1D tensors\n",
    "decoder_inputs = tf.unstack(tf.transpose(Y_input)) # list of 1D tensors\n",
    "\n",
    "lstm_cells = [tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n",
    "              for layer in range(n_layers)]\n",
    "cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)\n",
    "\n",
    "output_seqs, states = tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq(\n",
    "    encoder_inputs,\n",
    "    decoder_inputs,\n",
    "    cell,\n",
    "    num_encoder_symbols,\n",
    "    num_decoder_symbols,\n",
    "    embedding_size)\n",
    "\n",
    "logits = tf.transpose(tf.unstack(output_seqs), perm=[1, 0, 2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "logits_flat = tf.reshape(logits, [-1, num_decoder_symbols])\n",
    "Y_target_flat = tf.reshape(Y_target, [-1])\n",
    "W_flat = tf.reshape(W, [-1])\n",
    "xentropy = W_flat * tf.nn.sparse_softmax_cross_entropy_with_logits(labels=Y_target_flat, logits=logits_flat)\n",
    "loss = tf.reduce_mean(xentropy)\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Exercise solutions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. to 6."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "See Appendix A."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Embedded Reber Grammars"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First we need to build a function that generates strings based on a grammar. The grammar will be represented as a list of possible transitions for each state. A transition specifies the string to output (or a grammar to generate it) and the next state."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from random import choice, seed\n",
    "\n",
    "# to make this notebook's output stable across runs\n",
    "seed(42)\n",
    "np.random.seed(42)\n",
    "\n",
    "default_reber_grammar = [\n",
    "    [(\"B\", 1)],           # (state 0) =B=>(state 1)\n",
    "    [(\"T\", 2), (\"P\", 3)], # (state 1) =T=>(state 2) or =P=>(state 3)\n",
    "    [(\"S\", 2), (\"X\", 4)], # (state 2) =S=>(state 2) or =X=>(state 4)\n",
    "    [(\"T\", 3), (\"V\", 5)], # and so on...\n",
    "    [(\"X\", 3), (\"S\", 6)],\n",
    "    [(\"P\", 4), (\"V\", 6)],\n",
    "    [(\"E\", None)]]        # (state 6) =E=>(terminal state)\n",
    "\n",
    "embedded_reber_grammar = [\n",
    "    [(\"B\", 1)],\n",
    "    [(\"T\", 2), (\"P\", 3)],\n",
    "    [(default_reber_grammar, 4)],\n",
    "    [(default_reber_grammar, 5)],\n",
    "    [(\"T\", 6)],\n",
    "    [(\"P\", 6)],\n",
    "    [(\"E\", None)]]\n",
    "\n",
    "def generate_string(grammar):\n",
    "    state = 0\n",
    "    output = []\n",
    "    while state is not None:\n",
    "        production, state = choice(grammar[state])\n",
    "        if isinstance(production, list):\n",
    "            production = generate_string(grammar=production)\n",
    "        output.append(production)\n",
    "    return \"\".join(output)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's generate a few strings based on the default Reber grammar:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [],
   "source": [
    "for _ in range(25):\n",
    "    print(generate_string(default_reber_grammar), end=\" \")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Looks good. Now let's generate a few strings based on the embedded Reber grammar:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [],
   "source": [
    "for _ in range(25):\n",
    "    print(generate_string(embedded_reber_grammar), end=\" \")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Okay, now we need a function to generate strings that do not respect the grammar. We could generate a random string, but the task would be a bit too easy, so instead we will generate a string that respects the grammar, and we will corrupt it by changing just one character:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def generate_corrupted_string(grammar, chars=\"BEPSTVX\"):\n",
    "    good_string = generate_string(grammar)\n",
    "    index = np.random.randint(len(good_string))\n",
    "    good_char = good_string[index]\n",
    "    bad_char = choice(list(set(chars) - set(good_char)))\n",
    "    return good_string[:index] + bad_char + good_string[index + 1:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's look at a few corrupted strings:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [],
   "source": [
    "for _ in range(25):\n",
    "    print(generate_corrupted_string(embedded_reber_grammar), end=\" \")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It's not possible to feed a string directly to an RNN: we need to convert it to a sequence of vectors, first. Each vector will represent a single letter, using a one-hot encoding. For example, the letter \"B\" will be represented as the vector `[1, 0, 0, 0, 0, 0, 0]`, the letter E will be represented as `[0, 1, 0, 0, 0, 0, 0]` and so on. Let's write a function that converts a string to a sequence of such one-hot vectors. Note that if the string is shorted than `n_steps`, it will be padded with zero vectors (later, we will tell TensorFlow how long each string actually is using the `sequence_length` parameter)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def string_to_one_hot_vectors(string, n_steps, chars=\"BEPSTVX\"):\n",
    "    char_to_index = {char: index for index, char in enumerate(chars)}\n",
    "    output = np.zeros((n_steps, len(chars)), dtype=np.int32)\n",
    "    for index, char in enumerate(string):\n",
    "        output[index, char_to_index[char]] = 1.\n",
    "    return output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
    "string_to_one_hot_vectors(\"BTBTXSETE\", 12)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can now generate the dataset, with 50% good strings, and 50% bad strings:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def generate_dataset(size):\n",
    "    good_strings = [generate_string(embedded_reber_grammar)\n",
    "                    for _ in range(size // 2)]\n",
    "    bad_strings = [generate_corrupted_string(embedded_reber_grammar)\n",
    "                   for _ in range(size - size // 2)]\n",
    "    all_strings = good_strings + bad_strings\n",
    "    n_steps = max([len(string) for string in all_strings])\n",
    "    X = np.array([string_to_one_hot_vectors(string, n_steps)\n",
    "                  for string in all_strings])\n",
    "    seq_length = np.array([len(string) for string in all_strings])\n",
    "    y = np.array([[1] for _ in range(len(good_strings))] +\n",
    "                 [[0] for _ in range(len(bad_strings))])\n",
    "    rnd_idx = np.random.permutation(size)\n",
    "    return X[rnd_idx], seq_length[rnd_idx], y[rnd_idx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_train, l_train, y_train = generate_dataset(10000)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's take a look at the first training instances:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It's padded with a lot of zeros because the longest string in the dataset is that long. How long is this particular string?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [],
   "source": [
    "l_train[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What class is it?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Perfect! We are ready to create the RNN to identify good strings. We build a sequence classifier very similar to the one we built earlier to classify MNIST images, with two main differences:\n",
    "* First, the input strings have variable length, so we need to specify the `sequence_length` when calling the `dynamic_rnn()` function.\n",
    "* Second, this is a binary classifier, so we only need one output neuron that will output, for each input string, the estimated log probability that it is a good string. For multiclass classification, we used `sparse_softmax_cross_entropy_with_logits()` but for binary classification we use `sigmoid_cross_entropy_with_logits()`.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "possible_chars = \"BEPSTVX\"\n",
    "n_inputs = len(possible_chars)\n",
    "n_neurons = 30\n",
    "n_outputs = 1\n",
    "\n",
    "learning_rate = 0.02\n",
    "momentum = 0.95\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, None, n_inputs], name=\"X\")\n",
    "seq_length = tf.placeholder(tf.int32, [None], name=\"seq_length\")\n",
    "y = tf.placeholder(tf.float32, [None, 1], name=\"y\")\n",
    "\n",
    "gru_cell = tf.contrib.rnn.GRUCell(num_units=n_neurons)\n",
    "outputs, states = tf.nn.dynamic_rnn(gru_cell, X, dtype=tf.float32,\n",
    "                                    sequence_length=seq_length)\n",
    "\n",
    "logits = tf.layers.dense(states, n_outputs, name=\"logits\")\n",
    "y_pred = tf.cast(tf.greater(logits, 0.), tf.float32, name=\"y_pred\")\n",
    "y_proba = tf.nn.sigmoid(logits, name=\"y_proba\")\n",
    "\n",
    "xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)\n",
    "loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
    "optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,\n",
    "                                       momentum=momentum,\n",
    "                                       use_nesterov=True)\n",
    "training_op = optimizer.minimize(loss)\n",
    "\n",
    "correct = tf.equal(y_pred, y, name=\"correct\")\n",
    "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name=\"accuracy\")\n",
    "\n",
    "init = tf.global_variables_initializer()\n",
    "saver = tf.train.Saver()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now let's generate a validation set so we can track progress during training:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_val, l_val, y_val = generate_dataset(5000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_epochs = 50\n",
    "batch_size = 50\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for epoch in range(n_epochs):\n",
    "        X_batches = np.array_split(X_train, len(X_train) // batch_size)\n",
    "        l_batches = np.array_split(l_train, len(l_train) // batch_size)\n",
    "        y_batches = np.array_split(y_train, len(y_train) // batch_size)\n",
    "        for X_batch, l_batch, y_batch in zip(X_batches, l_batches, y_batches):\n",
    "            loss_val, _ = sess.run(\n",
    "                [loss, training_op],\n",
    "                feed_dict={X: X_batch, seq_length: l_batch, y: y_batch})\n",
    "        acc_train = accuracy.eval(feed_dict={X: X_batch, seq_length: l_batch, y: y_batch})\n",
    "        acc_val = accuracy.eval(feed_dict={X: X_val, seq_length: l_val, y: y_val})\n",
    "        print(\"{:4d}  Train loss: {:.4f}, accuracy: {:.2f}%  Validation accuracy: {:.2f}%\".format(\n",
    "            epoch, loss_val, 100 * acc_train, 100 * acc_val))\n",
    "        saver.save(sess, \"my_reber_classifier\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now let's test our RNN on two tricky strings: the first one is bad while the second one is good. They only differ by the second to last character. If the RNN gets this right, it shows that it managed to notice the pattern that the second letter should always be equal to the second to last letter. That requires a fairly long short-term memory (which is the reason why we used a GRU cell)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_strings = [\n",
    "    \"BPBTSSSSSSSSSSSSXXTTTTTVPXTTVPXTTTTTTTVPXVPXVPXTTTVVETE\",\n",
    "    \"BPBTSSSSSSSSSSSSXXTTTTTVPXTTVPXTTTTTTTVPXVPXVPXTTTVVEPE\"]\n",
    "l_test = np.array([len(s) for s in test_strings])\n",
    "max_length = l_test.max()\n",
    "X_test = [string_to_one_hot_vectors(s, n_steps=max_length)\n",
    "          for s in test_strings]\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    saver.restore(sess, \"my_reber_classifier\")\n",
    "    y_proba_val = y_proba.eval(feed_dict={X: X_test, seq_length: l_test})\n",
    "\n",
    "print()\n",
    "print(\"Estimated probability that these are Reber strings:\")\n",
    "for index, string in enumerate(test_strings):\n",
    "    print(\"{}: {:.2f}%\".format(string, y_proba_val[index][0]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Ta-da! It worked fine. The RNN found the correct answers with absolute confidence. :)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. and 9."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Coming soon..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  },
  "nav_menu": {},
  "toc": {
   "navigate_menu": true,
   "number_sections": true,
   "sideBar": true,
   "threshold": 6,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}