handson-ml/14_recurrent_neural_network...

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Chapter 14 – Recurrent Neural Networks**"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "_This notebook contains all the sample code and solutions to the exercices in chapter 14._"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# To support both python 2 and python 3\n",
    "from __future__ import division, print_function, unicode_literals\n",
    "\n",
    "# Common imports\n",
    "import numpy as np\n",
    "import numpy.random as rnd\n",
    "import os\n",
    "\n",
    "# to make this notebook's output stable across runs\n",
    "rnd.seed(42)\n",
    "\n",
    "# To plot pretty figures\n",
    "%matplotlib inline\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "plt.rcParams['axes.labelsize'] = 14\n",
    "plt.rcParams['xtick.labelsize'] = 12\n",
    "plt.rcParams['ytick.labelsize'] = 12\n",
    "\n",
    "# Where to save the figures\n",
    "PROJECT_ROOT_DIR = \".\"\n",
    "CHAPTER_ID = \"rnn\"\n",
    "\n",
    "def save_fig(fig_id, tight_layout=True):\n",
    "    path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n",
    "    print(\"Saving figure\", fig_id)\n",
    "    if tight_layout:\n",
    "        plt.tight_layout()\n",
    "    plt.savefig(path, format='png', dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then of course we will need TensorFlow:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Basic RNNs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Manual RNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "n_inputs = 3\n",
    "n_neurons = 5\n",
    "\n",
    "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
    "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
    "\n",
    "Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons], dtype=tf.float32))\n",
    "Wy = tf.Variable(tf.random_normal(shape=[n_neurons, n_neurons], dtype=tf.float32))\n",
    "b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))\n",
    "\n",
    "Y0 = tf.tanh(tf.matmul(X0, Wx) + b)\n",
    "Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)\n",
    "\n",
    "init = tf.initialize_all_variables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0\n",
    "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(Y0_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(Y1_val)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using `rnn()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "n_inputs = 3\n",
    "n_neurons = 5\n",
    "\n",
    "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
    "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
    "\n",
    "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
    "output_seqs, states = tf.nn.rnn(basic_cell, [X0, X1], dtype=tf.float32)\n",
    "Y0, Y1 = output_seqs\n",
    "\n",
    "init = tf.initialize_all_variables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n",
    "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Y0_val"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Y1_val"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from IPython.display import clear_output, Image, display, HTML\n",
    "\n",
    "def strip_consts(graph_def, max_const_size=32):\n",
    "    \"\"\"Strip large constant values from graph_def.\"\"\"\n",
    "    strip_def = tf.GraphDef()\n",
    "    for n0 in graph_def.node:\n",
    "        n = strip_def.node.add() \n",
    "        n.MergeFrom(n0)\n",
    "        if n.op == 'Const':\n",
    "            tensor = n.attr['value'].tensor\n",
    "            size = len(tensor.tensor_content)\n",
    "            if size > max_const_size:\n",
    "                tensor.tensor_content = \"b<stripped %d bytes>\"%size\n",
    "    return strip_def\n",
    "\n",
    "def show_graph(graph_def, max_const_size=32):\n",
    "    \"\"\"Visualize TensorFlow graph.\"\"\"\n",
    "    if hasattr(graph_def, 'as_graph_def'):\n",
    "        graph_def = graph_def.as_graph_def()\n",
    "    strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n",
    "    code = \"\"\"\n",
    "        <script>\n",
    "          function load() {{\n",
    "            document.getElementById(\"{id}\").pbtxt = {data};\n",
    "          }}\n",
    "        </script>\n",
    "        <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n",
    "        <div style=\"height:600px\">\n",
    "          <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n",
    "        </div>\n",
    "    \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n",
    "\n",
    "    iframe = \"\"\"\n",
    "        <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n",
    "    \"\"\".format(code.replace('\"', '&quot;'))\n",
    "    display(HTML(iframe))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "show_graph(tf.get_default_graph())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Packing sequences"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "n_steps = 2\n",
    "n_inputs = 3\n",
    "n_neurons = 5\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "X_seqs = tf.unpack(tf.transpose(X, perm=[1, 0, 2]))\n",
    "\n",
    "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
    "output_seqs, states = tf.nn.rnn(basic_cell, X_seqs, dtype=tf.float32)\n",
    "outputs = tf.transpose(tf.pack(output_seqs), perm=[1, 0, 2])\n",
    "\n",
    "init = tf.initialize_all_variables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X_batch = np.array([\n",
    "        # t = 0      t = 1 \n",
    "        [[0, 1, 2], [9, 8, 7]], # instance 1\n",
    "        [[3, 4, 5], [0, 0, 0]], # instance 2\n",
    "        [[6, 7, 8], [6, 5, 4]], # instance 3\n",
    "        [[9, 0, 1], [3, 2, 1]], # instance 4\n",
    "    ])\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    outputs_val = outputs.eval(feed_dict={X: X_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(np.transpose(outputs_val, axes=[1, 0, 2])[1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using `dynamic_rnn()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "n_steps = 2\n",
    "n_inputs = 3\n",
    "n_neurons = 5\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "\n",
    "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
    "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
    "\n",
    "init = tf.initialize_all_variables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X_batch = np.array([\n",
    "        [[0, 1, 2], [9, 8, 7]], # instance 1\n",
    "        [[3, 4, 5], [0, 0, 0]], # instance 2\n",
    "        [[6, 7, 8], [6, 5, 4]], # instance 3\n",
    "        [[9, 0, 1], [3, 2, 1]], # instance 4\n",
    "    ])\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    print(\"outputs =\", outputs.eval(feed_dict={X: X_batch}))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "show_graph(tf.get_default_graph())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setting the sequence lengths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "n_steps = 2\n",
    "n_inputs = 3\n",
    "n_neurons = 5\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "seq_length = tf.placeholder(tf.int32, [None])\n",
    "\n",
    "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
    "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, sequence_length=seq_length, dtype=tf.float32)\n",
    "\n",
    "init = tf.initialize_all_variables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X_batch = np.array([\n",
    "        # step 0     step 1\n",
    "        [[0, 1, 2], [9, 8, 7]], # instance 1\n",
    "        [[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)\n",
    "        [[6, 7, 8], [6, 5, 4]], # instance 3\n",
    "        [[9, 0, 1], [3, 2, 1]], # instance 4\n",
    "    ])\n",
    "seq_length_batch = np.array([2, 1, 2, 2])\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    outputs_val, states_val = sess.run(\n",
    "        [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(outputs_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(states_val)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training a sequence classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "from tensorflow.contrib.layers import fully_connected\n",
    "\n",
    "n_steps = 28\n",
    "n_inputs = 28\n",
    "n_neurons = 150\n",
    "n_outputs = 10\n",
    "\n",
    "learning_rate = 0.001\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.int32, [None])\n",
    "\n",
    "with tf.variable_scope(\"\", initializer=tf.contrib.layers.variance_scaling_initializer()):\n",
    "    basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
    "    outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
    "\n",
    "logits = fully_connected(states, n_outputs, activation_fn=None)\n",
    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n",
    "loss = tf.reduce_mean(xentropy)\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "correct = tf.nn.in_top_k(logits, y, 1)\n",
    "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
    "\n",
    "init = tf.initialize_all_variables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from tensorflow.examples.tutorials.mnist import input_data\n",
    "mnist = input_data.read_data_sets(\"/tmp/data/\")\n",
    "X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))\n",
    "y_test = mnist.test.labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "n_epochs = 100\n",
    "batch_size = 150\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for epoch in range(n_epochs):\n",
    "        for iteration in range(mnist.train.num_examples // batch_size):\n",
    "            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
    "            X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
    "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
    "        print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Multi-layer RNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "from tensorflow.contrib.layers import fully_connected\n",
    "\n",
    "n_steps = 28\n",
    "n_inputs = 28\n",
    "n_neurons1 = 150\n",
    "n_neurons2 = 100\n",
    "n_outputs = 10\n",
    "\n",
    "learning_rate = 0.001\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.int32, [None])\n",
    "\n",
    "hidden1 = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons1, activation=tf.nn.relu)\n",
    "hidden2 = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons2, activation=tf.nn.relu)\n",
    "multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell([hidden1, hidden2])\n",
    "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
    "\n",
    "logits = fully_connected(states, n_outputs, activation_fn=None)\n",
    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n",
    "loss = tf.reduce_mean(xentropy)\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "correct = tf.nn.in_top_k(logits, y, 1)\n",
    "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
    "\n",
    "init = tf.initialize_all_variables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "n_epochs = 100\n",
    "batch_size = 150\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for epoch in range(n_epochs):\n",
    "        for iteration in range(mnist.train.num_examples // batch_size):\n",
    "            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
    "            X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
    "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
    "        print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Time series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "t_min, t_max = 0, 30\n",
    "resolution = 0.1\n",
    "\n",
    "def time_series(t):\n",
    "    return t * np.sin(t) / 3 + 2 * np.sin(t*5)\n",
    "\n",
    "def next_batch(batch_size, n_steps):\n",
    "    t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)\n",
    "    Ts = t0 + np.arange(0., n_steps + 1) * resolution\n",
    "    ys = time_series(Ts)\n",
    "    return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "t = np.linspace(t_min, t_max, (t_max - t_min) // resolution)\n",
    "\n",
    "n_steps = 20\n",
    "t_instance = np.linspace(12.2, 12.2 + resolution * (n_steps + 1), n_steps + 1)\n",
    "\n",
    "plt.figure(figsize=(11,4))\n",
    "plt.subplot(121)\n",
    "plt.title(\"A time series (generated)\", fontsize=14)\n",
    "plt.plot(t, time_series(t), label=r\"$t . \\sin(t) / 3 + 2 . \\sin(5t)$\")\n",
    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"b-\", linewidth=3, label=\"A training instance\")\n",
    "plt.legend(loc=\"lower left\", fontsize=14)\n",
    "plt.axis([0, 30, -17, 13])\n",
    "plt.xlabel(\"Time\")\n",
    "plt.ylabel(\"Value\")\n",
    "\n",
    "plt.subplot(122)\n",
    "plt.title(\"A training instance\", fontsize=14)\n",
    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
    "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
    "plt.legend(loc=\"upper left\")\n",
    "plt.xlabel(\"Time\")\n",
    "\n",
    "\n",
    "save_fig(\"time_series_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X_batch, y_batch = next_batch(1, n_steps)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.c_[X_batch[0], y_batch[0]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using an `OuputProjectionWrapper`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "from tensorflow.contrib.layers import fully_connected\n",
    "\n",
    "n_steps = 20\n",
    "n_inputs = 1\n",
    "n_neurons = 100\n",
    "n_outputs = 1\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
    "\n",
    "cell = tf.nn.rnn_cell.OutputProjectionWrapper(\n",
    "    tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),\n",
    "    output_size=n_outputs)\n",
    "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)\n",
    "\n",
    "n_outputs = 1\n",
    "learning_rate = 0.001\n",
    "\n",
    "loss = tf.reduce_sum(tf.square(outputs - y))\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "\n",
    "init = tf.initialize_all_variables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "n_iterations = 1000\n",
    "batch_size = 50\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for iteration in range(n_iterations):\n",
    "        X_batch, y_batch = next_batch(batch_size, n_steps)\n",
    "        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        if iteration % 100 == 0:\n",
    "            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "            print(iteration, \"\\tMSE:\", mse)\n",
    "    \n",
    "    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
    "    y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
    "    print(y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "plt.title(\"Testing the model\", fontsize=14)\n",
    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
    "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
    "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
    "plt.legend(loc=\"upper left\")\n",
    "plt.xlabel(\"Time\")\n",
    "\n",
    "save_fig(\"time_series_pred_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Without using an `OutputProjectionWrapper`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "from tensorflow.contrib.layers import fully_connected\n",
    "\n",
    "n_steps = 20\n",
    "n_inputs = 1\n",
    "n_neurons = 100\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
    "\n",
    "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
    "rnn_outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
    "\n",
    "n_outputs = 1\n",
    "learning_rate = 0.001\n",
    "\n",
    "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
    "stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n",
    "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
    "\n",
    "loss = tf.reduce_sum(tf.square(outputs - y))\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "\n",
    "init = tf.initialize_all_variables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "n_iterations = 1000\n",
    "batch_size = 50\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for iteration in range(n_iterations):\n",
    "        X_batch, y_batch = next_batch(batch_size, n_steps)\n",
    "        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        if iteration % 100 == 0:\n",
    "            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "            print(iteration, \"\\tMSE:\", mse)\n",
    "    \n",
    "    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
    "    y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
    "    print(y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "plt.title(\"Testing the model\", fontsize=14)\n",
    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
    "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
    "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
    "plt.legend(loc=\"upper left\")\n",
    "plt.xlabel(\"Time\")\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generating a creative new sequence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "n_iterations = 2000\n",
    "batch_size = 50\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for iteration in range(n_iterations):\n",
    "        X_batch, y_batch = next_batch(batch_size, n_steps)\n",
    "        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        if iteration % 100 == 0:\n",
    "            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "            print(iteration, \"\\tMSE:\", mse)\n",
    "\n",
    "    sequence1 = [0. for i in range(n_steps)]\n",
    "    for iteration in range(len(t) - n_steps):\n",
    "        X_batch = np.array(sequence1[-n_steps:]).reshape(1, n_steps, 1)\n",
    "        y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
    "        sequence1.append(y_pred[0, -1, 0])\n",
    "\n",
    "    sequence2 = [time_series(i * resolution + t_min + (t_max-t_min/3)) for i in range(n_steps)]\n",
    "    for iteration in range(len(t) - n_steps):\n",
    "        X_batch = np.array(sequence2[-n_steps:]).reshape(1, n_steps, 1)\n",
    "        y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
    "        sequence2.append(y_pred[0, -1, 0])\n",
    "\n",
    "plt.figure(figsize=(11,4))\n",
    "plt.subplot(121)\n",
    "plt.plot(t, sequence1, \"b-\")\n",
    "plt.plot(t[:n_steps], sequence1[:n_steps], \"b-\", linewidth=3)\n",
    "plt.xlabel(\"Time\")\n",
    "plt.ylabel(\"Value\")\n",
    "\n",
    "plt.subplot(122)\n",
    "plt.plot(t, sequence2, \"b-\")\n",
    "plt.plot(t[:n_steps], sequence2[:n_steps], \"b-\", linewidth=3)\n",
    "plt.xlabel(\"Time\")\n",
    "#save_fig(\"creative_sequence_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Deep RNN"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MultiRNNCell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "n_inputs = 2\n",
    "n_neurons = 100\n",
    "n_layers = 3\n",
    "n_steps = 5\n",
    "keep_prob = 0.5\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
    "multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell([basic_cell] * n_layers)\n",
    "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
    "\n",
    "init = tf.initialize_all_variables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_batch = rnd.rand(2, n_steps, n_inputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "outputs_val.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dropout"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "from tensorflow.contrib.layers import fully_connected\n",
    "\n",
    "n_inputs = 1\n",
    "n_neurons = 100\n",
    "n_layers = 3\n",
    "n_steps = 20\n",
    "n_outputs = 1\n",
    "\n",
    "keep_prob = 0.5\n",
    "learning_rate = 0.001\n",
    "\n",
    "is_training = True\n",
    "\n",
    "def deep_rnn_with_dropout(X, y, is_training):\n",
    "    cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
    "    if is_training:\n",
    "        cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
    "    multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell([cell] * n_layers)\n",
    "    rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
    "\n",
    "    stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
    "    stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n",
    "    outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
    "\n",
    "    loss = tf.reduce_sum(tf.square(outputs - y))\n",
    "    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "    training_op = optimizer.minimize(loss)\n",
    "\n",
    "    return outputs, loss, training_op\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
    "outputs, loss, training_op = deep_rnn_with_dropout(X, y, is_training)\n",
    "init = tf.initialize_all_variables()\n",
    "saver = tf.train.Saver()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "n_iterations = 2000\n",
    "batch_size = 50\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    if is_training:\n",
    "        init.run()\n",
    "        for iteration in range(n_iterations):\n",
    "            X_batch, y_batch = next_batch(batch_size, n_steps)\n",
    "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "            if iteration % 100 == 0:\n",
    "                mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "                print(iteration, \"\\tMSE:\", mse)\n",
    "        save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n",
    "    else:\n",
    "        saver.restore(sess, \"/tmp/my_model.ckpt\")\n",
    "        X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
    "        y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
    "        \n",
    "        plt.title(\"Testing the model\", fontsize=14)\n",
    "        plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
    "        plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
    "        plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
    "        plt.legend(loc=\"upper left\")\n",
    "        plt.xlabel(\"Time\")\n",
    "        plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LSTM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "from tensorflow.contrib.layers import fully_connected\n",
    "\n",
    "n_steps = 28\n",
    "n_inputs = 28\n",
    "n_neurons = 150\n",
    "n_outputs = 10\n",
    "\n",
    "learning_rate = 0.001\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.int32, [None])\n",
    "\n",
    "lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=n_neurons, state_is_tuple=True)\n",
    "multi_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell]*3, state_is_tuple=True)\n",
    "outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n",
    "top_layer_h_state = states[-1][1]\n",
    "logits = fully_connected(top_layer_h_state, n_outputs, activation_fn=None, scope=\"softmax\")\n",
    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n",
    "loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "correct = tf.nn.in_top_k(logits, y, 1)\n",
    "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
    "    \n",
    "init = tf.initialize_all_variables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "states"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "top_layer_h_state"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "n_epochs = 10\n",
    "batch_size = 150\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for epoch in range(n_epochs):\n",
    "        for iteration in range(mnist.train.num_examples // batch_size):\n",
    "            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
    "            X_batch = X_batch.reshape((batch_size, n_steps, n_inputs))\n",
    "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
    "        print(\"Epoch\", epoch, \"Train accuracy =\", acc_train, \"Test accuracy =\", acc_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Distributing layers across devices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "class DeviceCellWrapper(tf.nn.rnn_cell.RNNCell):\n",
    "  def __init__(self, device, cell):\n",
    "    self._cell = cell\n",
    "    self._device = device\n",
    "\n",
    "  @property\n",
    "  def state_size(self):\n",
    "    return self._cell.state_size\n",
    "\n",
    "  @property\n",
    "  def output_size(self):\n",
    "    return self._cell.output_size\n",
    "\n",
    "  def __call__(self, inputs, state, scope=None):\n",
    "    with tf.device(self._device):\n",
    "        return self._cell(inputs, state, scope)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "n_inputs = 5\n",
    "n_neurons = 100\n",
    "devices = [\"/cpu:0\"]*5\n",
    "n_steps = 20\n",
    "X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])\n",
    "lstm_cells = [DeviceCellWrapper(device, tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons))\n",
    "              for device in devices]\n",
    "multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)\n",
    "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
    "init = tf.initialize_all_variables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    print(sess.run(outputs, feed_dict={X: rnd.rand(2, n_steps, n_inputs)}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Exercise solutions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Coming soon**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  },
  "nav_menu": {},
  "toc": {
   "navigate_menu": true,
   "number_sections": true,
   "sideBar": true,
   "threshold": 6,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}