{ "cells": [ { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "**Chapter 14 – Recurrent Neural Networks**" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "_This notebook contains all the sample code and solutions to the exercices in chapter 14._" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Setup" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "# To support both python 2 and python 3\n", "from __future__ import division, print_function, unicode_literals\n", "\n", "# Common imports\n", "import numpy as np\n", "import numpy.random as rnd\n", "import os\n", "\n", "# to make this notebook's output stable across runs\n", "rnd.seed(42)\n", "\n", "# To plot pretty figures\n", "%matplotlib inline\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "plt.rcParams['axes.labelsize'] = 14\n", "plt.rcParams['xtick.labelsize'] = 12\n", "plt.rcParams['ytick.labelsize'] = 12\n", "\n", "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", "CHAPTER_ID = \"rnn\"\n", "\n", "def save_fig(fig_id, tight_layout=True):\n", " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", " print(\"Saving figure\", fig_id)\n", " if tight_layout:\n", " plt.tight_layout()\n", " plt.savefig(path, format='png', dpi=300)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "Then of course we will need TensorFlow:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "import tensorflow as tf" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Basic RNNs" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Manual RNN" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_inputs = 3\n", "n_neurons = 5\n", "\n", "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n", "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n", "\n", "Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons], dtype=tf.float32))\n", "Wy = tf.Variable(tf.random_normal(shape=[n_neurons, n_neurons], dtype=tf.float32))\n", "b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))\n", "\n", "Y0 = tf.tanh(tf.matmul(X0, Wx) + b)\n", "Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)\n", "\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0\n", "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "print(Y0_val)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "print(Y1_val)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Using `rnn()`" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_inputs = 3\n", "n_neurons = 5\n", "\n", "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n", "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n", "\n", "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", "output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, [X0, X1], dtype=tf.float32)\n", "Y0, Y1 = output_seqs\n", "\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n", "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "Y0_val" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "Y1_val" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "from IPython.display import clear_output, Image, display, HTML\n", "\n", "def strip_consts(graph_def, max_const_size=32):\n", " \"\"\"Strip large constant values from graph_def.\"\"\"\n", " strip_def = tf.GraphDef()\n", " for n0 in graph_def.node:\n", " n = strip_def.node.add() \n", " n.MergeFrom(n0)\n", " if n.op == 'Const':\n", " tensor = n.attr['value'].tensor\n", " size = len(tensor.tensor_content)\n", " if size > max_const_size:\n", " tensor.tensor_content = \"b\"%size\n", " return strip_def\n", "\n", "def show_graph(graph_def, max_const_size=32):\n", " \"\"\"Visualize TensorFlow graph.\"\"\"\n", " if hasattr(graph_def, 'as_graph_def'):\n", " graph_def = graph_def.as_graph_def()\n", " strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n", " code = \"\"\"\n", " \n", " \n", "
\n", " \n", "
\n", " \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n", "\n", " iframe = \"\"\"\n", " \n", " \"\"\".format(code.replace('\"', '"'))\n", " display(HTML(iframe))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "show_graph(tf.get_default_graph())" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Packing sequences" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_steps = 2\n", "n_inputs = 3\n", "n_neurons = 5\n", "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))\n", "\n", "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", "output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, X_seqs, dtype=tf.float32)\n", "outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])\n", "\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "X_batch = np.array([\n", " # t = 0 t = 1 \n", " [[0, 1, 2], [9, 8, 7]], # instance 1\n", " [[3, 4, 5], [0, 0, 0]], # instance 2\n", " [[6, 7, 8], [6, 5, 4]], # instance 3\n", " [[9, 0, 1], [3, 2, 1]], # instance 4\n", " ])\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " outputs_val = outputs.eval(feed_dict={X: X_batch})" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "print(np.transpose(outputs_val, axes=[1, 0, 2])[1])" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Using `dynamic_rnn()`" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_steps = 2\n", "n_inputs = 3\n", "n_neurons = 5\n", "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "\n", "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", "\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "X_batch = np.array([\n", " [[0, 1, 2], [9, 8, 7]], # instance 1\n", " [[3, 4, 5], [0, 0, 0]], # instance 2\n", " [[6, 7, 8], [6, 5, 4]], # instance 3\n", " [[9, 0, 1], [3, 2, 1]], # instance 4\n", " ])\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " print(\"outputs =\", outputs.eval(feed_dict={X: X_batch}))" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "show_graph(tf.get_default_graph())" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Setting the sequence lengths" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_steps = 2\n", "n_inputs = 3\n", "n_neurons = 5\n", "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "seq_length = tf.placeholder(tf.int32, [None])\n", "\n", "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, sequence_length=seq_length, dtype=tf.float32)\n", "\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "X_batch = np.array([\n", " # step 0 step 1\n", " [[0, 1, 2], [9, 8, 7]], # instance 1\n", " [[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)\n", " [[6, 7, 8], [6, 5, 4]], # instance 3\n", " [[9, 0, 1], [3, 2, 1]], # instance 4\n", " ])\n", "seq_length_batch = np.array([2, 1, 2, 2])\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " outputs_val, states_val = sess.run(\n", " [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "print(outputs_val)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "print(states_val)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Training a sequence classifier" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "from tensorflow.contrib.layers import fully_connected\n", "\n", "n_steps = 28\n", "n_inputs = 28\n", "n_neurons = 150\n", "n_outputs = 10\n", "\n", "learning_rate = 0.001\n", "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "y = tf.placeholder(tf.int32, [None])\n", "\n", "with tf.variable_scope(\"rnn\", initializer=tf.contrib.layers.variance_scaling_initializer()):\n", " basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n", " outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", "\n", "logits = fully_connected(states, n_outputs, activation_fn=None)\n", "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", "loss = tf.reduce_mean(xentropy)\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(loss)\n", "correct = tf.nn.in_top_k(logits, y, 1)\n", "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", "\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from tensorflow.examples.tutorials.mnist import input_data\n", "mnist = input_data.read_data_sets(\"/tmp/data/\")\n", "X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))\n", "y_test = mnist.test.labels" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "n_epochs = 100\n", "batch_size = 150\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for epoch in range(n_epochs):\n", " for iteration in range(mnist.train.num_examples // batch_size):\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", " X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", " acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n", " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Multi-layer RNN" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "from tensorflow.contrib.layers import fully_connected\n", "\n", "n_steps = 28\n", "n_inputs = 28\n", "n_neurons1 = 150\n", "n_neurons2 = 100\n", "n_outputs = 10\n", "\n", "learning_rate = 0.001\n", "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "y = tf.placeholder(tf.int32, [None])\n", "\n", "hidden1 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons1, activation=tf.nn.relu)\n", "hidden2 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons2, activation=tf.nn.relu)\n", "multi_layer_cell = tf.contrib.rnn.MultiRNNCell([hidden1, hidden2])\n", "outputs, states_tuple = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", "states = tf.concat(axis=1, values=states_tuple)\n", "logits = fully_connected(states, n_outputs, activation_fn=None)\n", "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", "loss = tf.reduce_mean(xentropy)\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(loss)\n", "correct = tf.nn.in_top_k(logits, y, 1)\n", "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", "\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "n_epochs = 100\n", "batch_size = 150\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for epoch in range(n_epochs):\n", " for iteration in range(mnist.train.num_examples // batch_size):\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", " X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", " acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n", " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Time series" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "t_min, t_max = 0, 30\n", "resolution = 0.1\n", "\n", "def time_series(t):\n", " return t * np.sin(t) / 3 + 2 * np.sin(t*5)\n", "\n", "def next_batch(batch_size, n_steps):\n", " t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)\n", " Ts = t0 + np.arange(0., n_steps + 1) * resolution\n", " ys = time_series(Ts)\n", " return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "t = np.linspace(t_min, t_max, (t_max - t_min) // resolution)\n", "\n", "n_steps = 20\n", "t_instance = np.linspace(12.2, 12.2 + resolution * (n_steps + 1), n_steps + 1)\n", "\n", "plt.figure(figsize=(11,4))\n", "plt.subplot(121)\n", "plt.title(\"A time series (generated)\", fontsize=14)\n", "plt.plot(t, time_series(t), label=r\"$t . \\sin(t) / 3 + 2 . \\sin(5t)$\")\n", "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"b-\", linewidth=3, label=\"A training instance\")\n", "plt.legend(loc=\"lower left\", fontsize=14)\n", "plt.axis([0, 30, -17, 13])\n", "plt.xlabel(\"Time\")\n", "plt.ylabel(\"Value\")\n", "\n", "plt.subplot(122)\n", "plt.title(\"A training instance\", fontsize=14)\n", "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", "plt.legend(loc=\"upper left\")\n", "plt.xlabel(\"Time\")\n", "\n", "\n", "save_fig(\"time_series_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "X_batch, y_batch = next_batch(1, n_steps)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "np.c_[X_batch[0], y_batch[0]]" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Using an `OuputProjectionWrapper`" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "from tensorflow.contrib.layers import fully_connected\n", "\n", "n_steps = 20\n", "n_inputs = 1\n", "n_neurons = 100\n", "n_outputs = 1\n", "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", "\n", "cell = tf.contrib.rnn.OutputProjectionWrapper(\n", " tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),\n", " output_size=n_outputs)\n", "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)\n", "\n", "n_outputs = 1\n", "learning_rate = 0.001\n", "\n", "loss = tf.reduce_sum(tf.square(outputs - y))\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(loss)\n", "\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "n_iterations = 1000\n", "batch_size = 50\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for iteration in range(n_iterations):\n", " X_batch, y_batch = next_batch(batch_size, n_steps)\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", " if iteration % 100 == 0:\n", " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", " print(iteration, \"\\tMSE:\", mse)\n", " \n", " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", " print(y_pred)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.title(\"Testing the model\", fontsize=14)\n", "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n", "plt.legend(loc=\"upper left\")\n", "plt.xlabel(\"Time\")\n", "\n", "save_fig(\"time_series_pred_plot\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Without using an `OutputProjectionWrapper`" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "from tensorflow.contrib.layers import fully_connected\n", "\n", "n_steps = 20\n", "n_inputs = 1\n", "n_neurons = 100\n", "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", "\n", "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n", "rnn_outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n", "\n", "n_outputs = 1\n", "learning_rate = 0.001\n", "\n", "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", "stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n", "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", "\n", "loss = tf.reduce_sum(tf.square(outputs - y))\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(loss)\n", "\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "n_iterations = 1000\n", "batch_size = 50\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for iteration in range(n_iterations):\n", " X_batch, y_batch = next_batch(batch_size, n_steps)\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", " if iteration % 100 == 0:\n", " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", " print(iteration, \"\\tMSE:\", mse)\n", " \n", " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", " print(y_pred)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.title(\"Testing the model\", fontsize=14)\n", "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n", "plt.legend(loc=\"upper left\")\n", "plt.xlabel(\"Time\")\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Generating a creative new sequence" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "n_iterations = 2000\n", "batch_size = 50\n", "with tf.Session() as sess:\n", " init.run()\n", " for iteration in range(n_iterations):\n", " X_batch, y_batch = next_batch(batch_size, n_steps)\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", " if iteration % 100 == 0:\n", " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", " print(iteration, \"\\tMSE:\", mse)\n", "\n", " sequence1 = [0. for i in range(n_steps)]\n", " for iteration in range(len(t) - n_steps):\n", " X_batch = np.array(sequence1[-n_steps:]).reshape(1, n_steps, 1)\n", " y_pred = sess.run(outputs, feed_dict={X: X_batch})\n", " sequence1.append(y_pred[0, -1, 0])\n", "\n", " sequence2 = [time_series(i * resolution + t_min + (t_max-t_min/3)) for i in range(n_steps)]\n", " for iteration in range(len(t) - n_steps):\n", " X_batch = np.array(sequence2[-n_steps:]).reshape(1, n_steps, 1)\n", " y_pred = sess.run(outputs, feed_dict={X: X_batch})\n", " sequence2.append(y_pred[0, -1, 0])\n", "\n", "plt.figure(figsize=(11,4))\n", "plt.subplot(121)\n", "plt.plot(t, sequence1, \"b-\")\n", "plt.plot(t[:n_steps], sequence1[:n_steps], \"b-\", linewidth=3)\n", "plt.xlabel(\"Time\")\n", "plt.ylabel(\"Value\")\n", "\n", "plt.subplot(122)\n", "plt.plot(t, sequence2, \"b-\")\n", "plt.plot(t[:n_steps], sequence2[:n_steps], \"b-\", linewidth=3)\n", "plt.xlabel(\"Time\")\n", "#save_fig(\"creative_sequence_plot\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Deep RNN" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## MultiRNNCell" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_inputs = 2\n", "n_neurons = 100\n", "n_layers = 3\n", "n_steps = 5\n", "keep_prob = 0.5\n", "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", "multi_layer_cell = tf.contrib.rnn.MultiRNNCell([basic_cell] * n_layers)\n", "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", "\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "X_batch = rnd.rand(2, n_steps, n_inputs)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "with tf.Session() as sess:\n", " init.run()\n", " outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch})" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "outputs_val.shape" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Dropout" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "from tensorflow.contrib.layers import fully_connected\n", "\n", "n_inputs = 1\n", "n_neurons = 100\n", "n_layers = 3\n", "n_steps = 20\n", "n_outputs = 1\n", "\n", "keep_prob = 0.5\n", "learning_rate = 0.001\n", "\n", "is_training = True\n", "\n", "def deep_rnn_with_dropout(X, y, is_training):\n", " cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", " if is_training:\n", " cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n", " multi_layer_cell = tf.contrib.rnn.MultiRNNCell([cell] * n_layers)\n", " rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", "\n", " stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", " stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n", " outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", "\n", " loss = tf.reduce_sum(tf.square(outputs - y))\n", " optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", " training_op = optimizer.minimize(loss)\n", "\n", " return outputs, loss, training_op\n", "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", "outputs, loss, training_op = deep_rnn_with_dropout(X, y, is_training)\n", "init = tf.global_variables_initializer()\n", "saver = tf.train.Saver()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "n_iterations = 2000\n", "batch_size = 50\n", "\n", "with tf.Session() as sess:\n", " if is_training:\n", " init.run()\n", " for iteration in range(n_iterations):\n", " X_batch, y_batch = next_batch(batch_size, n_steps)\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", " if iteration % 100 == 0:\n", " mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n", " print(iteration, \"\\tMSE:\", mse)\n", " save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n", " else:\n", " saver.restore(sess, \"/tmp/my_model.ckpt\")\n", " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", " \n", " plt.title(\"Testing the model\", fontsize=14)\n", " plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", " plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", " plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n", " plt.legend(loc=\"upper left\")\n", " plt.xlabel(\"Time\")\n", " plt.show()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# LSTM" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "from tensorflow.contrib.layers import fully_connected\n", "\n", "n_steps = 28\n", "n_inputs = 28\n", "n_neurons = 150\n", "n_outputs = 10\n", "\n", "learning_rate = 0.001\n", "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "y = tf.placeholder(tf.int32, [None])\n", "\n", "lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n", "multi_cell = tf.contrib.rnn.MultiRNNCell([lstm_cell]*3)\n", "outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n", "top_layer_h_state = states[-1][1]\n", "logits = fully_connected(top_layer_h_state, n_outputs, activation_fn=None, scope=\"softmax\")\n", "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", "loss = tf.reduce_mean(xentropy, name=\"loss\")\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(loss)\n", "correct = tf.nn.in_top_k(logits, y, 1)\n", "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", " \n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "states" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "top_layer_h_state" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "n_epochs = 10\n", "batch_size = 150\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for epoch in range(n_epochs):\n", " for iteration in range(mnist.train.num_examples // batch_size):\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", " X_batch = X_batch.reshape((batch_size, n_steps, n_inputs))\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", " acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n", " print(\"Epoch\", epoch, \"Train accuracy =\", acc_train, \"Test accuracy =\", acc_test)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Distributing layers across devices" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import tensorflow as tf\n", "\n", "class DeviceCellWrapper(tf.contrib.rnn.RNNCell):\n", " def __init__(self, device, cell):\n", " self._cell = cell\n", " self._device = device\n", "\n", " @property\n", " def state_size(self):\n", " return self._cell.state_size\n", "\n", " @property\n", " def output_size(self):\n", " return self._cell.output_size\n", "\n", " def __call__(self, inputs, state, scope=None):\n", " with tf.device(self._device):\n", " return self._cell(inputs, state, scope)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_inputs = 5\n", "n_neurons = 100\n", "devices = [\"/cpu:0\"]*5\n", "n_steps = 20\n", "X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])\n", "lstm_cells = [DeviceCellWrapper(device, tf.contrib.rnn.BasicRNNCell(num_units=n_neurons))\n", " for device in devices]\n", "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)\n", "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "with tf.Session() as sess:\n", " init.run()\n", " print(sess.run(outputs, feed_dict={X: rnd.rand(2, n_steps, n_inputs)}))" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Embeddings" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "This section is based on TensorFlow's [Word2Vec tutorial](https://www.tensorflow.org/versions/r0.11/tutorials/word2vec/index.html)." ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Fetch the data" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "from six.moves import urllib\n", "\n", "import errno\n", "import os\n", "import zipfile\n", "\n", "WORDS_PATH = \"datasets/words\"\n", "WORDS_URL = 'http://mattmahoney.net/dc/text8.zip'\n", "\n", "def mkdir_p(path):\n", " \"\"\"Create directories, ok if they already exist.\n", " \n", " This is for python 2 support. In python >=3.2, simply use:\n", " >>> os.makedirs(path, exist_ok=True)\n", " \"\"\"\n", " try:\n", " os.makedirs(path)\n", " except OSError as exc:\n", " if exc.errno == errno.EEXIST and os.path.isdir(path):\n", " pass\n", " else:\n", " raise\n", "\n", "def fetch_words_data(words_url=WORDS_URL, words_path=WORDS_PATH):\n", " os.makedirs(words_path, exist_ok=True)\n", " zip_path = os.path.join(words_path, \"words.zip\")\n", " if not os.path.exists(zip_path):\n", " urllib.request.urlretrieve(words_url, zip_path)\n", " with zipfile.ZipFile(zip_path) as f:\n", " data = f.read(f.namelist()[0])\n", " return data.decode(\"ascii\").split()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "words = fetch_words_data()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "words[:5]" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Build the dictionary" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from collections import Counter\n", "\n", "vocabulary_size = 50000\n", "\n", "vocabulary = [(\"UNK\", None)] + Counter(words).most_common(vocabulary_size - 1)\n", "vocabulary = np.array([word for word, _ in vocabulary])\n", "dictionary = {word: code for code, word in enumerate(vocabulary)}\n", "data = np.array([dictionary.get(word, 0) for word in words])" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "\" \".join(words[:9]), data[:9]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "\" \".join([vocabulary[word_index] for word_index in [5241, 3081, 12, 6, 195, 2, 3134, 46, 59]])" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "words[24], data[24]" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Generate batches" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "import random\n", "from collections import deque\n", "\n", "def generate_batch(batch_size, num_skips, skip_window):\n", " global data_index\n", " assert batch_size % num_skips == 0\n", " assert num_skips <= 2 * skip_window\n", " batch = np.ndarray(shape=(batch_size), dtype=np.int32)\n", " labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)\n", " span = 2 * skip_window + 1 # [ skip_window target skip_window ]\n", " buffer = deque(maxlen=span)\n", " for _ in range(span):\n", " buffer.append(data[data_index])\n", " data_index = (data_index + 1) % len(data)\n", " for i in range(batch_size // num_skips):\n", " target = skip_window # target label at the center of the buffer\n", " targets_to_avoid = [ skip_window ]\n", " for j in range(num_skips):\n", " while target in targets_to_avoid:\n", " target = random.randint(0, span - 1)\n", " targets_to_avoid.append(target)\n", " batch[i * num_skips + j] = buffer[skip_window]\n", " labels[i * num_skips + j, 0] = buffer[target]\n", " buffer.append(data[data_index])\n", " data_index = (data_index + 1) % len(data)\n", " return batch, labels" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "data_index=0\n", "batch, labels = generate_batch(8, 2, 1)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "batch, [vocabulary[word] for word in batch]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "labels, [vocabulary[word] for word in labels[:, 0]]" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Build the model" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "batch_size = 128\n", "embedding_size = 128 # Dimension of the embedding vector.\n", "skip_window = 1 # How many words to consider left and right.\n", "num_skips = 2 # How many times to reuse an input to generate a label.\n", "\n", "# We pick a random validation set to sample nearest neighbors. Here we limit the\n", "# validation samples to the words that have a low numeric ID, which by\n", "# construction are also the most frequent.\n", "valid_size = 16 # Random set of words to evaluate similarity on.\n", "valid_window = 100 # Only pick dev samples in the head of the distribution.\n", "valid_examples = rnd.choice(valid_window, valid_size, replace=False)\n", "num_sampled = 64 # Number of negative examples to sample.\n", "\n", "learning_rate = 0.01" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "# Input data.\n", "train_inputs = tf.placeholder(tf.int32, shape=[batch_size])\n", "train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])\n", "valid_dataset = tf.constant(valid_examples, dtype=tf.int32)\n", "\n", "# Look up embeddings for inputs.\n", "init_embeddings = tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)\n", "embeddings = tf.Variable(init_embeddings)\n", "embed = tf.nn.embedding_lookup(embeddings, train_inputs)\n", "\n", "# Construct the variables for the NCE loss\n", "nce_weights = tf.Variable(\n", " tf.truncated_normal([vocabulary_size, embedding_size],\n", " stddev=1.0 / np.sqrt(embedding_size)))\n", "nce_biases = tf.Variable(tf.zeros([vocabulary_size]))\n", "\n", "# Compute the average NCE loss for the batch.\n", "# tf.nce_loss automatically draws a new sample of the negative labels each\n", "# time we evaluate the loss.\n", "loss = tf.reduce_mean(\n", " tf.nn.nce_loss(nce_weights, nce_biases, train_labels, embed,\n", " num_sampled, vocabulary_size))\n", "\n", "# Construct the Adam optimizer\n", "optimizer = tf.train.AdamOptimizer(learning_rate)\n", "training_op = optimizer.minimize(loss)\n", "\n", "# Compute the cosine similarity between minibatch examples and all embeddings.\n", "norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), axis=1, keep_dims=True))\n", "normalized_embeddings = embeddings / norm\n", "valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)\n", "similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True)\n", "\n", "# Add variable initializer.\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Train the model" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "num_steps = 100001\n", "\n", "with tf.Session() as session:\n", " init.run()\n", "\n", " average_loss = 0\n", " for step in range(num_steps):\n", " print(\"\\rIteration: {}\".format(step), end=\"\\t\")\n", " batch_inputs, batch_labels = generate_batch(batch_size, num_skips, skip_window)\n", " feed_dict = {train_inputs : batch_inputs, train_labels : batch_labels}\n", "\n", " # We perform one update step by evaluating the training op (including it\n", " # in the list of returned values for session.run()\n", " _, loss_val = session.run([training_op, loss], feed_dict=feed_dict)\n", " average_loss += loss_val\n", "\n", " if step % 2000 == 0:\n", " if step > 0:\n", " average_loss /= 2000\n", " # The average loss is an estimate of the loss over the last 2000 batches.\n", " print(\"Average loss at step \", step, \": \", average_loss)\n", " average_loss = 0\n", "\n", " # Note that this is expensive (~20% slowdown if computed every 500 steps)\n", " if step % 10000 == 0:\n", " sim = similarity.eval()\n", " for i in range(valid_size):\n", " valid_word = vocabulary[valid_examples[i]]\n", " top_k = 8 # number of nearest neighbors\n", " nearest = (-sim[i, :]).argsort()[1:top_k+1]\n", " log_str = \"Nearest to %s:\" % valid_word\n", " for k in range(top_k):\n", " close_word = vocabulary[nearest[k]]\n", " log_str = \"%s %s,\" % (log_str, close_word)\n", " print(log_str)\n", "\n", " final_embeddings = normalized_embeddings.eval()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "Let's save the final embeddings (of course you can use a TensorFlow `Saver` if you prefer):" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "np.save(\"./my_final_embeddings.npy\", final_embeddings)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Plot the embeddings" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "def plot_with_labels(low_dim_embs, labels):\n", " assert low_dim_embs.shape[0] >= len(labels), \"More labels than embeddings\"\n", " plt.figure(figsize=(18, 18)) #in inches\n", " for i, label in enumerate(labels):\n", " x, y = low_dim_embs[i,:]\n", " plt.scatter(x, y)\n", " plt.annotate(label,\n", " xy=(x, y),\n", " xytext=(5, 2),\n", " textcoords='offset points',\n", " ha='right',\n", " va='bottom')" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from sklearn.manifold import TSNE\n", "\n", "tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)\n", "plot_only = 500\n", "low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:])\n", "labels = [vocabulary[i] for i in range(plot_only)]\n", "plot_with_labels(low_dim_embs, labels)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Machine Translation" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "The `basic_rnn_seq2seq()` function creates a simple Encoder/Decoder model: it first runs an RNN to encode `encoder_inputs` into a state vector, then runs a decoder initialized with the last encoder state on `decoder_inputs`. Encoder and decoder use the same RNN cell type but they don't share parameters." ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import tensorflow as tf\n", "tf.reset_default_graph()\n", "\n", "n_steps = 50\n", "n_neurons = 200\n", "n_layers = 3\n", "num_encoder_symbols = 20000\n", "num_decoder_symbols = 20000\n", "embedding_size = 150\n", "learning_rate = 0.01\n", "\n", "X = tf.placeholder(tf.int32, [None, n_steps]) # English sentences\n", "Y = tf.placeholder(tf.int32, [None, n_steps]) # French translations\n", "W = tf.placeholder(tf.float32, [None, n_steps - 1, 1])\n", "Y_input = Y[:, :-1]\n", "Y_target = Y[:, 1:]\n", "\n", "encoder_inputs = tf.unstack(tf.transpose(X)) # list of 1D tensors\n", "decoder_inputs = tf.unstack(tf.transpose(Y_input)) # list of 1D tensors\n", "\n", "lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n", "cell = tf.contrib.rnn.MultiRNNCell([lstm_cell] * n_layers)\n", "\n", "output_seqs, states = tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq(\n", " encoder_inputs,\n", " decoder_inputs,\n", " cell,\n", " num_encoder_symbols,\n", " num_decoder_symbols,\n", " embedding_size)\n", "\n", "logits = tf.transpose(tf.unstack(output_seqs), perm=[1, 0, 2])" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "logits_flat = tf.reshape(logits, [-1, num_decoder_symbols])\n", "Y_target_flat = tf.reshape(Y_target, [-1])\n", "W_flat = tf.reshape(W, [-1])\n", "xentropy = W_flat * tf.nn.sparse_softmax_cross_entropy_with_logits(labels=Y_target_flat, logits=logits_flat)\n", "loss = tf.reduce_mean(xentropy)\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(loss)\n", "\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true, "deletable": true, "editable": true }, "source": [ "# Exercise solutions" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "**Coming soon**" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2+" }, "nav_menu": {}, "toc": { "navigate_menu": true, "number_sections": true, "sideBar": true, "threshold": 6, "toc_cell": false, "toc_section_display": "block", "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 0 }