{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Chapter 14 Recurrent Neural Networks**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"_This notebook contains all the sample code and solutions to the exercices in chapter 14._"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# To support both python 2 and python 3\n",
"from __future__ import division, print_function, unicode_literals\n",
"\n",
"# Common imports\n",
"import numpy as np\n",
"import numpy.random as rnd\n",
"import os\n",
"\n",
"# to make this notebook's output stable across runs\n",
"rnd.seed(42)\n",
"\n",
"# To plot pretty figures\n",
"%matplotlib inline\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"plt.rcParams['axes.labelsize'] = 14\n",
"plt.rcParams['xtick.labelsize'] = 12\n",
"plt.rcParams['ytick.labelsize'] = 12\n",
"\n",
"# Where to save the figures\n",
"PROJECT_ROOT_DIR = \".\"\n",
"CHAPTER_ID = \"rnn\"\n",
"\n",
"def save_fig(fig_id, tight_layout=True):\n",
" path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n",
" print(\"Saving figure\", fig_id)\n",
" if tight_layout:\n",
" plt.tight_layout()\n",
" plt.savefig(path, format='png', dpi=300)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then of course we will need TensorFlow:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import tensorflow as tf"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Basic RNNs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Manual RNN"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"n_inputs = 3\n",
"n_neurons = 5\n",
"\n",
"X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
"X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
"\n",
"Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons], dtype=tf.float32))\n",
"Wy = tf.Variable(tf.random_normal(shape=[n_neurons, n_neurons], dtype=tf.float32))\n",
"b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))\n",
"\n",
"Y0 = tf.tanh(tf.matmul(X0, Wx) + b)\n",
"Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)\n",
"\n",
"init = tf.initialize_all_variables()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0\n",
"X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(Y0_val)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(Y1_val)"
]
},
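{
"cell_type": "markdown",
"metadata": {},
"source": [
"In case it helps to see the recurrence written out: each output above is computed as\n",
"\n",
"$\\mathbf{Y}_{(t)} = \\tanh(\\mathbf{X}_{(t)} \\cdot \\mathbf{W}_x + \\mathbf{Y}_{(t-1)} \\cdot \\mathbf{W}_y + \\mathbf{b})$\n",
"\n",
"with $\\mathbf{Y}_{(-1)} = 0$, which is exactly what the two `tf.tanh(...)` lines compute for $t=0$ and $t=1$."
]
},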
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using `rnn()`"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"n_inputs = 3\n",
"n_neurons = 5\n",
"\n",
"X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
"X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
"\n",
"basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
"output_seqs, states = tf.nn.rnn(basic_cell, [X0, X1], dtype=tf.float32)\n",
"Y0, Y1 = output_seqs\n",
"\n",
"init = tf.initialize_all_variables()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n",
"X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"Y0_val"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"Y1_val"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from IPython.display import clear_output, Image, display, HTML\n",
"\n",
"def strip_consts(graph_def, max_const_size=32):\n",
" \"\"\"Strip large constant values from graph_def.\"\"\"\n",
" strip_def = tf.GraphDef()\n",
" for n0 in graph_def.node:\n",
" n = strip_def.node.add() \n",
" n.MergeFrom(n0)\n",
" if n.op == 'Const':\n",
" tensor = n.attr['value'].tensor\n",
" size = len(tensor.tensor_content)\n",
" if size > max_const_size:\n",
" tensor.tensor_content = \"b<stripped %d bytes>\"%size\n",
" return strip_def\n",
"\n",
"def show_graph(graph_def, max_const_size=32):\n",
" \"\"\"Visualize TensorFlow graph.\"\"\"\n",
" if hasattr(graph_def, 'as_graph_def'):\n",
" graph_def = graph_def.as_graph_def()\n",
" strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n",
" code = \"\"\"\n",
" <script>\n",
" function load() {{\n",
" document.getElementById(\"{id}\").pbtxt = {data};\n",
" }}\n",
" </script>\n",
" <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n",
" <div style=\"height:600px\">\n",
" <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n",
" </div>\n",
" \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n",
"\n",
" iframe = \"\"\"\n",
" <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n",
" \"\"\".format(code.replace('\"', '&quot;'))\n",
" display(HTML(iframe))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"show_graph(tf.get_default_graph())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Packing sequences"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"n_steps = 2\n",
"n_inputs = 3\n",
"n_neurons = 5\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"X_seqs = tf.unpack(tf.transpose(X, perm=[1, 0, 2]))\n",
"\n",
"basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
"output_seqs, states = tf.nn.rnn(basic_cell, X_seqs, dtype=tf.float32)\n",
"outputs = tf.transpose(tf.pack(output_seqs), perm=[1, 0, 2])\n",
"\n",
"init = tf.initialize_all_variables()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_batch = np.array([\n",
" # t = 0 t = 1 \n",
" [[0, 1, 2], [9, 8, 7]], # instance 1\n",
" [[3, 4, 5], [0, 0, 0]], # instance 2\n",
" [[6, 7, 8], [6, 5, 4]], # instance 3\n",
" [[9, 0, 1], [3, 2, 1]], # instance 4\n",
" ])\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" outputs_val = outputs.eval(feed_dict={X: X_batch})"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(np.transpose(outputs_val, axes=[1, 0, 2])[1])"
]
},
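{
"cell_type": "markdown",
"metadata": {},
"source": [
"To recap the shape gymnastics above: `rnn()` expects a Python list of `n_steps` tensors of shape `[batch_size, n_inputs]`, so we transpose `X` from `[batch_size, n_steps, n_inputs]` to `[n_steps, batch_size, n_inputs]` and `unpack()` it along the first dimension; on the way out we `pack()` the per-step outputs and transpose back to `[batch_size, n_steps, n_neurons]`."
]
},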
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using `dynamic_rnn()`"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"n_steps = 2\n",
"n_inputs = 3\n",
"n_neurons = 5\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"\n",
"basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
"outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
"\n",
"init = tf.initialize_all_variables()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_batch = np.array([\n",
" [[0, 1, 2], [9, 8, 7]], # instance 1\n",
" [[3, 4, 5], [0, 0, 0]], # instance 2\n",
" [[6, 7, 8], [6, 5, 4]], # instance 3\n",
" [[9, 0, 1], [3, 2, 1]], # instance 4\n",
" ])\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" print(\"outputs =\", outputs.eval(feed_dict={X: X_batch}))"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"show_graph(tf.get_default_graph())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setting the sequence lengths"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"n_steps = 2\n",
"n_inputs = 3\n",
"n_neurons = 5\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"seq_length = tf.placeholder(tf.int32, [None])\n",
"\n",
"basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
"outputs, states = tf.nn.dynamic_rnn(basic_cell, X, sequence_length=seq_length, dtype=tf.float32)\n",
"\n",
"init = tf.initialize_all_variables()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_batch = np.array([\n",
" # step 0 step 1\n",
" [[0, 1, 2], [9, 8, 7]], # instance 1\n",
" [[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)\n",
" [[6, 7, 8], [6, 5, 4]], # instance 3\n",
" [[9, 0, 1], [3, 2, 1]], # instance 4\n",
" ])\n",
"seq_length_batch = np.array([2, 1, 2, 2])\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" outputs_val, states_val = sess.run(\n",
" [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(outputs_val)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(states_val)"
]
},
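{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that `dynamic_rnn()` outputs zero vectors for every time step past an instance's `sequence_length`, and `states` contains the last *actual* output of each instance (not the zero padding). A quick check on instance 2, whose sequence length is 1:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(outputs_val[1])  # second step is a zero vector\n",
"print(states_val[1])   # equal to outputs_val[1, 0]"
]
},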
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Training a sequence classifier"
]
},
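{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can treat each 28×28 MNIST image as a sequence of 28 rows of 28 pixels each: the RNN reads one row per time step (`n_steps = 28`, `n_inputs = 28`) and we feed its final state to a fully connected layer with 10 output units, one per digit class."
]
},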
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import fully_connected\n",
"\n",
"n_steps = 28\n",
"n_inputs = 28\n",
"n_neurons = 150\n",
"n_outputs = 10\n",
"\n",
"learning_rate = 0.001\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.int32, [None])\n",
"\n",
"with tf.variable_scope(\"rnn\", initializer=tf.contrib.layers.variance_scaling_initializer()):\n",
" basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
" outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
"\n",
"logits = fully_connected(states, n_outputs, activation_fn=None)\n",
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n",
"loss = tf.reduce_mean(xentropy)\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"correct = tf.nn.in_top_k(logits, y, 1)\n",
"accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
"\n",
"init = tf.initialize_all_variables()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from tensorflow.examples.tutorials.mnist import input_data\n",
"mnist = input_data.read_data_sets(\"/tmp/data/\")\n",
"X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))\n",
"y_test = mnist.test.labels"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n_epochs = 100\n",
"batch_size = 150\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for iteration in range(mnist.train.num_examples // batch_size):\n",
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Multi-layer RNN"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import fully_connected\n",
"\n",
"n_steps = 28\n",
"n_inputs = 28\n",
"n_neurons1 = 150\n",
"n_neurons2 = 100\n",
"n_outputs = 10\n",
"\n",
"learning_rate = 0.001\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.int32, [None])\n",
"\n",
"hidden1 = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons1, activation=tf.nn.relu)\n",
"hidden2 = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons2, activation=tf.nn.relu)\n",
"multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell([hidden1, hidden2])\n",
"outputs, states_tuple = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
"states = tf.concat(concat_dim=1, values=states_tuple)\n",
"logits = fully_connected(states, n_outputs, activation_fn=None)\n",
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n",
"loss = tf.reduce_mean(xentropy)\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"correct = tf.nn.in_top_k(logits, y, 1)\n",
"accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
"\n",
"init = tf.initialize_all_variables()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n_epochs = 100\n",
"batch_size = 150\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for iteration in range(mnist.train.num_examples // batch_size):\n",
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Time series"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"t_min, t_max = 0, 30\n",
"resolution = 0.1\n",
"\n",
"def time_series(t):\n",
" return t * np.sin(t) / 3 + 2 * np.sin(t*5)\n",
"\n",
"def next_batch(batch_size, n_steps):\n",
" t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)\n",
" Ts = t0 + np.arange(0., n_steps + 1) * resolution\n",
" ys = time_series(Ts)\n",
" return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)"
]
},
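{
"cell_type": "markdown",
"metadata": {},
"source": [
"`next_batch()` samples `batch_size` random windows from the curve: for each window it evaluates the series at `n_steps + 1` consecutive points, then returns the first `n_steps` values as inputs and the same values shifted one step into the future as targets, each with shape `[batch_size, n_steps, 1]`."
]
},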
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"t = np.linspace(t_min, t_max, (t_max - t_min) // resolution)\n",
"\n",
"n_steps = 20\n",
"t_instance = np.linspace(12.2, 12.2 + resolution * (n_steps + 1), n_steps + 1)\n",
"\n",
"plt.figure(figsize=(11,4))\n",
"plt.subplot(121)\n",
"plt.title(\"A time series (generated)\", fontsize=14)\n",
"plt.plot(t, time_series(t), label=r\"$t . \\sin(t) / 3 + 2 . \\sin(5t)$\")\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"b-\", linewidth=3, label=\"A training instance\")\n",
"plt.legend(loc=\"lower left\", fontsize=14)\n",
"plt.axis([0, 30, -17, 13])\n",
"plt.xlabel(\"Time\")\n",
"plt.ylabel(\"Value\")\n",
"\n",
"plt.subplot(122)\n",
"plt.title(\"A training instance\", fontsize=14)\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
"plt.legend(loc=\"upper left\")\n",
"plt.xlabel(\"Time\")\n",
"\n",
"\n",
"save_fig(\"time_series_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_batch, y_batch = next_batch(1, n_steps)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"np.c_[X_batch[0], y_batch[0]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using an `OuputProjectionWrapper`"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import fully_connected\n",
"\n",
"n_steps = 20\n",
"n_inputs = 1\n",
"n_neurons = 100\n",
"n_outputs = 1\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
"\n",
"cell = tf.nn.rnn_cell.OutputProjectionWrapper(\n",
" tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),\n",
" output_size=n_outputs)\n",
"outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)\n",
"\n",
"n_outputs = 1\n",
"learning_rate = 0.001\n",
"\n",
"loss = tf.reduce_sum(tf.square(outputs - y))\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
"init = tf.initialize_all_variables()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n_iterations = 1000\n",
"batch_size = 50\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" if iteration % 100 == 0:\n",
" mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
" print(iteration, \"\\tMSE:\", mse)\n",
" \n",
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
" print(y_pred)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"plt.title(\"Testing the model\", fontsize=14)\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
"plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
"plt.legend(loc=\"upper left\")\n",
"plt.xlabel(\"Time\")\n",
"\n",
"save_fig(\"time_series_pred_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Without using an `OutputProjectionWrapper`"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import fully_connected\n",
"\n",
"n_steps = 20\n",
"n_inputs = 1\n",
"n_neurons = 100\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
"\n",
"basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
"rnn_outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
"\n",
"n_outputs = 1\n",
"learning_rate = 0.001\n",
"\n",
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
"stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n",
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
"\n",
"loss = tf.reduce_sum(tf.square(outputs - y))\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
"init = tf.initialize_all_variables()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n_iterations = 1000\n",
"batch_size = 50\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" if iteration % 100 == 0:\n",
" mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
" print(iteration, \"\\tMSE:\", mse)\n",
" \n",
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
" print(y_pred)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"plt.title(\"Testing the model\", fontsize=14)\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
"plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
"plt.legend(loc=\"upper left\")\n",
"plt.xlabel(\"Time\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generating a creative new sequence"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n_iterations = 2000\n",
"batch_size = 50\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" if iteration % 100 == 0:\n",
" mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
" print(iteration, \"\\tMSE:\", mse)\n",
"\n",
" sequence1 = [0. for i in range(n_steps)]\n",
" for iteration in range(len(t) - n_steps):\n",
" X_batch = np.array(sequence1[-n_steps:]).reshape(1, n_steps, 1)\n",
" y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
" sequence1.append(y_pred[0, -1, 0])\n",
"\n",
" sequence2 = [time_series(i * resolution + t_min + (t_max-t_min/3)) for i in range(n_steps)]\n",
" for iteration in range(len(t) - n_steps):\n",
" X_batch = np.array(sequence2[-n_steps:]).reshape(1, n_steps, 1)\n",
" y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
" sequence2.append(y_pred[0, -1, 0])\n",
"\n",
"plt.figure(figsize=(11,4))\n",
"plt.subplot(121)\n",
"plt.plot(t, sequence1, \"b-\")\n",
"plt.plot(t[:n_steps], sequence1[:n_steps], \"b-\", linewidth=3)\n",
"plt.xlabel(\"Time\")\n",
"plt.ylabel(\"Value\")\n",
"\n",
"plt.subplot(122)\n",
"plt.plot(t, sequence2, \"b-\")\n",
"plt.plot(t[:n_steps], sequence2[:n_steps], \"b-\", linewidth=3)\n",
"plt.xlabel(\"Time\")\n",
"#save_fig(\"creative_sequence_plot\")\n",
"plt.show()"
]
},
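{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here the model is used generatively: we start from a seed sequence of `n_steps` values (zeros on the left, actual series values on the right), then repeatedly feed the model the last `n_steps` values and append its prediction for the next step, one value at a time. Since each prediction becomes part of the next input, errors can compound."
]
},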
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Deep RNN"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## MultiRNNCell"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"n_inputs = 2\n",
"n_neurons = 100\n",
"n_layers = 3\n",
"n_steps = 5\n",
"keep_prob = 0.5\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
"multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell([basic_cell] * n_layers)\n",
"outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
"\n",
"init = tf.initialize_all_variables()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"X_batch = rnd.rand(2, n_steps, n_inputs)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
" init.run()\n",
" outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch})"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"outputs_val.shape"
]
},
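{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `outputs` tensor contains the outputs of the *top* layer only, hence the shape `(2, n_steps, n_neurons)` above, while `states` holds one final state per layer."
]
},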
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dropout"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"from tensorflow.contrib.layers import fully_connected\n",
"\n",
"n_inputs = 1\n",
"n_neurons = 100\n",
"n_layers = 3\n",
"n_steps = 20\n",
"n_outputs = 1\n",
"\n",
"keep_prob = 0.5\n",
"learning_rate = 0.001\n",
"\n",
"is_training = True\n",
"\n",
"def deep_rnn_with_dropout(X, y, is_training):\n",
" cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
" if is_training:\n",
" cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
" multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell([cell] * n_layers)\n",
" rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
"\n",
" stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
" stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n",
" outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
"\n",
" loss = tf.reduce_sum(tf.square(outputs - y))\n",
" optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
" training_op = optimizer.minimize(loss)\n",
"\n",
" return outputs, loss, training_op\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
"outputs, loss, training_op = deep_rnn_with_dropout(X, y, is_training)\n",
"init = tf.initialize_all_variables()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n_iterations = 2000\n",
"batch_size = 50\n",
"\n",
"with tf.Session() as sess:\n",
" if is_training:\n",
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" if iteration % 100 == 0:\n",
" mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
" print(iteration, \"\\tMSE:\", mse)\n",
" save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n",
" else:\n",
" saver.restore(sess, \"/tmp/my_model.ckpt\")\n",
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
" \n",
" plt.title(\"Testing the model\", fontsize=14)\n",
" plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
" plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
" plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
" plt.legend(loc=\"upper left\")\n",
" plt.xlabel(\"Time\")\n",
" plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# LSTM"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import fully_connected\n",
"\n",
"n_steps = 28\n",
"n_inputs = 28\n",
"n_neurons = 150\n",
"n_outputs = 10\n",
"\n",
"learning_rate = 0.001\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.int32, [None])\n",
"\n",
"lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=n_neurons)\n",
"multi_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell]*3)\n",
"outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n",
"top_layer_h_state = states[-1][1]\n",
"logits = fully_connected(top_layer_h_state, n_outputs, activation_fn=None, scope=\"softmax\")\n",
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n",
"loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"correct = tf.nn.in_top_k(logits, y, 1)\n",
"accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
" \n",
"init = tf.initialize_all_variables()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"states"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"top_layer_h_state"
]
},
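{
"cell_type": "markdown",
"metadata": {},
"source": [
"Each LSTM cell's state is a tuple `(c, h)`: the long-term cell state and the short-term hidden state (which is also the cell's output). With three stacked cells, `states` holds one such tuple per layer, so `states[-1][1]` is the $h$ state of the top layer at the final time step, which is what we feed to the softmax layer."
]
},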
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n_epochs = 10\n",
"batch_size = 150\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for iteration in range(mnist.train.num_examples // batch_size):\n",
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" X_batch = X_batch.reshape((batch_size, n_steps, n_inputs))\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
" print(\"Epoch\", epoch, \"Train accuracy =\", acc_train, \"Test accuracy =\", acc_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Distributing layers across devices"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"class DeviceCellWrapper(tf.nn.rnn_cell.RNNCell):\n",
" def __init__(self, device, cell):\n",
" self._cell = cell\n",
" self._device = device\n",
"\n",
" @property\n",
" def state_size(self):\n",
" return self._cell.state_size\n",
"\n",
" @property\n",
" def output_size(self):\n",
" return self._cell.output_size\n",
"\n",
" def __call__(self, inputs, state, scope=None):\n",
" with tf.device(self._device):\n",
" return self._cell(inputs, state, scope)"
]
},
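{
"cell_type": "markdown",
"metadata": {},
"source": [
"A cell is just a factory: its variables and operations are only created later, when `dynamic_rnn()` calls it. So wrapping the cell *creation* in a `tf.device()` block would not pin anything; instead, this wrapper enters the device block inside `__call__()`, so every operation the wrapped cell creates lands on the requested device."
]
},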
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"n_inputs = 5\n",
"n_neurons = 100\n",
"devices = [\"/cpu:0\"]*5\n",
"n_steps = 20\n",
"X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])\n",
"lstm_cells = [DeviceCellWrapper(device, tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons))\n",
" for device in devices]\n",
"multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)\n",
"outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
"init = tf.initialize_all_variables()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
" init.run()\n",
" print(sess.run(outputs, feed_dict={X: rnd.rand(2, n_steps, n_inputs)}))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Embeddings"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This section is based on TensorFlow's [Word2Vec tutorial](https://www.tensorflow.org/versions/r0.11/tutorials/word2vec/index.html)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Fetch the data"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from six.moves import urllib\n",
"\n",
"import os\n",
"import zipfile\n",
"import urllib.request\n",
"\n",
"WORDS_PATH = \"datasets/words\"\n",
"WORDS_URL = 'http://mattmahoney.net/dc/text8.zip'\n",
"\n",
"def fetch_words_data(words_url=WORDS_URL, words_path=WORDS_PATH):\n",
" os.makedirs(words_path, exist_ok=True)\n",
" zip_path = os.path.join(words_path, \"words.zip\")\n",
" if not os.path.exists(zip_path):\n",
" urllib.request.urlretrieve(words_url, zip_path)\n",
" with zipfile.ZipFile(zip_path) as f:\n",
" data = f.read(f.namelist()[0])\n",
" return data.decode(\"ascii\").split()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"words = fetch_words_data()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"words[:5]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Build the dictionary"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from collections import Counter\n",
"\n",
"vocabulary_size = 50000\n",
"\n",
"vocabulary = [(\"UNK\", None)] + Counter(words).most_common(vocabulary_size - 1)\n",
"vocabulary = np.array([word for word, _ in vocabulary])\n",
"dictionary = {word: code for code, word in enumerate(vocabulary)}\n",
"data = np.array([dictionary.get(word, 0) for word in words])"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"\" \".join(words[:9]), data[:9]"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"\" \".join([vocabulary[word_index] for word_index in [5241, 3081, 12, 6, 195, 2, 3134, 46, 59]])"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"words[24], data[24]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generate batches"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import random\n",
"from collections import deque\n",
"\n",
"def generate_batch(batch_size, num_skips, skip_window):\n",
" global data_index\n",
" assert batch_size % num_skips == 0\n",
" assert num_skips <= 2 * skip_window\n",
" batch = np.ndarray(shape=(batch_size), dtype=np.int32)\n",
" labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)\n",
" span = 2 * skip_window + 1 # [ skip_window target skip_window ]\n",
" buffer = deque(maxlen=span)\n",
" for _ in range(span):\n",
" buffer.append(data[data_index])\n",
" data_index = (data_index + 1) % len(data)\n",
" for i in range(batch_size // num_skips):\n",
" target = skip_window # target label at the center of the buffer\n",
" targets_to_avoid = [ skip_window ]\n",
" for j in range(num_skips):\n",
" while target in targets_to_avoid:\n",
" target = random.randint(0, span - 1)\n",
" targets_to_avoid.append(target)\n",
" batch[i * num_skips + j] = buffer[skip_window]\n",
" labels[i * num_skips + j, 0] = buffer[target]\n",
" buffer.append(data[data_index])\n",
" data_index = (data_index + 1) % len(data)\n",
" return batch, labels"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data_index=0\n",
"batch, labels = generate_batch(8, 2, 1)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"batch, [vocabulary[word] for word in batch]"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"labels, [vocabulary[word] for word in labels[:, 0]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Build the model"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"batch_size = 128\n",
"embedding_size = 128 # Dimension of the embedding vector.\n",
"skip_window = 1 # How many words to consider left and right.\n",
"num_skips = 2 # How many times to reuse an input to generate a label.\n",
"\n",
"# We pick a random validation set to sample nearest neighbors. Here we limit the\n",
"# validation samples to the words that have a low numeric ID, which by\n",
"# construction are also the most frequent.\n",
"valid_size = 16 # Random set of words to evaluate similarity on.\n",
"valid_window = 100 # Only pick dev samples in the head of the distribution.\n",
"valid_examples = rnd.choice(valid_window, valid_size, replace=False)\n",
"num_sampled = 64 # Number of negative examples to sample.\n",
"\n",
"learning_rate = 0.01"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"# Input data.\n",
"train_inputs = tf.placeholder(tf.int32, shape=[batch_size])\n",
"train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])\n",
"valid_dataset = tf.constant(valid_examples, dtype=tf.int32)\n",
"\n",
"# Look up embeddings for inputs.\n",
"init_embeddings = tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)\n",
"embeddings = tf.Variable(init_embeddings)\n",
"embed = tf.nn.embedding_lookup(embeddings, train_inputs)\n",
"\n",
"# Construct the variables for the NCE loss\n",
"nce_weights = tf.Variable(\n",
" tf.truncated_normal([vocabulary_size, embedding_size],\n",
" stddev=1.0 / np.sqrt(embedding_size)))\n",
"nce_biases = tf.Variable(tf.zeros([vocabulary_size]))\n",
"\n",
"# Compute the average NCE loss for the batch.\n",
"# tf.nce_loss automatically draws a new sample of the negative labels each\n",
"# time we evaluate the loss.\n",
"loss = tf.reduce_mean(\n",
" tf.nn.nce_loss(nce_weights, nce_biases, embed, train_labels,\n",
" num_sampled, vocabulary_size))\n",
"\n",
"# Construct the Adam optimizer\n",
"optimizer = tf.train.AdamOptimizer(learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
"# Compute the cosine similarity between minibatch examples and all embeddings.\n",
"norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), reduction_indices=1, keep_dims=True))\n",
"normalized_embeddings = embeddings / norm\n",
"valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)\n",
"similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True)\n",
"\n",
"# Add variable initializer.\n",
"init = tf.initialize_all_variables()"
]
},
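{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rather than computing a full 50,000-way softmax, `tf.nn.nce_loss()` trains the embeddings by noise-contrastive estimation: for each (input, label) pair it samples `num_sampled` random \"noise\" words and learns to distinguish the true label from the noise, which is much cheaper and approximates the softmax objective."
]
},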
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train the model"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"num_steps = 100001\n",
"\n",
"with tf.Session() as session:\n",
" init.run()\n",
"\n",
" average_loss = 0\n",
" for step in range(num_steps):\n",
" print(\"\\rIteration: {}\".format(step), end=\"\\t\")\n",
" batch_inputs, batch_labels = generate_batch(batch_size, num_skips, skip_window)\n",
" feed_dict = {train_inputs : batch_inputs, train_labels : batch_labels}\n",
"\n",
" # We perform one update step by evaluating the training op (including it\n",
" # in the list of returned values for session.run()\n",
" _, loss_val = session.run([training_op, loss], feed_dict=feed_dict)\n",
" average_loss += loss_val\n",
"\n",
" if step % 2000 == 0:\n",
" if step > 0:\n",
" average_loss /= 2000\n",
" # The average loss is an estimate of the loss over the last 2000 batches.\n",
" print(\"Average loss at step \", step, \": \", average_loss)\n",
" average_loss = 0\n",
"\n",
" # Note that this is expensive (~20% slowdown if computed every 500 steps)\n",
" if step % 10000 == 0:\n",
" sim = similarity.eval()\n",
" for i in range(valid_size):\n",
" valid_word = vocabulary[valid_examples[i]]\n",
" top_k = 8 # number of nearest neighbors\n",
" nearest = (-sim[i, :]).argsort()[1:top_k+1]\n",
" log_str = \"Nearest to %s:\" % valid_word\n",
" for k in range(top_k):\n",
" close_word = vocabulary[nearest[k]]\n",
" log_str = \"%s %s,\" % (log_str, close_word)\n",
" print(log_str)\n",
"\n",
" final_embeddings = normalized_embeddings.eval()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's save the final embeddings (of course you can use a TensorFlow `Saver` if you prefer):"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"np.save(\"my_final_embeddings.npy\", final_embeddings)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot the embeddings"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def plot_with_labels(low_dim_embs, labels):\n",
" assert low_dim_embs.shape[0] >= len(labels), \"More labels than embeddings\"\n",
" plt.figure(figsize=(18, 18)) #in inches\n",
" for i, label in enumerate(labels):\n",
" x, y = low_dim_embs[i,:]\n",
" plt.scatter(x, y)\n",
" plt.annotate(label,\n",
" xy=(x, y),\n",
" xytext=(5, 2),\n",
" textcoords='offset points',\n",
" ha='right',\n",
" va='bottom')"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.manifold import TSNE\n",
"\n",
"tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)\n",
"plot_only = 500\n",
"low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:])\n",
"labels = [vocabulary[i] for i in range(plot_only)]\n",
"plot_with_labels(low_dim_embs, labels)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Machine Translation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `basic_rnn_seq2seq()` function creates a simple Encoder/Decoder model: it first runs an RNN to encode `encoder_inputs` into a state vector, then runs a decoder initialized with the last encoder state on `decoder_inputs`. Encoder and decoder use the same RNN cell type but they don't share parameters."
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"tf.reset_default_graph()\n",
"\n",
"n_steps = 50\n",
"n_neurons = 200\n",
"n_layers = 3\n",
"num_encoder_symbols = 20000\n",
"num_decoder_symbols = 20000\n",
"embedding_size = 150\n",
"learning_rate = 0.01\n",
"\n",
"X = tf.placeholder(tf.int32, [None, n_steps]) # English sentences\n",
"Y = tf.placeholder(tf.int32, [None, n_steps]) # French translations\n",
"W = tf.placeholder(tf.float32, [None, n_steps - 1, 1])\n",
"Y_input = Y[:, :-1]\n",
"Y_target = Y[:, 1:]\n",
"\n",
"encoder_inputs = tf.unpack(tf.transpose(X)) # list of 1D tensors\n",
"decoder_inputs = tf.unpack(tf.transpose(Y_input)) # list of 1D tensors\n",
"\n",
"lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=n_neurons)\n",
"cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * n_layers)\n",
"\n",
"output_seqs, states = tf.nn.seq2seq.embedding_rnn_seq2seq(\n",
" encoder_inputs,\n",
" decoder_inputs,\n",
" cell,\n",
" num_encoder_symbols,\n",
" num_decoder_symbols,\n",
" embedding_size)\n",
"\n",
"logits = tf.transpose(tf.pack(output_seqs), perm=[1, 0, 2])"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"logits_flat = tf.reshape(logits, [-1, num_decoder_symbols])\n",
"Y_target_flat = tf.reshape(Y_target, [-1])\n",
"W_flat = tf.reshape(W, [-1])\n",
"xentropy = W_flat * tf.nn.sparse_softmax_cross_entropy_with_logits(logits_flat, Y_target_flat)\n",
"loss = tf.reduce_mean(xentropy)\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
"init = tf.initialize_all_variables()"
]
},
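{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `W` placeholder holds one weight per target time step; the flattened weights multiply the per-token cross-entropy, so setting a weight to 0 masks that position (e.g. padding tokens after the end of a sentence) while 1 keeps it."
]
},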
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Exercise solutions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Coming soon**"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
},
"nav_menu": {},
"toc": {
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 6,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 0
}