handson-ml/14_recurrent_neural_network...

2742 lines
70 KiB
Plaintext
Raw Normal View History

2016-09-27 23:31:21 +02:00
{
"cells": [
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"**Chapter 14 Recurrent Neural Networks**"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"_This notebook contains all the sample code and solutions to the exercises in chapter 14._"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"# Setup"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"First, let's make sure this notebook works well in both Python 2 and 3, import a few common modules, ensure Matplotlib plots figures inline, and prepare a function to save the figures:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"# To support both python 2 and python 3\n",
"from __future__ import division, print_function, unicode_literals\n",
"\n",
"# Common imports\n",
"import numpy as np\n",
"import os\n",
"\n",
"# to make this notebook's output stable across runs\n",
"def reset_graph(seed=42):\n",
"    \"\"\"Replace the default TensorFlow graph and re-seed TensorFlow and NumPy.\n",
"\n",
"    seed: value passed to both tf.set_random_seed() and np.random.seed().\n",
"    \"\"\"\n",
"    np.random.seed(seed)\n",
"    # Reset first, then seed: the seed is applied after the fresh graph exists.\n",
"    tf.reset_default_graph()\n",
"    tf.set_random_seed(seed)\n",
2016-09-27 23:31:21 +02:00
"\n",
"# To plot pretty figures\n",
"%matplotlib inline\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"plt.rcParams['axes.labelsize'] = 14\n",
"plt.rcParams['xtick.labelsize'] = 12\n",
"plt.rcParams['ytick.labelsize'] = 12\n",
"\n",
"# Where to save the figures\n",
"PROJECT_ROOT_DIR = \".\"\n",
"CHAPTER_ID = \"rnn\"\n",
"\n",
"def save_fig(fig_id, tight_layout=True):\n",
"    \"\"\"Save the current Matplotlib figure as a 300 dpi PNG.\n",
"\n",
"    The file is written to PROJECT_ROOT_DIR/images/CHAPTER_ID/<fig_id>.png.\n",
"    The target directory is created when it does not exist yet, because\n",
"    plt.savefig() does not create missing directories.\n",
"\n",
"    fig_id: file name without the \".png\" extension.\n",
"    tight_layout: when True, call plt.tight_layout() before saving.\n",
"    \"\"\"\n",
"    images_dir = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
"    # os.makedirs(..., exist_ok=True) is Python 3 only; this notebook also targets Python 2.\n",
"    if not os.path.isdir(images_dir):\n",
"        os.makedirs(images_dir)\n",
"    path = os.path.join(images_dir, fig_id + \".png\")\n",
"    print(\"Saving figure\", fig_id)\n",
"    if tight_layout:\n",
"        plt.tight_layout()\n",
"    plt.savefig(path, format='png', dpi=300)"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"Then of course we will need TensorFlow:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"import tensorflow as tf"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"# Basic RNNs"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"## Manual RNN"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"reset_graph()\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_inputs = 3\n",
"n_neurons = 5\n",
"\n",
"X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
"X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
"\n",
"Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons],dtype=tf.float32))\n",
"Wy = tf.Variable(tf.random_normal(shape=[n_neurons,n_neurons],dtype=tf.float32))\n",
2016-09-27 23:31:21 +02:00
"b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))\n",
"\n",
"Y0 = tf.tanh(tf.matmul(X0, Wx) + b)\n",
"Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)\n",
"\n",
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
2016-09-27 23:31:21 +02:00
"X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0\n",
"X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"print(Y0_val)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"print(Y1_val)"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"## Using `static_rnn()`"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true,
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"n_inputs = 3\n",
"n_neurons = 5"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
2016-09-27 23:31:21 +02:00
"X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
"X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
"\n",
2017-02-17 11:51:26 +01:00
"basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
"output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, [X0, X1],\n",
" dtype=tf.float32)\n",
"Y0, Y1 = output_seqs"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 10,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n",
"X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})"
]
},
{
"cell_type": "code",
"execution_count": 11,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"Y0_val"
]
},
{
"cell_type": "code",
"execution_count": 12,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"Y1_val"
]
},
{
"cell_type": "code",
"execution_count": 13,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"from IPython.display import clear_output, Image, display, HTML\n",
"\n",
"def strip_consts(graph_def, max_const_size=32):\n",
"    \"\"\"Strip large constant values from graph_def.\n",
"\n",
"    Returns a new GraphDef in which the tensor_content of every 'Const' node\n",
"    longer than max_const_size bytes is replaced by a short placeholder.\n",
"    The nodes are copied, so the graph_def passed in is left untouched.\n",
"    \"\"\"\n",
"    strip_def = tf.GraphDef()\n",
"    for n0 in graph_def.node:\n",
"        n = strip_def.node.add()\n",
"        n.MergeFrom(n0)\n",
"        if n.op == 'Const':\n",
"            tensor = n.attr['value'].tensor\n",
"            size = len(tensor.tensor_content)\n",
"            if size > max_const_size:\n",
"                # tensor_content is a protobuf bytes field, so the placeholder must be\n",
"                # bytes rather than text (the b prefix used to sit inside the quotes).\n",
"                tensor.tensor_content = (\"<stripped %d bytes>\" % size).encode(\"ascii\")\n",
"    return strip_def\n",
"\n",
"def show_graph(graph_def, max_const_size=32):\n",
"    \"\"\"Display a TensorFlow graph inline with the hosted TensorBoard graph widget.\n",
"\n",
"    graph_def: a GraphDef, or any object exposing as_graph_def().\n",
"    max_const_size: forwarded to strip_consts() to drop large constants first.\n",
"    \"\"\"\n",
"    if hasattr(graph_def, 'as_graph_def'):\n",
"        graph_def = graph_def.as_graph_def()\n",
"    stripped = strip_consts(graph_def, max_const_size=max_const_size)\n",
"    pbtxt = repr(str(stripped))\n",
"    # Random suffix, presumably so each rendered graph gets a distinct element id.\n",
"    element_id = 'graph' + str(np.random.rand())\n",
"    page = \"\"\"\n",
" <script>\n",
" function load() {{\n",
" document.getElementById(\"{id}\").pbtxt = {data};\n",
" }}\n",
" </script>\n",
" <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n",
" <div style=\"height:600px\">\n",
" <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n",
" </div>\n",
" \"\"\".format(data=pbtxt, id=element_id)\n",
"\n",
"    # The page goes into the iframe's srcdoc attribute, so its double quotes are escaped.\n",
"    frame = \"\"\"\n",
" <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n",
" \"\"\".format(page.replace('\"', '&quot;'))\n",
"    display(HTML(frame))"
]
},
{
"cell_type": "code",
"execution_count": 14,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"show_graph(tf.get_default_graph())"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"## Packing sequences"
]
},
{
"cell_type": "code",
"execution_count": 15,
2016-09-27 23:31:21 +02:00
"metadata": {
"collapsed": true,
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"n_steps = 2\n",
"n_inputs = 3\n",
"n_neurons = 5"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
2016-09-27 23:31:21 +02:00
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
2017-02-17 11:51:26 +01:00
"X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))\n",
2016-09-27 23:31:21 +02:00
"\n",
2017-02-17 11:51:26 +01:00
"basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
"output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, X_seqs,\n",
" dtype=tf.float32)\n",
"outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 18,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"X_batch = np.array([\n",
" # t = 0 t = 1 \n",
" [[0, 1, 2], [9, 8, 7]], # instance 1\n",
" [[3, 4, 5], [0, 0, 0]], # instance 2\n",
" [[6, 7, 8], [6, 5, 4]], # instance 3\n",
" [[9, 0, 1], [3, 2, 1]], # instance 4\n",
" ])\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" outputs_val = outputs.eval(feed_dict={X: X_batch})"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(outputs_val)"
]
},
{
"cell_type": "code",
"execution_count": 20,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"print(np.transpose(outputs_val, axes=[1, 0, 2])[1])"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"## Using `dynamic_rnn()`"
]
},
{
"cell_type": "code",
"execution_count": 21,
2016-09-27 23:31:21 +02:00
"metadata": {
"collapsed": true,
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"n_steps = 2\n",
"n_inputs = 3\n",
"n_neurons = 5"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
2016-09-27 23:31:21 +02:00
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"\n",
2017-02-17 11:51:26 +01:00
"basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
"outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 24,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"X_batch = np.array([\n",
" [[0, 1, 2], [9, 8, 7]], # instance 1\n",
" [[3, 4, 5], [0, 0, 0]], # instance 2\n",
" [[6, 7, 8], [6, 5, 4]], # instance 3\n",
" [[9, 0, 1], [3, 2, 1]], # instance 4\n",
" ])\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" outputs_val = outputs.eval(feed_dict={X: X_batch})"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(outputs_val)"
]
},
{
"cell_type": "code",
"execution_count": 26,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"show_graph(tf.get_default_graph())"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"## Setting the sequence lengths"
]
},
{
"cell_type": "code",
"execution_count": 27,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"n_steps = 2\n",
"n_inputs = 3\n",
"n_neurons = 5\n",
"\n",
"reset_graph()\n",
"\n",
2016-09-27 23:31:21 +02:00
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"seq_length = tf.placeholder(tf.int32, [None])\n",
"outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32,\n",
" sequence_length=seq_length)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 30,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"X_batch = np.array([\n",
" # step 0 step 1\n",
" [[0, 1, 2], [9, 8, 7]], # instance 1\n",
" [[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)\n",
" [[6, 7, 8], [6, 5, 4]], # instance 3\n",
" [[9, 0, 1], [3, 2, 1]], # instance 4\n",
" ])\n",
"seq_length_batch = np.array([2, 1, 2, 2])"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"with tf.Session() as sess:\n",
" init.run()\n",
" outputs_val, states_val = sess.run(\n",
" [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})"
]
},
{
"cell_type": "code",
"execution_count": 32,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"print(outputs_val)"
]
},
{
"cell_type": "code",
"execution_count": 33,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"print(states_val)"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"## Training a sequence classifier"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function. The main differences relevant to this chapter are:\n",
"* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n",
"* the default `activation` is now `None` rather than `tf.nn.relu`."
]
},
2016-09-27 23:31:21 +02:00
{
"cell_type": "code",
"execution_count": 34,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"reset_graph()\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_steps = 28\n",
"n_inputs = 28\n",
"n_neurons = 150\n",
"n_outputs = 10\n",
"\n",
"learning_rate = 0.001\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.int32, [None])\n",
"\n",
"basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
"outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
2016-09-27 23:31:21 +02:00
"\n",
"logits = tf.layers.dense(states, n_outputs)\n",
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,\n",
" logits=logits)\n",
2016-09-27 23:31:21 +02:00
"loss = tf.reduce_mean(xentropy)\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"correct = tf.nn.in_top_k(logits, y, 1)\n",
"accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
"\n",
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 35,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"from tensorflow.examples.tutorials.mnist import input_data\n",
"mnist = input_data.read_data_sets(\"/tmp/data/\")\n",
"X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))\n",
"y_test = mnist.test.labels"
]
},
{
"cell_type": "code",
"execution_count": 36,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"n_epochs = 100\n",
"batch_size = 150\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for iteration in range(mnist.train.num_examples // batch_size):\n",
2016-09-27 23:31:21 +02:00
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"# Multi-layer RNN"
]
},
{
"cell_type": "code",
"execution_count": 37,
2016-09-27 23:31:21 +02:00
"metadata": {
"collapsed": true,
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"reset_graph()\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_steps = 28\n",
"n_inputs = 28\n",
"n_outputs = 10\n",
"\n",
"learning_rate = 0.001\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.int32, [None])"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"n_neurons = 100\n",
"n_layers = 3\n",
2016-09-27 23:31:21 +02:00
"\n",
"layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons,\n",
" activation=tf.nn.relu)\n",
" for layer in range(n_layers)]\n",
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)\n",
"outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"states_concat = tf.concat(axis=1, values=states)\n",
"logits = tf.layers.dense(states_concat, n_outputs)\n",
2017-02-17 11:51:26 +01:00
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
2016-09-27 23:31:21 +02:00
"loss = tf.reduce_mean(xentropy)\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"correct = tf.nn.in_top_k(logits, y, 1)\n",
"accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
"\n",
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 40,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"n_epochs = 10\n",
2016-09-27 23:31:21 +02:00
"batch_size = 150\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for iteration in range(mnist.train.num_examples // batch_size):\n",
2016-09-27 23:31:21 +02:00
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"# Time series"
]
},
{
"cell_type": "code",
"execution_count": 41,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"t_min, t_max = 0, 30\n",
"resolution = 0.1\n",
"\n",
"def time_series(t):\n",
"    \"\"\"Evaluate the synthetic signal t*sin(t)/3 + 2*sin(5t) at time(s) t.\"\"\"\n",
"    growing_term = t * np.sin(t) / 3\n",
"    fast_term = 2 * np.sin(t * 5)\n",
"    return growing_term + fast_term\n",
"\n",
"def next_batch(batch_size, n_steps):\n",
"    \"\"\"Sample a random batch of input/target windows from time_series().\n",
"\n",
"    Each window starts at a random time in [t_min, t_max - n_steps * resolution)\n",
"    and holds n_steps + 1 samples spaced `resolution` apart. The inputs are the\n",
"    first n_steps samples; the targets are the same window shifted one step\n",
"    into the future.\n",
"\n",
"    Returns a pair of arrays, each reshaped to (batch_size, n_steps, 1).\n",
"    \"\"\"\n",
"    # Offset by t_min so the windows stay inside [t_min, t_max] even when t_min != 0\n",
"    # (without this term the sampling is only correct because t_min happens to be 0).\n",
"    t0 = t_min + np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)\n",
"    Ts = t0 + np.arange(0., n_steps + 1) * resolution\n",
"    ys = time_series(Ts)\n",
"    return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)"
]
},
{
"cell_type": "code",
"execution_count": 42,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"t = np.linspace(t_min, t_max, int((t_max - t_min) / resolution))\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_steps = 20\n",
"# n_steps + 1 samples spaced exactly `resolution` apart, like the windows from next_batch():\n",
"# linspace includes both endpoints, so the span is n_steps intervals, not n_steps + 1.\n",
"t_instance = np.linspace(12.2, 12.2 + resolution * n_steps, n_steps + 1)\n",
"\n",
"plt.figure(figsize=(11,4))\n",
"plt.subplot(121)\n",
"plt.title(\"A time series (generated)\", fontsize=14)\n",
"plt.plot(t, time_series(t), label=r\"$t . \\sin(t) / 3 + 2 . \\sin(5t)$\")\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"b-\", linewidth=3, label=\"A training instance\")\n",
"plt.legend(loc=\"lower left\", fontsize=14)\n",
"plt.axis([0, 30, -17, 13])\n",
"plt.xlabel(\"Time\")\n",
"plt.ylabel(\"Value\")\n",
"\n",
"plt.subplot(122)\n",
"plt.title(\"A training instance\", fontsize=14)\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
"plt.legend(loc=\"upper left\")\n",
"plt.xlabel(\"Time\")\n",
"\n",
"\n",
"save_fig(\"time_series_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 43,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"X_batch, y_batch = next_batch(1, n_steps)"
]
},
{
"cell_type": "code",
"execution_count": 44,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"np.c_[X_batch[0], y_batch[0]]"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"## Using an `OutputProjectionWrapper`"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Let's create the RNN. It will contain 100 recurrent neurons and we will unroll it over 20 time steps since each training instance will be 20 inputs long. Each input will contain only one feature (the value at that time). The targets are also sequences of 20 inputs, each containing a single value:"
]
},
2016-09-27 23:31:21 +02:00
{
"cell_type": "code",
"execution_count": 45,
2016-09-27 23:31:21 +02:00
"metadata": {
"collapsed": true,
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"reset_graph()\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_steps = 20\n",
"n_inputs = 1\n",
"n_neurons = 100\n",
"n_outputs = 1\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
"\n",
"cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
"outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"source": [
"At each time step we now have an output vector of size 100. But what we actually want is a single output value at each time step. The simplest solution is to wrap the cell in an `OutputProjectionWrapper`."
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"n_steps = 20\n",
"n_inputs = 1\n",
"n_neurons = 100\n",
"n_outputs = 1\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"cell = tf.contrib.rnn.OutputProjectionWrapper(\n",
" tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),\n",
" output_size=n_outputs)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"learning_rate = 0.001\n",
"\n",
"loss = tf.reduce_mean(tf.square(outputs - y)) # MSE\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
"init = tf.global_variables_initializer()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"n_iterations = 1500\n",
"batch_size = 50\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" if iteration % 100 == 0:\n",
" mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
" print(iteration, \"\\tMSE:\", mse)\n",
" \n",
" saver.save(sess, \"./my_time_series_model\") # not shown in the book"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.Session() as sess: # not shown in the book\n",
" saver.restore(sess, \"./my_time_series_model\") # not shown\n",
"\n",
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new})"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"y_pred"
]
},
{
"cell_type": "code",
"execution_count": 54,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"plt.title(\"Testing the model\", fontsize=14)\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
"plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
"plt.legend(loc=\"upper left\")\n",
"plt.xlabel(\"Time\")\n",
"\n",
"save_fig(\"time_series_pred_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"## Without using an `OutputProjectionWrapper`"
]
},
{
"cell_type": "code",
"execution_count": 55,
2016-09-27 23:31:21 +02:00
"metadata": {
"collapsed": true,
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"reset_graph()\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_steps = 20\n",
"n_inputs = 1\n",
"n_neurons = 100\n",
"# Defined here because the y placeholder below needs it; otherwise this cell only\n",
"# works while n_outputs is still lingering from an earlier section.\n",
"n_outputs = 1\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
"rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"n_outputs = 1\n",
"learning_rate = 0.001"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"# Merge the batch and time-step axes so one dense layer is applied to every time step.\n",
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
"# No activation is passed, so this is a plain linear projection down to n_outputs values.\n",
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
"# Split the merged axis back into [batch, n_steps, n_outputs], the shape of the y placeholder.\n",
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"loss = tf.reduce_mean(tf.square(outputs - y))\n",
2016-09-27 23:31:21 +02:00
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 60,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"n_iterations = 1500\n",
2016-09-27 23:31:21 +02:00
"batch_size = 50\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" if iteration % 100 == 0:\n",
" mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
" print(iteration, \"\\tMSE:\", mse)\n",
" \n",
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
" \n",
" saver.save(sess, \"./my_time_series_model\")"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"y_pred"
]
},
{
"cell_type": "code",
"execution_count": 62,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"plt.title(\"Testing the model\", fontsize=14)\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
"plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
"plt.legend(loc=\"upper left\")\n",
"plt.xlabel(\"Time\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"## Generating a creative new sequence"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.Session() as sess: # not shown in the book\n",
" saver.restore(sess, \"./my_time_series_model\") # not shown\n",
"\n",
" sequence = [0.] * n_steps\n",
" for iteration in range(300):\n",
" X_batch = np.array(sequence[-n_steps:]).reshape(1, n_steps, 1)\n",
" y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
" sequence.append(y_pred[0, -1, 0])"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"plt.figure(figsize=(8,4))\n",
"plt.plot(np.arange(len(sequence)), sequence, \"b-\")\n",
"plt.plot(t[:n_steps], sequence[:n_steps], \"b-\", linewidth=3)\n",
"plt.xlabel(\"Time\")\n",
"plt.ylabel(\"Value\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 65,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
" saver.restore(sess, \"./my_time_series_model\")\n",
2016-09-27 23:31:21 +02:00
"\n",
" sequence1 = [0. for i in range(n_steps)]\n",
" for iteration in range(len(t) - n_steps):\n",
" X_batch = np.array(sequence1[-n_steps:]).reshape(1, n_steps, 1)\n",
" y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
" sequence1.append(y_pred[0, -1, 0])\n",
"\n",
" sequence2 = [time_series(i * resolution + t_min + (t_max-t_min/3)) for i in range(n_steps)]\n",
" for iteration in range(len(t) - n_steps):\n",
" X_batch = np.array(sequence2[-n_steps:]).reshape(1, n_steps, 1)\n",
" y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
" sequence2.append(y_pred[0, -1, 0])\n",
"\n",
"plt.figure(figsize=(11,4))\n",
"plt.subplot(121)\n",
"plt.plot(t, sequence1, \"b-\")\n",
"plt.plot(t[:n_steps], sequence1[:n_steps], \"b-\", linewidth=3)\n",
"plt.xlabel(\"Time\")\n",
"plt.ylabel(\"Value\")\n",
"\n",
"plt.subplot(122)\n",
"plt.plot(t, sequence2, \"b-\")\n",
"plt.plot(t[:n_steps], sequence2[:n_steps], \"b-\", linewidth=3)\n",
"plt.xlabel(\"Time\")\n",
"save_fig(\"creative_sequence_plot\")\n",
2016-09-27 23:31:21 +02:00
"plt.show()"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Deep RNN"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"## MultiRNNCell"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"n_inputs = 2\n",
"n_steps = 5\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"n_neurons = 100\n",
"n_layers = 3\n",
"\n",
"layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)\n",
"outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"X_batch = np.random.rand(2, n_steps, n_inputs)"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
" init.run()\n",
" outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch})"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"outputs_val.shape"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"## Distributing a Deep RNN Across Multiple GPUs"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Do **NOT** do this:"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.device(\"/gpu:0\"): # BAD! This is ignored.\n",
" layer1 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
"\n",
"with tf.device(\"/gpu:1\"): # BAD! Ignored again.\n",
" layer2 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Instead, you need a `DeviceCellWrapper`:"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"class DeviceCellWrapper(tf.contrib.rnn.RNNCell):\n",
" def __init__(self, device, cell):\n",
" self._cell = cell\n",
" self._device = device\n",
"\n",
" @property\n",
" def state_size(self):\n",
" return self._cell.state_size\n",
"\n",
" @property\n",
" def output_size(self):\n",
" return self._cell.output_size\n",
"\n",
" def __call__(self, inputs, state, scope=None):\n",
" with tf.device(self._device):\n",
" return self._cell(inputs, state, scope)"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"n_inputs = 5\n",
"n_steps = 20\n",
"n_neurons = 100\n",
"\n",
"X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"devices = [\"/cpu:0\", \"/cpu:0\", \"/cpu:0\"] # replace with [\"/gpu:0\", \"/gpu:1\", \"/gpu:2\"] if you have 3 GPUs\n",
"cells = [DeviceCellWrapper(dev,tf.contrib.rnn.BasicRNNCell(num_units=n_neurons))\n",
" for dev in devices]\n",
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n",
"outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true,
"scrolled": true
2017-02-17 11:51:26 +01:00
},
"outputs": [],
2016-09-27 23:31:21 +02:00
"source": [
"with tf.Session() as sess:\n",
" init.run()\n",
" print(sess.run(outputs, feed_dict={X: np.random.rand(2, n_steps, n_inputs)}))"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"## Dropout"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 78,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"reset_graph()\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_inputs = 1\n",
2016-09-27 23:31:21 +02:00
"n_neurons = 100\n",
"n_layers = 3\n",
"n_steps = 20\n",
"n_outputs = 1\n",
2016-09-27 23:31:21 +02:00
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 79,
2016-09-27 23:31:21 +02:00
"metadata": {
"collapsed": false,
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"keep_prob = 0.5\n",
"\n",
"cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
" for cell in cells]\n",
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells_drop)\n",
"rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 80,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"learning_rate = 0.01\n",
"\n",
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
"\n",
"loss = tf.reduce_mean(tf.square(outputs - y))\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Unfortunately, this code is only usable for training, because the `DropoutWrapper` class has no `training` parameter, so it always applies dropout, even when the model is not being trained, so we must first train the model, then create a different model for testing, without the `DropoutWrapper`."
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 81,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"n_iterations = 1000\n",
"batch_size = 50\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
" _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch})\n",
" if iteration % 100 == 0:\n",
" print(iteration, \"Training MSE:\", mse)\n",
" \n",
" saver.save(sess, \"./my_dropout_time_series_model\")"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"Now that the model is trained, we need to create the model again, but without the `DropoutWrapper` for testing:"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 82,
2016-09-27 23:31:21 +02:00
"metadata": {
"collapsed": true,
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"reset_graph()\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_inputs = 1\n",
"n_neurons = 100\n",
"n_layers = 3\n",
"n_steps = 20\n",
"n_outputs = 1\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
2016-09-27 23:31:21 +02:00
"\n",
"keep_prob = 0.5\n",
2016-09-27 23:31:21 +02:00
"\n",
"cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n",
"rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
2016-09-27 23:31:21 +02:00
"\n",
"learning_rate = 0.01\n",
2016-09-27 23:31:21 +02:00
"\n",
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
2016-09-27 23:31:21 +02:00
"\n",
"loss = tf.reduce_mean(tf.square(outputs - y))\n",
2016-09-27 23:31:21 +02:00
"\n",
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()\n",
2016-09-27 23:31:21 +02:00
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 83,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
" saver.restore(sess, \"./my_dropout_time_series_model\")\n",
"\n",
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
"\n",
"plt.title(\"Testing the model\", fontsize=14)\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
"plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
"plt.legend(loc=\"upper left\")\n",
"plt.xlabel(\"Time\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Oops, it seems that Dropout does not help at all in this particular case. :/"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Another option is to write a script with a command line argument to specify whether you want to train the mode or use it for making predictions:"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"import sys\n",
"training = True # in a script, this would be (sys.argv[-1] == \"train\") instead\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
"\n",
"cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"if training:\n",
" cells = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
" for cell in cells]\n",
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n",
"rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
"\n",
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons]) # not shown in the book\n",
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs) # not shown\n",
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs]) # not shown\n",
"loss = tf.reduce_mean(tf.square(outputs - y)) # not shown\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) # not shown\n",
"training_op = optimizer.minimize(loss) # not shown\n",
"init = tf.global_variables_initializer() # not shown\n",
"saver = tf.train.Saver() # not shown\n",
2016-09-27 23:31:21 +02:00
"\n",
"with tf.Session() as sess:\n",
" if training:\n",
2016-09-27 23:31:21 +02:00
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps) # not shown\n",
" _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch}) # not shown\n",
" if iteration % 100 == 0: # not shown\n",
" print(iteration, \"Training MSE:\", mse) # not shown\n",
2016-09-27 23:31:21 +02:00
" save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n",
" else:\n",
" saver.restore(sess, \"/tmp/my_model.ckpt\")\n",
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs))) # not shown\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new}) # not shown"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"# LSTM"
]
},
{
"cell_type": "code",
"execution_count": 85,
2016-09-27 23:31:21 +02:00
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {
"collapsed": true,
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"n_steps = 28\n",
"n_inputs = 28\n",
"n_neurons = 150\n",
"n_outputs = 10\n",
"n_layers = 3\n",
2016-09-27 23:31:21 +02:00
"\n",
"learning_rate = 0.001\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.int32, [None])\n",
"\n",
"lstm_cells = [tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"multi_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)\n",
2016-09-27 23:31:21 +02:00
"outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n",
"top_layer_h_state = states[-1][1]\n",
"logits = tf.layers.dense(top_layer_h_state, n_outputs, name=\"softmax\")\n",
2017-02-17 11:51:26 +01:00
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
2016-09-27 23:31:21 +02:00
"loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"correct = tf.nn.in_top_k(logits, y, 1)\n",
"accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
" \n",
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 87,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"states"
]
},
{
"cell_type": "code",
"execution_count": 88,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"top_layer_h_state"
]
},
{
"cell_type": "code",
"execution_count": 89,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true,
"scrolled": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"n_epochs = 10\n",
"batch_size = 150\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for iteration in range(mnist.train.num_examples // batch_size):\n",
2016-09-27 23:31:21 +02:00
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" X_batch = X_batch.reshape((batch_size, n_steps, n_inputs))\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
" print(\"Epoch\", epoch, \"Train accuracy =\", acc_train, \"Test accuracy =\", acc_test)"
]
},
{
"cell_type": "code",
"execution_count": 90,
2016-09-27 23:31:21 +02:00
"metadata": {
"collapsed": true,
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"lstm_cell = tf.contrib.rnn.LSTMCell(num_units=n_neurons, use_peepholes=True)"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 91,
2016-09-27 23:31:21 +02:00
"metadata": {
"collapsed": true,
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"gru_cell = tf.contrib.rnn.GRUCell(num_units=n_neurons)"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-11-24 17:23:11 +01:00
"source": [
"# Embeddings"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-11-24 17:23:11 +01:00
"source": [
"This section is based on TensorFlow's [Word2Vec tutorial](https://www.tensorflow.org/versions/r0.11/tutorials/word2vec/index.html)."
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-11-24 17:23:11 +01:00
"source": [
"## Fetch the data"
]
},
{
"cell_type": "code",
"execution_count": 92,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
2016-11-24 17:23:11 +01:00
"outputs": [],
2016-09-27 23:31:21 +02:00
"source": [
2016-11-24 17:23:11 +01:00
"from six.moves import urllib\n",
"\n",
"import errno\n",
2016-11-24 17:23:11 +01:00
"import os\n",
"import zipfile\n",
"\n",
"WORDS_PATH = \"datasets/words\"\n",
"WORDS_URL = 'http://mattmahoney.net/dc/text8.zip'\n",
"\n",
"def mkdir_p(path):\n",
" \"\"\"Create directories, ok if they already exist.\n",
" \n",
" This is for python 2 support. In python >=3.2, simply use:\n",
" >>> os.makedirs(path, exist_ok=True)\n",
" \"\"\"\n",
" try:\n",
" os.makedirs(path)\n",
" except OSError as exc:\n",
" if exc.errno == errno.EEXIST and os.path.isdir(path):\n",
" pass\n",
" else:\n",
" raise\n",
"\n",
2016-11-24 17:23:11 +01:00
"def fetch_words_data(words_url=WORDS_URL, words_path=WORDS_PATH):\n",
" os.makedirs(words_path, exist_ok=True)\n",
" zip_path = os.path.join(words_path, \"words.zip\")\n",
" if not os.path.exists(zip_path):\n",
" urllib.request.urlretrieve(words_url, zip_path)\n",
" with zipfile.ZipFile(zip_path) as f:\n",
" data = f.read(f.namelist()[0])\n",
" return data.decode(\"ascii\").split()"
]
},
{
"cell_type": "code",
"execution_count": 93,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"words = fetch_words_data()"
]
},
{
"cell_type": "code",
"execution_count": 94,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"words[:5]"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
2016-11-24 17:23:11 +01:00
"## Build the dictionary"
]
},
{
"cell_type": "code",
"execution_count": 95,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"from collections import Counter\n",
"\n",
"vocabulary_size = 50000\n",
"\n",
"vocabulary = [(\"UNK\", None)] + Counter(words).most_common(vocabulary_size - 1)\n",
"vocabulary = np.array([word for word, _ in vocabulary])\n",
"dictionary = {word: code for code, word in enumerate(vocabulary)}\n",
"data = np.array([dictionary.get(word, 0) for word in words])"
]
},
{
"cell_type": "code",
"execution_count": 96,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"\" \".join(words[:9]), data[:9]"
]
},
{
"cell_type": "code",
"execution_count": 97,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"\" \".join([vocabulary[word_index] for word_index in [5241, 3081, 12, 6, 195, 2, 3134, 46, 59]])"
]
},
{
"cell_type": "code",
"execution_count": 98,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"words[24], data[24]"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-11-24 17:23:11 +01:00
"source": [
"## Generate batches"
]
},
{
"cell_type": "code",
"execution_count": 99,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"import random\n",
"from collections import deque\n",
"\n",
"def generate_batch(batch_size, num_skips, skip_window):\n",
" global data_index\n",
" assert batch_size % num_skips == 0\n",
" assert num_skips <= 2 * skip_window\n",
" batch = np.ndarray(shape=(batch_size), dtype=np.int32)\n",
" labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)\n",
" span = 2 * skip_window + 1 # [ skip_window target skip_window ]\n",
" buffer = deque(maxlen=span)\n",
" for _ in range(span):\n",
" buffer.append(data[data_index])\n",
" data_index = (data_index + 1) % len(data)\n",
" for i in range(batch_size // num_skips):\n",
" target = skip_window # target label at the center of the buffer\n",
" targets_to_avoid = [ skip_window ]\n",
" for j in range(num_skips):\n",
" while target in targets_to_avoid:\n",
" target = random.randint(0, span - 1)\n",
" targets_to_avoid.append(target)\n",
" batch[i * num_skips + j] = buffer[skip_window]\n",
" labels[i * num_skips + j, 0] = buffer[target]\n",
" buffer.append(data[data_index])\n",
" data_index = (data_index + 1) % len(data)\n",
" return batch, labels"
]
},
{
"cell_type": "code",
"execution_count": 100,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"data_index=0\n",
"batch, labels = generate_batch(8, 2, 1)"
]
},
{
"cell_type": "code",
"execution_count": 101,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"batch, [vocabulary[word] for word in batch]"
]
},
{
"cell_type": "code",
"execution_count": 102,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"labels, [vocabulary[word] for word in labels[:, 0]]"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-11-24 17:23:11 +01:00
"source": [
"## Build the model"
]
},
{
"cell_type": "code",
"execution_count": 103,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"batch_size = 128\n",
"embedding_size = 128 # Dimension of the embedding vector.\n",
"skip_window = 1 # How many words to consider left and right.\n",
"num_skips = 2 # How many times to reuse an input to generate a label.\n",
"\n",
"# We pick a random validation set to sample nearest neighbors. Here we limit the\n",
"# validation samples to the words that have a low numeric ID, which by\n",
"# construction are also the most frequent.\n",
"valid_size = 16 # Random set of words to evaluate similarity on.\n",
"valid_window = 100 # Only pick dev samples in the head of the distribution.\n",
"valid_examples = np.random.choice(valid_window, valid_size, replace=False)\n",
2016-11-24 17:23:11 +01:00
"num_sampled = 64 # Number of negative examples to sample.\n",
"\n",
"learning_rate = 0.01"
]
},
{
"cell_type": "code",
"execution_count": 104,
2016-11-24 17:23:11 +01:00
"metadata": {
"collapsed": true,
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"reset_graph()\n",
2016-11-24 17:23:11 +01:00
"\n",
"# Input data.\n",
"train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])\n",
"valid_dataset = tf.constant(valid_examples, dtype=tf.int32)"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"vocabulary_size = 50000\n",
"embedding_size = 150\n",
2016-11-24 17:23:11 +01:00
"\n",
"# Look up embeddings for inputs.\n",
"init_embeds = tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)\n",
"embeddings = tf.Variable(init_embeds)"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"train_inputs = tf.placeholder(tf.int32, shape=[None])\n",
"embed = tf.nn.embedding_lookup(embeddings, train_inputs)"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2016-11-24 17:23:11 +01:00
"# Construct the variables for the NCE loss\n",
"nce_weights = tf.Variable(\n",
" tf.truncated_normal([vocabulary_size, embedding_size],\n",
" stddev=1.0 / np.sqrt(embedding_size)))\n",
"nce_biases = tf.Variable(tf.zeros([vocabulary_size]))\n",
"\n",
"# Compute the average NCE loss for the batch.\n",
"# tf.nce_loss automatically draws a new sample of the negative labels each\n",
"# time we evaluate the loss.\n",
"loss = tf.reduce_mean(\n",
2017-02-17 11:51:26 +01:00
" tf.nn.nce_loss(nce_weights, nce_biases, train_labels, embed,\n",
2016-11-24 17:23:11 +01:00
" num_sampled, vocabulary_size))\n",
"\n",
"# Construct the Adam optimizer\n",
"optimizer = tf.train.AdamOptimizer(learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
"# Compute the cosine similarity between minibatch examples and all embeddings.\n",
2017-02-17 11:51:26 +01:00
"norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), axis=1, keep_dims=True))\n",
2016-11-24 17:23:11 +01:00
"normalized_embeddings = embeddings / norm\n",
"valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)\n",
"similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True)\n",
"\n",
"# Add variable initializer.\n",
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-11-24 17:23:11 +01:00
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-11-24 17:23:11 +01:00
"source": [
"## Train the model"
]
},
{
"cell_type": "code",
"execution_count": 108,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"num_steps = 10001\n",
2016-11-24 17:23:11 +01:00
"\n",
"with tf.Session() as session:\n",
" init.run()\n",
"\n",
" average_loss = 0\n",
" for step in range(num_steps):\n",
" print(\"\\rIteration: {}\".format(step), end=\"\\t\")\n",
" batch_inputs, batch_labels = generate_batch(batch_size, num_skips, skip_window)\n",
" feed_dict = {train_inputs : batch_inputs, train_labels : batch_labels}\n",
"\n",
" # We perform one update step by evaluating the training op (including it\n",
" # in the list of returned values for session.run()\n",
" _, loss_val = session.run([training_op, loss], feed_dict=feed_dict)\n",
" average_loss += loss_val\n",
"\n",
" if step % 2000 == 0:\n",
" if step > 0:\n",
" average_loss /= 2000\n",
" # The average loss is an estimate of the loss over the last 2000 batches.\n",
" print(\"Average loss at step \", step, \": \", average_loss)\n",
" average_loss = 0\n",
"\n",
" # Note that this is expensive (~20% slowdown if computed every 500 steps)\n",
" if step % 10000 == 0:\n",
" sim = similarity.eval()\n",
" for i in range(valid_size):\n",
" valid_word = vocabulary[valid_examples[i]]\n",
" top_k = 8 # number of nearest neighbors\n",
" nearest = (-sim[i, :]).argsort()[1:top_k+1]\n",
" log_str = \"Nearest to %s:\" % valid_word\n",
" for k in range(top_k):\n",
" close_word = vocabulary[nearest[k]]\n",
" log_str = \"%s %s,\" % (log_str, close_word)\n",
" print(log_str)\n",
"\n",
" final_embeddings = normalized_embeddings.eval()"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-11-24 17:23:11 +01:00
"source": [
"Let's save the final embeddings (of course you can use a TensorFlow `Saver` if you prefer):"
]
},
{
"cell_type": "code",
"execution_count": 109,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
2017-02-17 11:51:26 +01:00
"np.save(\"./my_final_embeddings.npy\", final_embeddings)"
2016-11-24 17:23:11 +01:00
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-11-24 17:23:11 +01:00
"source": [
"## Plot the embeddings"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 110,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
2016-11-24 17:23:11 +01:00
"source": [
"def plot_with_labels(low_dim_embs, labels):\n",
" assert low_dim_embs.shape[0] >= len(labels), \"More labels than embeddings\"\n",
" plt.figure(figsize=(18, 18)) #in inches\n",
" for i, label in enumerate(labels):\n",
" x, y = low_dim_embs[i,:]\n",
" plt.scatter(x, y)\n",
" plt.annotate(label,\n",
" xy=(x, y),\n",
" xytext=(5, 2),\n",
" textcoords='offset points',\n",
" ha='right',\n",
" va='bottom')"
]
},
{
"cell_type": "code",
"execution_count": 111,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"from sklearn.manifold import TSNE\n",
"\n",
"tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)\n",
"plot_only = 500\n",
"low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:])\n",
"labels = [vocabulary[i] for i in range(plot_only)]\n",
"plot_with_labels(low_dim_embs, labels)"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-11-24 17:23:11 +01:00
"source": [
"# Machine Translation"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-11-24 17:23:11 +01:00
"source": [
"The `basic_rnn_seq2seq()` function creates a simple Encoder/Decoder model: it first runs an RNN to encode `encoder_inputs` into a state vector, then runs a decoder initialized with the last encoder state on `decoder_inputs`. Encoder and decoder use the same RNN cell type but they don't share parameters."
]
},
{
"cell_type": "code",
"execution_count": 112,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"reset_graph()\n",
2016-11-24 17:23:11 +01:00
"\n",
"n_steps = 50\n",
"n_neurons = 200\n",
"n_layers = 3\n",
"num_encoder_symbols = 20000\n",
"num_decoder_symbols = 20000\n",
"embedding_size = 150\n",
"learning_rate = 0.01\n",
"\n",
"X = tf.placeholder(tf.int32, [None, n_steps]) # English sentences\n",
"Y = tf.placeholder(tf.int32, [None, n_steps]) # French translations\n",
"W = tf.placeholder(tf.float32, [None, n_steps - 1, 1])\n",
"Y_input = Y[:, :-1]\n",
"Y_target = Y[:, 1:]\n",
"\n",
2017-02-17 11:51:26 +01:00
"encoder_inputs = tf.unstack(tf.transpose(X)) # list of 1D tensors\n",
"decoder_inputs = tf.unstack(tf.transpose(Y_input)) # list of 1D tensors\n",
2016-11-24 17:23:11 +01:00
"\n",
"lstm_cells = [tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)\n",
2016-11-24 17:23:11 +01:00
"\n",
2017-02-17 11:51:26 +01:00
"output_seqs, states = tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq(\n",
2016-11-24 17:23:11 +01:00
" encoder_inputs,\n",
" decoder_inputs,\n",
" cell,\n",
" num_encoder_symbols,\n",
" num_decoder_symbols,\n",
" embedding_size)\n",
"\n",
2017-02-17 11:51:26 +01:00
"logits = tf.transpose(tf.unstack(output_seqs), perm=[1, 0, 2])"
2016-11-24 17:23:11 +01:00
]
},
{
"cell_type": "code",
"execution_count": 113,
2016-11-24 17:23:11 +01:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"outputs": [],
"source": [
"logits_flat = tf.reshape(logits, [-1, num_decoder_symbols])\n",
"Y_target_flat = tf.reshape(Y_target, [-1])\n",
"W_flat = tf.reshape(W, [-1])\n",
2017-02-17 11:51:26 +01:00
"xentropy = W_flat * tf.nn.sparse_softmax_cross_entropy_with_logits(labels=Y_target_flat, logits=logits_flat)\n",
2016-11-24 17:23:11 +01:00
"loss = tf.reduce_mean(xentropy)\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-11-24 17:23:11 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-11-24 17:23:11 +01:00
},
"source": [
"# Exercise solutions"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-11-24 17:23:11 +01:00
"source": [
"**Coming soon**"
]
2016-09-27 23:31:21 +02:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
2016-09-27 23:31:21 +02:00
},
"nav_menu": {},
"toc": {
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 6,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 0
}