2742 lines
70 KiB
Plaintext
2742 lines
70 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"**Chapter 14 – Recurrent Neural Networks**"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"_This notebook contains all the sample code and solutions to the exercises in chapter 14._"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"# Setup"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# To support both python 2 and python 3\n",
|
||
"from __future__ import division, print_function, unicode_literals\n",
|
||
"\n",
|
||
"# Common imports\n",
|
||
"import numpy as np\n",
|
||
"import os\n",
|
||
"\n",
|
||
"# to make this notebook's output stable across runs\n",
|
||
"def reset_graph(seed=42):\n",
|
||
" tf.reset_default_graph()\n",
|
||
" tf.set_random_seed(seed)\n",
|
||
" np.random.seed(seed)\n",
|
||
"\n",
|
||
"# To plot pretty figures\n",
|
||
"%matplotlib inline\n",
|
||
"import matplotlib\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"plt.rcParams['axes.labelsize'] = 14\n",
|
||
"plt.rcParams['xtick.labelsize'] = 12\n",
|
||
"plt.rcParams['ytick.labelsize'] = 12\n",
|
||
"\n",
|
||
"# Where to save the figures\n",
|
||
"PROJECT_ROOT_DIR = \".\"\n",
|
||
"CHAPTER_ID = \"rnn\"\n",
|
||
"\n",
|
||
"def save_fig(fig_id, tight_layout=True):\n",
|
||
" path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n",
|
||
" print(\"Saving figure\", fig_id)\n",
|
||
" if tight_layout:\n",
|
||
" plt.tight_layout()\n",
|
||
" plt.savefig(path, format='png', dpi=300)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"Then of course we will need TensorFlow:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import tensorflow as tf"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"# Basic RNNs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Manual RNN"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"n_inputs = 3\n",
|
||
"n_neurons = 5\n",
|
||
"\n",
|
||
"X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
|
||
"X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
|
||
"\n",
|
||
"Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons],dtype=tf.float32))\n",
|
||
"Wy = tf.Variable(tf.random_normal(shape=[n_neurons,n_neurons],dtype=tf.float32))\n",
|
||
"b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))\n",
|
||
"\n",
|
||
"Y0 = tf.tanh(tf.matmul(X0, Wx) + b)\n",
|
||
"Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)\n",
|
||
"\n",
|
||
"init = tf.global_variables_initializer()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import numpy as np\n",
|
||
"\n",
|
||
"X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0\n",
|
||
"X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" init.run()\n",
|
||
" Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(Y0_val)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(Y1_val)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Using `static_rnn()`"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_inputs = 3\n",
|
||
"n_neurons = 5"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
|
||
"X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
|
||
"\n",
|
||
"basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
|
||
"output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, [X0, X1],\n",
|
||
" dtype=tf.float32)\n",
|
||
"Y0, Y1 = output_seqs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"init = tf.global_variables_initializer()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n",
|
||
"X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" init.run()\n",
|
||
" Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"Y0_val"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"Y1_val"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from IPython.display import clear_output, Image, display, HTML\n",
|
||
"\n",
|
||
"def strip_consts(graph_def, max_const_size=32):\n",
|
||
" \"\"\"Strip large constant values from graph_def.\"\"\"\n",
|
||
" strip_def = tf.GraphDef()\n",
|
||
" for n0 in graph_def.node:\n",
|
||
" n = strip_def.node.add() \n",
|
||
" n.MergeFrom(n0)\n",
|
||
" if n.op == 'Const':\n",
|
||
" tensor = n.attr['value'].tensor\n",
|
||
" size = len(tensor.tensor_content)\n",
|
||
" if size > max_const_size:\n",
|
||
" tensor.tensor_content = \"b<stripped %d bytes>\"%size\n",
|
||
" return strip_def\n",
|
||
"\n",
|
||
"def show_graph(graph_def, max_const_size=32):\n",
|
||
" \"\"\"Visualize TensorFlow graph.\"\"\"\n",
|
||
" if hasattr(graph_def, 'as_graph_def'):\n",
|
||
" graph_def = graph_def.as_graph_def()\n",
|
||
" strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n",
|
||
" code = \"\"\"\n",
|
||
" <script>\n",
|
||
" function load() {{\n",
|
||
" document.getElementById(\"{id}\").pbtxt = {data};\n",
|
||
" }}\n",
|
||
" </script>\n",
|
||
" <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n",
|
||
" <div style=\"height:600px\">\n",
|
||
" <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n",
|
||
" </div>\n",
|
||
" \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n",
|
||
"\n",
|
||
" iframe = \"\"\"\n",
|
||
" <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n",
|
||
" \"\"\".format(code.replace('\"', '"'))\n",
|
||
" display(HTML(iframe))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"show_graph(tf.get_default_graph())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Packing sequences"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_steps = 2\n",
|
||
"n_inputs = 3\n",
|
||
"n_neurons = 5"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
|
||
"X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))\n",
|
||
"\n",
|
||
"basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
|
||
"output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, X_seqs,\n",
|
||
" dtype=tf.float32)\n",
|
||
"outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"init = tf.global_variables_initializer()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_batch = np.array([\n",
|
||
" # t = 0 t = 1 \n",
|
||
" [[0, 1, 2], [9, 8, 7]], # instance 1\n",
|
||
" [[3, 4, 5], [0, 0, 0]], # instance 2\n",
|
||
" [[6, 7, 8], [6, 5, 4]], # instance 3\n",
|
||
" [[9, 0, 1], [3, 2, 1]], # instance 4\n",
|
||
" ])\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" init.run()\n",
|
||
" outputs_val = outputs.eval(feed_dict={X: X_batch})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(outputs_val)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(np.transpose(outputs_val, axes=[1, 0, 2])[1])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Using `dynamic_rnn()`"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_steps = 2\n",
|
||
"n_inputs = 3\n",
|
||
"n_neurons = 5"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
|
||
"\n",
|
||
"basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
|
||
"outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"init = tf.global_variables_initializer()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_batch = np.array([\n",
|
||
" [[0, 1, 2], [9, 8, 7]], # instance 1\n",
|
||
" [[3, 4, 5], [0, 0, 0]], # instance 2\n",
|
||
" [[6, 7, 8], [6, 5, 4]], # instance 3\n",
|
||
" [[9, 0, 1], [3, 2, 1]], # instance 4\n",
|
||
" ])\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" init.run()\n",
|
||
" outputs_val = outputs.eval(feed_dict={X: X_batch})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(outputs_val)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"show_graph(tf.get_default_graph())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Setting the sequence lengths"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_steps = 2\n",
|
||
"n_inputs = 3\n",
|
||
"n_neurons = 5\n",
|
||
"\n",
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
|
||
"basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"seq_length = tf.placeholder(tf.int32, [None])\n",
|
||
"outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32,\n",
|
||
" sequence_length=seq_length)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"init = tf.global_variables_initializer()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_batch = np.array([\n",
|
||
" # step 0 step 1\n",
|
||
" [[0, 1, 2], [9, 8, 7]], # instance 1\n",
|
||
" [[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)\n",
|
||
" [[6, 7, 8], [6, 5, 4]], # instance 3\n",
|
||
" [[9, 0, 1], [3, 2, 1]], # instance 4\n",
|
||
" ])\n",
|
||
"seq_length_batch = np.array([2, 1, 2, 2])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.Session() as sess:\n",
|
||
" init.run()\n",
|
||
" outputs_val, states_val = sess.run(\n",
|
||
" [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(outputs_val)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(states_val)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Training a sequence classifier"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function. The main differences relevant to this chapter are:\n",
|
||
"* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n",
|
||
"* the default `activation` is now `None` rather than `tf.nn.relu`."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"n_steps = 28\n",
|
||
"n_inputs = 28\n",
|
||
"n_neurons = 150\n",
|
||
"n_outputs = 10\n",
|
||
"\n",
|
||
"learning_rate = 0.001\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
|
||
"y = tf.placeholder(tf.int32, [None])\n",
|
||
"\n",
|
||
"basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
|
||
"outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
|
||
"\n",
|
||
"logits = tf.layers.dense(states, n_outputs)\n",
|
||
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,\n",
|
||
" logits=logits)\n",
|
||
"loss = tf.reduce_mean(xentropy)\n",
|
||
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
|
||
"training_op = optimizer.minimize(loss)\n",
|
||
"correct = tf.nn.in_top_k(logits, y, 1)\n",
|
||
"accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
|
||
"\n",
|
||
"init = tf.global_variables_initializer()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from tensorflow.examples.tutorials.mnist import input_data\n",
|
||
"mnist = input_data.read_data_sets(\"/tmp/data/\")\n",
|
||
"X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))\n",
|
||
"y_test = mnist.test.labels"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_epochs = 100\n",
|
||
"batch_size = 150\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" init.run()\n",
|
||
" for epoch in range(n_epochs):\n",
|
||
" for iteration in range(mnist.train.num_examples // batch_size):\n",
|
||
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
|
||
" X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
|
||
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
|
||
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
|
||
" acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
|
||
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"# Multi-layer RNN"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"n_steps = 28\n",
|
||
"n_inputs = 28\n",
|
||
"n_outputs = 10\n",
|
||
"\n",
|
||
"learning_rate = 0.001\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
|
||
"y = tf.placeholder(tf.int32, [None])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_neurons = 100\n",
|
||
"n_layers = 3\n",
|
||
"\n",
|
||
"layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons,\n",
|
||
" activation=tf.nn.relu)\n",
|
||
" for layer in range(n_layers)]\n",
|
||
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)\n",
|
||
"outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"states_concat = tf.concat(axis=1, values=states)\n",
|
||
"logits = tf.layers.dense(states_concat, n_outputs)\n",
|
||
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
||
"loss = tf.reduce_mean(xentropy)\n",
|
||
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
|
||
"training_op = optimizer.minimize(loss)\n",
|
||
"correct = tf.nn.in_top_k(logits, y, 1)\n",
|
||
"accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
|
||
"\n",
|
||
"init = tf.global_variables_initializer()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_epochs = 10\n",
|
||
"batch_size = 150\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" init.run()\n",
|
||
" for epoch in range(n_epochs):\n",
|
||
" for iteration in range(mnist.train.num_examples // batch_size):\n",
|
||
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
|
||
" X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
|
||
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
|
||
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
|
||
" acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
|
||
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"# Time series"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"t_min, t_max = 0, 30\n",
|
||
"resolution = 0.1\n",
|
||
"\n",
|
||
"def time_series(t):\n",
|
||
" return t * np.sin(t) / 3 + 2 * np.sin(t*5)\n",
|
||
"\n",
|
||
"def next_batch(batch_size, n_steps):\n",
|
||
" t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)\n",
|
||
" Ts = t0 + np.arange(0., n_steps + 1) * resolution\n",
|
||
" ys = time_series(Ts)\n",
|
||
" return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"t = np.linspace(t_min, t_max, int((t_max - t_min) / resolution))\n",
|
||
"\n",
|
||
"n_steps = 20\n",
|
||
"t_instance = np.linspace(12.2, 12.2 + resolution * (n_steps + 1), n_steps + 1)\n",
|
||
"\n",
|
||
"plt.figure(figsize=(11,4))\n",
|
||
"plt.subplot(121)\n",
|
||
"plt.title(\"A time series (generated)\", fontsize=14)\n",
|
||
"plt.plot(t, time_series(t), label=r\"$t . \\sin(t) / 3 + 2 . \\sin(5t)$\")\n",
|
||
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"b-\", linewidth=3, label=\"A training instance\")\n",
|
||
"plt.legend(loc=\"lower left\", fontsize=14)\n",
|
||
"plt.axis([0, 30, -17, 13])\n",
|
||
"plt.xlabel(\"Time\")\n",
|
||
"plt.ylabel(\"Value\")\n",
|
||
"\n",
|
||
"plt.subplot(122)\n",
|
||
"plt.title(\"A training instance\", fontsize=14)\n",
|
||
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
|
||
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
|
||
"plt.legend(loc=\"upper left\")\n",
|
||
"plt.xlabel(\"Time\")\n",
|
||
"\n",
|
||
"\n",
|
||
"save_fig(\"time_series_plot\")\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_batch, y_batch = next_batch(1, n_steps)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.c_[X_batch[0], y_batch[0]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Using an `OuputProjectionWrapper`"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"Let's create the RNN. It will contain 100 recurrent neurons and we will unroll it over 20 time steps since each traiing instance will be 20 inputs long. Each input will contain only one feature (the value at that time). The targets are also sequences of 20 inputs, each containing a sigle value:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"n_steps = 20\n",
|
||
"n_inputs = 1\n",
|
||
"n_neurons = 100\n",
|
||
"n_outputs = 1\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
|
||
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
|
||
"\n",
|
||
"cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
|
||
"outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"At each time step we now have an output vector of size 100. But what we actually want is a single output value at each time step. The simplest solution is to wrap the cell in an `OutputProjectionWrapper`."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"n_steps = 20\n",
|
||
"n_inputs = 1\n",
|
||
"n_neurons = 100\n",
|
||
"n_outputs = 1\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
|
||
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 47,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"cell = tf.contrib.rnn.OutputProjectionWrapper(\n",
|
||
" tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),\n",
|
||
" output_size=n_outputs)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 49,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"learning_rate = 0.001\n",
|
||
"\n",
|
||
"loss = tf.reduce_mean(tf.square(outputs - y)) # MSE\n",
|
||
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
|
||
"training_op = optimizer.minimize(loss)\n",
|
||
"\n",
|
||
"init = tf.global_variables_initializer()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 50,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"saver = tf.train.Saver()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 51,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_iterations = 1500\n",
|
||
"batch_size = 50\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" init.run()\n",
|
||
" for iteration in range(n_iterations):\n",
|
||
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
|
||
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
|
||
" if iteration % 100 == 0:\n",
|
||
" mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
|
||
" print(iteration, \"\\tMSE:\", mse)\n",
|
||
" \n",
|
||
" saver.save(sess, \"./my_time_series_model\") # not shown in the book"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 52,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.Session() as sess: # not shown in the book\n",
|
||
" saver.restore(sess, \"./my_time_series_model\") # not shown\n",
|
||
"\n",
|
||
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
|
||
" y_pred = sess.run(outputs, feed_dict={X: X_new})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 53,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"y_pred"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 54,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"plt.title(\"Testing the model\", fontsize=14)\n",
|
||
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
|
||
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
|
||
"plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
|
||
"plt.legend(loc=\"upper left\")\n",
|
||
"plt.xlabel(\"Time\")\n",
|
||
"\n",
|
||
"save_fig(\"time_series_pred_plot\")\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Without using an `OutputProjectionWrapper`"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 55,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"n_steps = 20\n",
|
||
"n_inputs = 1\n",
|
||
"n_neurons = 100\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
|
||
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 56,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
|
||
"rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_outputs = 1\n",
|
||
"learning_rate = 0.001"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 58,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
|
||
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
|
||
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 59,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"loss = tf.reduce_mean(tf.square(outputs - y))\n",
|
||
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
|
||
"training_op = optimizer.minimize(loss)\n",
|
||
"\n",
|
||
"init = tf.global_variables_initializer()\n",
|
||
"saver = tf.train.Saver()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 60,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_iterations = 1500\n",
|
||
"batch_size = 50\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" init.run()\n",
|
||
" for iteration in range(n_iterations):\n",
|
||
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
|
||
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
|
||
" if iteration % 100 == 0:\n",
|
||
" mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
|
||
" print(iteration, \"\\tMSE:\", mse)\n",
|
||
" \n",
|
||
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
|
||
" y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
|
||
" \n",
|
||
" saver.save(sess, \"./my_time_series_model\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 61,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"y_pred"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 62,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"plt.title(\"Testing the model\", fontsize=14)\n",
|
||
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
|
||
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
|
||
"plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
|
||
"plt.legend(loc=\"upper left\")\n",
|
||
"plt.xlabel(\"Time\")\n",
|
||
"\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Generating a creative new sequence"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 63,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.Session() as sess: # not shown in the book\n",
|
||
" saver.restore(sess, \"./my_time_series_model\") # not shown\n",
|
||
"\n",
|
||
" sequence = [0.] * n_steps\n",
|
||
" for iteration in range(300):\n",
|
||
" X_batch = np.array(sequence[-n_steps:]).reshape(1, n_steps, 1)\n",
|
||
" y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
|
||
" sequence.append(y_pred[0, -1, 0])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 64,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"plt.figure(figsize=(8,4))\n",
|
||
"plt.plot(np.arange(len(sequence)), sequence, \"b-\")\n",
|
||
"plt.plot(t[:n_steps], sequence[:n_steps], \"b-\", linewidth=3)\n",
|
||
"plt.xlabel(\"Time\")\n",
|
||
"plt.ylabel(\"Value\")\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 65,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.Session() as sess:\n",
|
||
" saver.restore(sess, \"./my_time_series_model\")\n",
|
||
"\n",
|
||
" sequence1 = [0. for i in range(n_steps)]\n",
|
||
" for iteration in range(len(t) - n_steps):\n",
|
||
" X_batch = np.array(sequence1[-n_steps:]).reshape(1, n_steps, 1)\n",
|
||
" y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
|
||
" sequence1.append(y_pred[0, -1, 0])\n",
|
||
"\n",
|
||
" sequence2 = [time_series(i * resolution + t_min + (t_max-t_min/3)) for i in range(n_steps)]\n",
|
||
" for iteration in range(len(t) - n_steps):\n",
|
||
" X_batch = np.array(sequence2[-n_steps:]).reshape(1, n_steps, 1)\n",
|
||
" y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
|
||
" sequence2.append(y_pred[0, -1, 0])\n",
|
||
"\n",
|
||
"plt.figure(figsize=(11,4))\n",
|
||
"plt.subplot(121)\n",
|
||
"plt.plot(t, sequence1, \"b-\")\n",
|
||
"plt.plot(t[:n_steps], sequence1[:n_steps], \"b-\", linewidth=3)\n",
|
||
"plt.xlabel(\"Time\")\n",
|
||
"plt.ylabel(\"Value\")\n",
|
||
"\n",
|
||
"plt.subplot(122)\n",
|
||
"plt.plot(t, sequence2, \"b-\")\n",
|
||
"plt.plot(t[:n_steps], sequence2[:n_steps], \"b-\", linewidth=3)\n",
|
||
"plt.xlabel(\"Time\")\n",
|
||
"save_fig(\"creative_sequence_plot\")\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"# Deep RNN"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## MultiRNNCell"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 66,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"n_inputs = 2\n",
|
||
"n_steps = 5\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 67,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_neurons = 100\n",
|
||
"n_layers = 3\n",
|
||
"\n",
|
||
"layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
|
||
" for layer in range(n_layers)]\n",
|
||
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)\n",
|
||
"outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 68,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"init = tf.global_variables_initializer()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 69,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_batch = np.random.rand(2, n_steps, n_inputs)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 70,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.Session() as sess:\n",
|
||
" init.run()\n",
|
||
" outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 71,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"outputs_val.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Distributing a Deep RNN Across Multiple GPUs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"Do **NOT** do this:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 72,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.device(\"/gpu:0\"): # BAD! This is ignored.\n",
|
||
" layer1 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
|
||
"\n",
|
||
"with tf.device(\"/gpu:1\"): # BAD! Ignored again.\n",
|
||
" layer2 = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"Instead, you need a `DeviceCellWrapper`:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 73,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import tensorflow as tf\n",
|
||
"\n",
|
||
"class DeviceCellWrapper(tf.contrib.rnn.RNNCell):\n",
|
||
" def __init__(self, device, cell):\n",
|
||
" self._cell = cell\n",
|
||
" self._device = device\n",
|
||
"\n",
|
||
" @property\n",
|
||
" def state_size(self):\n",
|
||
" return self._cell.state_size\n",
|
||
"\n",
|
||
" @property\n",
|
||
" def output_size(self):\n",
|
||
" return self._cell.output_size\n",
|
||
"\n",
|
||
" def __call__(self, inputs, state, scope=None):\n",
|
||
" with tf.device(self._device):\n",
|
||
" return self._cell(inputs, state, scope)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 74,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"n_inputs = 5\n",
|
||
"n_steps = 20\n",
|
||
"n_neurons = 100\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 75,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"devices = [\"/cpu:0\", \"/cpu:0\", \"/cpu:0\"] # replace with [\"/gpu:0\", \"/gpu:1\", \"/gpu:2\"] if you have 3 GPUs\n",
|
||
"cells = [DeviceCellWrapper(dev,tf.contrib.rnn.BasicRNNCell(num_units=n_neurons))\n",
|
||
" for dev in devices]\n",
|
||
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n",
|
||
"outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 76,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"init = tf.global_variables_initializer()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 77,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true,
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.Session() as sess:\n",
|
||
" init.run()\n",
|
||
" print(sess.run(outputs, feed_dict={X: np.random.rand(2, n_steps, n_inputs)}))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Dropout"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 78,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"n_inputs = 1\n",
|
||
"n_neurons = 100\n",
|
||
"n_layers = 3\n",
|
||
"n_steps = 20\n",
|
||
"n_outputs = 1\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
|
||
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 79,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"keep_prob = 0.5\n",
|
||
"\n",
|
||
"cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
|
||
" for layer in range(n_layers)]\n",
|
||
"cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
|
||
" for cell in cells]\n",
|
||
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells_drop)\n",
|
||
"rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 80,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"learning_rate = 0.01\n",
|
||
"\n",
|
||
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
|
||
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
|
||
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
|
||
"\n",
|
||
"loss = tf.reduce_mean(tf.square(outputs - y))\n",
|
||
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
|
||
"training_op = optimizer.minimize(loss)\n",
|
||
"\n",
|
||
"init = tf.global_variables_initializer()\n",
|
||
"saver = tf.train.Saver()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"Unfortunately, this code is only usable for training, because the `DropoutWrapper` class has no `training` parameter, so it always applies dropout, even when the model is not being trained, so we must first train the model, then create a different model for testing, without the `DropoutWrapper`."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 81,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_iterations = 1000\n",
|
||
"batch_size = 50\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" init.run()\n",
|
||
" for iteration in range(n_iterations):\n",
|
||
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
|
||
" _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch})\n",
|
||
" if iteration % 100 == 0:\n",
|
||
" print(iteration, \"Training MSE:\", mse)\n",
|
||
" \n",
|
||
" saver.save(sess, \"./my_dropout_time_series_model\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"Now that the model is trained, we need to create the model again, but without the `DropoutWrapper` for testing:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 82,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"n_inputs = 1\n",
|
||
"n_neurons = 100\n",
|
||
"n_layers = 3\n",
|
||
"n_steps = 20\n",
|
||
"n_outputs = 1\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
|
||
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
|
||
"\n",
|
||
"keep_prob = 0.5\n",
|
||
"\n",
|
||
"cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
|
||
" for layer in range(n_layers)]\n",
|
||
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n",
|
||
"rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
|
||
"\n",
|
||
"learning_rate = 0.01\n",
|
||
"\n",
|
||
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
|
||
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
|
||
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
|
||
"\n",
|
||
"loss = tf.reduce_mean(tf.square(outputs - y))\n",
|
||
"\n",
|
||
"init = tf.global_variables_initializer()\n",
|
||
"saver = tf.train.Saver()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 83,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.Session() as sess:\n",
|
||
" saver.restore(sess, \"./my_dropout_time_series_model\")\n",
|
||
"\n",
|
||
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
|
||
" y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
|
||
"\n",
|
||
"plt.title(\"Testing the model\", fontsize=14)\n",
|
||
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
|
||
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
|
||
"plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
|
||
"plt.legend(loc=\"upper left\")\n",
|
||
"plt.xlabel(\"Time\")\n",
|
||
"\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"Oops, it seems that Dropout does not help at all in this particular case. :/"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"Another option is to write a script with a command line argument to specify whether you want to train the mode or use it for making predictions:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 84,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"import sys\n",
|
||
"training = True # in a script, this would be (sys.argv[-1] == \"train\") instead\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
|
||
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
|
||
"\n",
|
||
"cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n",
|
||
" for layer in range(n_layers)]\n",
|
||
"if training:\n",
|
||
" cells = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
|
||
" for cell in cells]\n",
|
||
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n",
|
||
"rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
|
||
"\n",
|
||
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons]) # not shown in the book\n",
|
||
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs) # not shown\n",
|
||
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs]) # not shown\n",
|
||
"loss = tf.reduce_mean(tf.square(outputs - y)) # not shown\n",
|
||
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) # not shown\n",
|
||
"training_op = optimizer.minimize(loss) # not shown\n",
|
||
"init = tf.global_variables_initializer() # not shown\n",
|
||
"saver = tf.train.Saver() # not shown\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" if training:\n",
|
||
" init.run()\n",
|
||
" for iteration in range(n_iterations):\n",
|
||
" X_batch, y_batch = next_batch(batch_size, n_steps) # not shown\n",
|
||
" _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch}) # not shown\n",
|
||
" if iteration % 100 == 0: # not shown\n",
|
||
" print(iteration, \"Training MSE:\", mse) # not shown\n",
|
||
" save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n",
|
||
" else:\n",
|
||
" saver.restore(sess, \"/tmp/my_model.ckpt\")\n",
|
||
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs))) # not shown\n",
|
||
" y_pred = sess.run(outputs, feed_dict={X: X_new}) # not shown"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"# LSTM"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 85,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 86,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_steps = 28\n",
|
||
"n_inputs = 28\n",
|
||
"n_neurons = 150\n",
|
||
"n_outputs = 10\n",
|
||
"n_layers = 3\n",
|
||
"\n",
|
||
"learning_rate = 0.001\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
|
||
"y = tf.placeholder(tf.int32, [None])\n",
|
||
"\n",
|
||
"lstm_cells = [tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n",
|
||
" for layer in range(n_layers)]\n",
|
||
"multi_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)\n",
|
||
"outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n",
|
||
"top_layer_h_state = states[-1][1]\n",
|
||
"logits = tf.layers.dense(top_layer_h_state, n_outputs, name=\"softmax\")\n",
|
||
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
||
"loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
|
||
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
|
||
"training_op = optimizer.minimize(loss)\n",
|
||
"correct = tf.nn.in_top_k(logits, y, 1)\n",
|
||
"accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
|
||
" \n",
|
||
"init = tf.global_variables_initializer()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 87,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"states"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 88,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"top_layer_h_state"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 89,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true,
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_epochs = 10\n",
|
||
"batch_size = 150\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" init.run()\n",
|
||
" for epoch in range(n_epochs):\n",
|
||
" for iteration in range(mnist.train.num_examples // batch_size):\n",
|
||
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
|
||
" X_batch = X_batch.reshape((batch_size, n_steps, n_inputs))\n",
|
||
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
|
||
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
|
||
" acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
|
||
" print(\"Epoch\", epoch, \"Train accuracy =\", acc_train, \"Test accuracy =\", acc_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 90,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"lstm_cell = tf.contrib.rnn.LSTMCell(num_units=n_neurons, use_peepholes=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 91,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"gru_cell = tf.contrib.rnn.GRUCell(num_units=n_neurons)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"# Embeddings"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"This section is based on TensorFlow's [Word2Vec tutorial](https://www.tensorflow.org/versions/r0.11/tutorials/word2vec/index.html)."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Fetch the data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 92,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from six.moves import urllib\n",
|
||
"\n",
|
||
"import errno\n",
|
||
"import os\n",
|
||
"import zipfile\n",
|
||
"\n",
|
||
"WORDS_PATH = \"datasets/words\"\n",
|
||
"WORDS_URL = 'http://mattmahoney.net/dc/text8.zip'\n",
|
||
"\n",
|
||
"def mkdir_p(path):\n",
|
||
" \"\"\"Create directories, ok if they already exist.\n",
|
||
" \n",
|
||
" This is for python 2 support. In python >=3.2, simply use:\n",
|
||
" >>> os.makedirs(path, exist_ok=True)\n",
|
||
" \"\"\"\n",
|
||
" try:\n",
|
||
" os.makedirs(path)\n",
|
||
" except OSError as exc:\n",
|
||
" if exc.errno == errno.EEXIST and os.path.isdir(path):\n",
|
||
" pass\n",
|
||
" else:\n",
|
||
" raise\n",
|
||
"\n",
|
||
"def fetch_words_data(words_url=WORDS_URL, words_path=WORDS_PATH):\n",
|
||
" os.makedirs(words_path, exist_ok=True)\n",
|
||
" zip_path = os.path.join(words_path, \"words.zip\")\n",
|
||
" if not os.path.exists(zip_path):\n",
|
||
" urllib.request.urlretrieve(words_url, zip_path)\n",
|
||
" with zipfile.ZipFile(zip_path) as f:\n",
|
||
" data = f.read(f.namelist()[0])\n",
|
||
" return data.decode(\"ascii\").split()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 93,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"words = fetch_words_data()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 94,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"words[:5]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Build the dictionary"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 95,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from collections import Counter\n",
|
||
"\n",
|
||
"vocabulary_size = 50000\n",
|
||
"\n",
|
||
"vocabulary = [(\"UNK\", None)] + Counter(words).most_common(vocabulary_size - 1)\n",
|
||
"vocabulary = np.array([word for word, _ in vocabulary])\n",
|
||
"dictionary = {word: code for code, word in enumerate(vocabulary)}\n",
|
||
"data = np.array([dictionary.get(word, 0) for word in words])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 96,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"\" \".join(words[:9]), data[:9]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 97,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"\" \".join([vocabulary[word_index] for word_index in [5241, 3081, 12, 6, 195, 2, 3134, 46, 59]])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 98,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"words[24], data[24]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Generate batches"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 99,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import random\n",
|
||
"from collections import deque\n",
|
||
"\n",
|
||
"def generate_batch(batch_size, num_skips, skip_window):\n",
|
||
" global data_index\n",
|
||
" assert batch_size % num_skips == 0\n",
|
||
" assert num_skips <= 2 * skip_window\n",
|
||
" batch = np.ndarray(shape=(batch_size), dtype=np.int32)\n",
|
||
" labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)\n",
|
||
" span = 2 * skip_window + 1 # [ skip_window target skip_window ]\n",
|
||
" buffer = deque(maxlen=span)\n",
|
||
" for _ in range(span):\n",
|
||
" buffer.append(data[data_index])\n",
|
||
" data_index = (data_index + 1) % len(data)\n",
|
||
" for i in range(batch_size // num_skips):\n",
|
||
" target = skip_window # target label at the center of the buffer\n",
|
||
" targets_to_avoid = [ skip_window ]\n",
|
||
" for j in range(num_skips):\n",
|
||
" while target in targets_to_avoid:\n",
|
||
" target = random.randint(0, span - 1)\n",
|
||
" targets_to_avoid.append(target)\n",
|
||
" batch[i * num_skips + j] = buffer[skip_window]\n",
|
||
" labels[i * num_skips + j, 0] = buffer[target]\n",
|
||
" buffer.append(data[data_index])\n",
|
||
" data_index = (data_index + 1) % len(data)\n",
|
||
" return batch, labels"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 100,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"data_index=0\n",
|
||
"batch, labels = generate_batch(8, 2, 1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 101,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"batch, [vocabulary[word] for word in batch]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 102,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"labels, [vocabulary[word] for word in labels[:, 0]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Build the model"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 103,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"batch_size = 128\n",
|
||
"embedding_size = 128 # Dimension of the embedding vector.\n",
|
||
"skip_window = 1 # How many words to consider left and right.\n",
|
||
"num_skips = 2 # How many times to reuse an input to generate a label.\n",
|
||
"\n",
|
||
"# We pick a random validation set to sample nearest neighbors. Here we limit the\n",
|
||
"# validation samples to the words that have a low numeric ID, which by\n",
|
||
"# construction are also the most frequent.\n",
|
||
"valid_size = 16 # Random set of words to evaluate similarity on.\n",
|
||
"valid_window = 100 # Only pick dev samples in the head of the distribution.\n",
|
||
"valid_examples = np.random.choice(valid_window, valid_size, replace=False)\n",
|
||
"num_sampled = 64 # Number of negative examples to sample.\n",
|
||
"\n",
|
||
"learning_rate = 0.01"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 104,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"# Input data.\n",
|
||
"train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])\n",
|
||
"valid_dataset = tf.constant(valid_examples, dtype=tf.int32)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 105,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"vocabulary_size = 50000\n",
|
||
"embedding_size = 150\n",
|
||
"\n",
|
||
"# Look up embeddings for inputs.\n",
|
||
"init_embeds = tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)\n",
|
||
"embeddings = tf.Variable(init_embeds)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 106,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"train_inputs = tf.placeholder(tf.int32, shape=[None])\n",
|
||
"embed = tf.nn.embedding_lookup(embeddings, train_inputs)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 107,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Construct the variables for the NCE loss\n",
|
||
"nce_weights = tf.Variable(\n",
|
||
" tf.truncated_normal([vocabulary_size, embedding_size],\n",
|
||
" stddev=1.0 / np.sqrt(embedding_size)))\n",
|
||
"nce_biases = tf.Variable(tf.zeros([vocabulary_size]))\n",
|
||
"\n",
|
||
"# Compute the average NCE loss for the batch.\n",
|
||
"# tf.nn.nce_loss automatically draws a new sample of the negative labels each\n",
|
||
"# time we evaluate the loss.\n",
|
||
"loss = tf.reduce_mean(\n",
|
||
" tf.nn.nce_loss(nce_weights, nce_biases, train_labels, embed,\n",
|
||
" num_sampled, vocabulary_size))\n",
|
||
"\n",
|
||
"# Construct the Adam optimizer\n",
|
||
"optimizer = tf.train.AdamOptimizer(learning_rate)\n",
|
||
"training_op = optimizer.minimize(loss)\n",
|
||
"\n",
|
||
"# Compute the cosine similarity between minibatch examples and all embeddings.\n",
|
||
"norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), axis=1, keep_dims=True))\n",
|
||
"normalized_embeddings = embeddings / norm\n",
|
||
"valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)\n",
|
||
"similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True)\n",
|
||
"\n",
|
||
"# Add variable initializer.\n",
|
||
"init = tf.global_variables_initializer()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Train the model"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 108,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"num_steps = 10001\n",
|
||
"\n",
|
||
"with tf.Session() as session:\n",
|
||
" init.run()\n",
|
||
"\n",
|
||
" average_loss = 0\n",
|
||
" for step in range(num_steps):\n",
|
||
" print(\"\\rIteration: {}\".format(step), end=\"\\t\")\n",
|
||
" batch_inputs, batch_labels = generate_batch(batch_size, num_skips, skip_window)\n",
|
||
" feed_dict = {train_inputs : batch_inputs, train_labels : batch_labels}\n",
|
||
"\n",
|
||
" # We perform one update step by evaluating the training op (including it\n",
|
||
"        # in the list of returned values for session.run())\n",
|
||
" _, loss_val = session.run([training_op, loss], feed_dict=feed_dict)\n",
|
||
" average_loss += loss_val\n",
|
||
"\n",
|
||
" if step % 2000 == 0:\n",
|
||
" if step > 0:\n",
|
||
" average_loss /= 2000\n",
|
||
" # The average loss is an estimate of the loss over the last 2000 batches.\n",
|
||
" print(\"Average loss at step \", step, \": \", average_loss)\n",
|
||
" average_loss = 0\n",
|
||
"\n",
|
||
" # Note that this is expensive (~20% slowdown if computed every 500 steps)\n",
|
||
" if step % 10000 == 0:\n",
|
||
" sim = similarity.eval()\n",
|
||
" for i in range(valid_size):\n",
|
||
" valid_word = vocabulary[valid_examples[i]]\n",
|
||
" top_k = 8 # number of nearest neighbors\n",
|
||
" nearest = (-sim[i, :]).argsort()[1:top_k+1]\n",
|
||
" log_str = \"Nearest to %s:\" % valid_word\n",
|
||
" for k in range(top_k):\n",
|
||
" close_word = vocabulary[nearest[k]]\n",
|
||
" log_str = \"%s %s,\" % (log_str, close_word)\n",
|
||
" print(log_str)\n",
|
||
"\n",
|
||
" final_embeddings = normalized_embeddings.eval()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"Let's save the final embeddings (of course you can use a TensorFlow `Saver` if you prefer):"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 109,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.save(\"./my_final_embeddings.npy\", final_embeddings)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"## Plot the embeddings"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 110,
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"def plot_with_labels(low_dim_embs, labels):\n",
|
||
" assert low_dim_embs.shape[0] >= len(labels), \"More labels than embeddings\"\n",
|
||
" plt.figure(figsize=(18, 18)) #in inches\n",
|
||
" for i, label in enumerate(labels):\n",
|
||
" x, y = low_dim_embs[i,:]\n",
|
||
" plt.scatter(x, y)\n",
|
||
" plt.annotate(label,\n",
|
||
" xy=(x, y),\n",
|
||
" xytext=(5, 2),\n",
|
||
" textcoords='offset points',\n",
|
||
" ha='right',\n",
|
||
" va='bottom')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 111,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.manifold import TSNE\n",
|
||
"\n",
|
||
"tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)\n",
|
||
"plot_only = 500\n",
|
||
"low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:])\n",
|
||
"labels = [vocabulary[i] for i in range(plot_only)]\n",
|
||
"plot_with_labels(low_dim_embs, labels)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"# Machine Translation"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"The `embedding_rnn_seq2seq()` function creates a simple Encoder/Decoder model: it first embeds `encoder_inputs` and runs an RNN to encode them into a state vector, then embeds `decoder_inputs` and runs a decoder on them, initialized with the last encoder state. Encoder and decoder use the same RNN cell type but they don't share parameters."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 112,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import tensorflow as tf\n",
|
||
"reset_graph()\n",
|
||
"\n",
|
||
"n_steps = 50\n",
|
||
"n_neurons = 200\n",
|
||
"n_layers = 3\n",
|
||
"num_encoder_symbols = 20000\n",
|
||
"num_decoder_symbols = 20000\n",
|
||
"embedding_size = 150\n",
|
||
"learning_rate = 0.01\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.int32, [None, n_steps]) # English sentences\n",
|
||
"Y = tf.placeholder(tf.int32, [None, n_steps]) # French translations\n",
|
||
"W = tf.placeholder(tf.float32, [None, n_steps - 1, 1])\n",
|
||
"Y_input = Y[:, :-1]\n",
|
||
"Y_target = Y[:, 1:]\n",
|
||
"\n",
|
||
"encoder_inputs = tf.unstack(tf.transpose(X)) # list of 1D tensors\n",
|
||
"decoder_inputs = tf.unstack(tf.transpose(Y_input)) # list of 1D tensors\n",
|
||
"\n",
|
||
"lstm_cells = [tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)\n",
|
||
" for layer in range(n_layers)]\n",
|
||
"cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)\n",
|
||
"\n",
|
||
"output_seqs, states = tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq(\n",
|
||
" encoder_inputs,\n",
|
||
" decoder_inputs,\n",
|
||
" cell,\n",
|
||
" num_encoder_symbols,\n",
|
||
" num_decoder_symbols,\n",
|
||
" embedding_size)\n",
|
||
"\n",
|
||
"logits = tf.transpose(tf.unstack(output_seqs), perm=[1, 0, 2])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 113,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"logits_flat = tf.reshape(logits, [-1, num_decoder_symbols])\n",
|
||
"Y_target_flat = tf.reshape(Y_target, [-1])\n",
|
||
"W_flat = tf.reshape(W, [-1])\n",
|
||
"xentropy = W_flat * tf.nn.sparse_softmax_cross_entropy_with_logits(labels=Y_target_flat, logits=logits_flat)\n",
|
||
"loss = tf.reduce_mean(xentropy)\n",
|
||
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
|
||
"training_op = optimizer.minimize(loss)\n",
|
||
"\n",
|
||
"init = tf.global_variables_initializer()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"# Exercise solutions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"deletable": true,
|
||
"editable": true
|
||
},
|
||
"source": [
|
||
"**Coming soon**"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.5.3"
|
||
},
|
||
"nav_menu": {},
|
||
"toc": {
|
||
"navigate_menu": true,
|
||
"number_sections": true,
|
||
"sideBar": true,
|
||
"threshold": 6,
|
||
"toc_cell": false,
|
||
"toc_section_display": "block",
|
||
"toc_window_display": false
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 0
|
||
}
|