handson-ml/15_recurrent_neural_network...

4049 lines
111 KiB
Plaintext
Raw Normal View History

2016-09-27 23:31:21 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
2019-04-05 11:04:38 +02:00
"**Chapter 15 Recurrent Neural Networks**"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
2019-04-05 11:04:38 +02:00
"_This notebook contains all the sample code in chapter 15._"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
"# Setup"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
2019-04-05 11:04:38 +02:00
"First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0-preview."
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
2019-04-05 11:04:38 +02:00
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Python ≥3.5 is required\n",
"import sys\n",
"assert sys.version_info >= (3, 5)\n",
"\n",
"# Scikit-Learn ≥0.20 is required\n",
"import sklearn\n",
"assert sklearn.__version__ >= \"0.20\"\n",
"\n",
"# TensorFlow ≥2.0-preview is required\n",
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"assert tf.__version__ >= \"2.0\"\n",
"\n",
"# Common imports\n",
"import numpy as np\n",
"import os\n",
"\n",
"# to make this notebook's output stable across runs\n",
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
"\n",
"# To plot pretty figures\n",
"%matplotlib inline\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"mpl.rc('axes', labelsize=14)\n",
"mpl.rc('xtick', labelsize=12)\n",
"mpl.rc('ytick', labelsize=12)\n",
"\n",
"# Where to save the figures\n",
"PROJECT_ROOT_DIR = \".\"\n",
"CHAPTER_ID = \"rnn\"\n",
"IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
"os.makedirs(IMAGES_PATH, exist_ok=True)\n",
"\n",
"def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n",
" path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n",
" print(\"Saving figure\", fig_id)\n",
" if tight_layout:\n",
" plt.tight_layout()\n",
" plt.savefig(path, format=fig_extension, dpi=resolution)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Basic RNNs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Generate the Dataset"
]
},
{
"cell_type": "code",
"execution_count": 493,
"metadata": {},
"outputs": [],
"source": [
"def generate_time_series(batch_size, n_steps):\n",
" freq1, freq2, offsets1, offsets2 = np.random.rand(4, batch_size, 1)\n",
" time = np.linspace(0, 1, n_steps)\n",
" series = 0.5 * np.sin((time - offsets1) * (freq1 * 10 + 10)) # wave 1\n",
" series += 0.2 * np.sin((time - offsets2) * (freq2 * 20 + 20)) # + wave 2\n",
" series += 0.1 * (np.random.rand(batch_size, n_steps) - 0.5) # + noise\n",
" return series[..., np.newaxis].astype(np.float32)"
]
},
{
"cell_type": "code",
"execution_count": 494,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"\n",
"n_steps = 50\n",
"series = generate_time_series(10000, n_steps + 1)\n",
"X_train, y_train = series[:7000, :n_steps], series[:7000, -1]\n",
"X_valid, y_valid = series[7000:9000, :n_steps], series[7000:9000, -1]\n",
"X_test, y_test = series[9000:, :n_steps], series[9000:, -1]"
]
},
{
"cell_type": "code",
"execution_count": 495,
"metadata": {},
"outputs": [],
"source": [
"X_train.shape, y_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 496,
"metadata": {},
"outputs": [],
"source": [
"def plot_series(series, y=None, y_pred=None, x_label=\"$t$\", y_label=\"$x(t)$\"):\n",
" plt.plot(series, \".-\")\n",
" if y is not None:\n",
" plt.plot(n_steps, y, \"bx\", markersize=10)\n",
" if y_pred is not None:\n",
" plt.plot(n_steps, y_pred, \"ro\")\n",
" plt.grid(True)\n",
" if x_label:\n",
" plt.xlabel(x_label, fontsize=16)\n",
" if y_label:\n",
" plt.ylabel(y_label, fontsize=16, rotation=0)\n",
" plt.hlines(0, 0, 100, linewidth=1)\n",
" plt.axis([0, n_steps + 1, -1, 1])\n",
"\n",
"fig, axes = plt.subplots(nrows=1, ncols=3, sharey=True, figsize=(12, 4))\n",
"for col in range(3):\n",
" plt.sca(axes[col])\n",
" plot_series(X_valid[col, :, 0], y_valid[col, 0],\n",
" y_label=(\"$x(t)$\" if col==0 else None))\n",
"save_fig(\"time_series_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Computing Some Baselines"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Naive predictions (just predict the last observed value):"
]
},
{
"cell_type": "code",
"execution_count": 497,
"metadata": {},
"outputs": [],
"source": [
"y_pred = X_valid[:, -1]\n",
"np.mean(keras.losses.mean_squared_error(y_valid, y_pred))"
]
},
{
"cell_type": "code",
"execution_count": 498,
"metadata": {},
"outputs": [],
"source": [
"plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Linear predictions:"
]
},
{
"cell_type": "code",
"execution_count": 499,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
"\n",
"model = keras.models.Sequential([\n",
" keras.layers.Flatten(),\n",
" keras.layers.Dense(1)\n",
"])\n",
"\n",
"model.compile(loss=\"mse\", optimizer=\"adam\")\n",
"history = model.fit(X_train, y_train, epochs=20,\n",
" validation_data=(X_valid, y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 500,
"metadata": {},
"outputs": [],
"source": [
"model.evaluate(X_valid, y_valid)"
]
},
{
"cell_type": "code",
"execution_count": 501,
"metadata": {},
"outputs": [],
"source": [
"def plot_learning_curves(loss, val_loss):\n",
" plt.plot(np.arange(len(loss)) + 0.5, loss, \"b.-\", label=\"Training loss\")\n",
" plt.plot(np.arange(len(val_loss)) + 1, val_loss, \"r.-\", label=\"Validation loss\")\n",
" plt.gca().xaxis.set_major_locator(mpl.ticker.MaxNLocator(integer=True))\n",
" plt.axis([1, 20, 0, 0.05])\n",
" plt.legend(fontsize=14)\n",
" plt.xlabel(\"Epochs\")\n",
" plt.ylabel(\"Loss\")\n",
" plt.grid(True)\n",
"\n",
"plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 502,
"metadata": {},
"outputs": [],
"source": [
"y_pred = model.predict(X_valid)\n",
"plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Using a Simple RNN"
]
},
{
"cell_type": "code",
"execution_count": 503,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
"\n",
"model = keras.models.Sequential([keras.layers.SimpleRNN(1)])\n",
"\n",
"model.compile(loss=\"mse\", optimizer=\"adam\")\n",
"history = model.fit(X_train, y_train, epochs=20,\n",
" validation_data=(X_valid, y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 504,
"metadata": {},
"outputs": [],
"source": [
"model.evaluate(X_valid, y_valid)"
]
},
{
"cell_type": "code",
"execution_count": 505,
"metadata": {},
"outputs": [],
"source": [
"plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 506,
"metadata": {},
"outputs": [],
"source": [
"y_pred = model.predict(X_valid)\n",
"plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deep RNNs"
]
},
{
"cell_type": "code",
"execution_count": 513,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
"\n",
"model = keras.models.Sequential([\n",
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
" keras.layers.SimpleRNN(1)\n",
"])\n",
"\n",
"model.compile(loss=\"mse\", optimizer=\"adam\")\n",
"history = model.fit(X_train, y_train, epochs=20,\n",
" validation_data=(X_valid, y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 514,
"metadata": {},
"outputs": [],
"source": [
"model.evaluate(X_valid, y_valid)"
]
},
{
"cell_type": "code",
"execution_count": 515,
"metadata": {},
"outputs": [],
"source": [
"plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 516,
"metadata": {},
"outputs": [],
"source": [
"y_pred = model.predict(X_valid)\n",
"plot_series(X_valid[0, :, 0], y_valid[0, 0], y_valid[0, 0])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 517,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
"\n",
"model = keras.models.Sequential([\n",
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
" keras.layers.SimpleRNN(20),\n",
" keras.layers.Dense(1)\n",
"])\n",
"\n",
"model.compile(loss=\"mse\", optimizer=\"adam\")\n",
"history = model.fit(X_train, y_train, epochs=20,\n",
" validation_data=(X_valid, y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 518,
"metadata": {},
"outputs": [],
"source": [
"model.evaluate(X_valid, y_valid)"
]
},
{
"cell_type": "code",
"execution_count": 519,
"metadata": {},
"outputs": [],
"source": [
"plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 520,
"metadata": {},
"outputs": [],
"source": [
"y_pred = model.predict(X_valid)\n",
"plot_series(X_valid[0, :, 0], y_valid[0, 0], y_valid[0, 0])\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Forecasting Several Steps Ahead"
]
},
{
"cell_type": "code",
"execution_count": 390,
"metadata": {},
"outputs": [],
"source": [
"X_new.shape"
]
},
{
"cell_type": "code",
"execution_count": 394,
"metadata": {},
"outputs": [],
"source": [
"Y_pred.shape"
]
},
{
"cell_type": "code",
"execution_count": 401,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(43) # not 42, as it would give the first series in the train set\n",
"\n",
"series = generate_time_series(1, n_steps + 10)\n",
"X_new, Y_new = series[:, :n_steps], series[:, n_steps:]\n",
"X = X_new\n",
"for step_ahead in range(10):\n",
" y_pred_one = model.predict(X[:, step_ahead:])[:, np.newaxis, :]\n",
" X = np.concatenate([X, y_pred_one], axis=1)\n",
"\n",
"Y_pred = X[:, n_steps:]"
]
},
{
"cell_type": "code",
"execution_count": 402,
"metadata": {},
"outputs": [],
"source": [
"Y_pred.shape"
]
},
{
"cell_type": "code",
"execution_count": 403,
"metadata": {},
"outputs": [],
"source": [
"plot_series(X_new[0, :50, 0])\n",
"plt.plot(np.arange(50, 60), Y_pred[0, :, 0], \"ro-\")\n",
"plt.plot(np.arange(50, 60), Y_new[0, :, 0], \"bx-\", markersize=10)\n",
"plt.axis([0, 60, -1, 1])\n",
"save_fig(\"forecast_ahead_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's create an RNN that predicts all 10 next values at once:"
]
},
{
"cell_type": "code",
"execution_count": 521,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"\n",
"n_steps = 50\n",
"series = generate_time_series(10000, n_steps + 10)\n",
"X_train, Y_train = series[:7000, :n_steps], series[:7000, -10:]\n",
"X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, -10:]\n",
"X_test, Y_test = series[9000:, :n_steps], series[9000:, -10:]"
]
},
{
"cell_type": "code",
"execution_count": 522,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
"\n",
"model = keras.models.Sequential([\n",
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
" keras.layers.TimeDistributed(keras.layers.Dense(1)),\n",
" keras.layers.Lambda(lambda Y_pred: Y_pred[:, -10:])\n",
"])\n",
"\n",
"model.compile(loss=\"mse\", optimizer=\"adam\")\n",
"history = model.fit(X_train, Y_train, epochs=20,\n",
" validation_data=(X_valid, Y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 523,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(43)\n",
"\n",
"series = generate_time_series(1, 50 + 10)\n",
"X_new, Y_new = series[:, :50, :], series[:, -10:, :]\n",
"Y_pred = model.predict(X_new)"
]
},
{
"cell_type": "code",
"execution_count": 524,
"metadata": {},
"outputs": [],
"source": [
"plot_series(X_new[0, :50, 0])\n",
"plt.plot(np.arange(50, 60), Y_pred[0, :, 0], \"ro-\")\n",
"plt.plot(np.arange(50, 60), Y_new[0, :, 0], \"bx-\", markersize=10)\n",
"plt.axis([0, 60, -1, 1])\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's create an RNN that predicts the input sequence, shifted 10 steps into the future:"
]
},
{
"cell_type": "code",
"execution_count": 531,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"\n",
"n_steps = 50\n",
"series = generate_time_series(10000, n_steps + 10)\n",
"X_train, Y_train = series[:7000, :n_steps], series[:7000, 10:]\n",
"X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, 10:]\n",
"X_test, Y_test = series[9000:, :n_steps], series[9000:, 10:]"
]
},
{
"cell_type": "code",
"execution_count": 532,
"metadata": {},
"outputs": [],
"source": [
"X_train.shape, Y_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 527,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
"\n",
"model = keras.models.Sequential([\n",
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
"])\n",
"\n",
"def last_10_time_steps_mse(Y_true, Y_pred):\n",
" return keras.metrics.mean_squared_error(Y_true[:, -10:], Y_pred[:, -10:])\n",
"\n",
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
"history = model.fit(X_train, Y_train, epochs=20,\n",
" validation_data=(X_valid, Y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 409,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(43)\n",
"\n",
"series = generate_time_series(1, 50 + 10)\n",
"X_new, Y_new = series[:, :50, :], series[:, 50:, :]\n",
"Y_pred = model.predict(X_new)[:, -10:, :]"
]
},
{
"cell_type": "code",
"execution_count": 410,
"metadata": {},
"outputs": [],
"source": [
"plot_series(X_new[0, :50, 0])\n",
"plt.plot(np.arange(50, 60), Y_pred[0, :, 0], \"ro-\")\n",
"plt.plot(np.arange(50, 60), Y_new[0, :, 0], \"bx-\", markersize=10)\n",
"plt.axis([0, 60, -1, 1])\n",
"save_fig(\"forecast_ahead_multi_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Deep RNN with Batch Norm"
]
},
{
"cell_type": "code",
"execution_count": 534,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
"\n",
"\n",
"model = keras.models.Sequential([\n",
" keras.layers.BatchNormalization(),\n",
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
" keras.layers.BatchNormalization(),\n",
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
" keras.layers.BatchNormalization(),\n",
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
"])\n",
"\n",
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
"history = model.fit(X_train, Y_train, epochs=20,\n",
" validation_data=(X_valid, Y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 365,
"metadata": {},
"outputs": [],
"source": [
"model.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Deep RNNs with Layer Norm"
]
},
{
"cell_type": "code",
"execution_count": 619,
"metadata": {},
"outputs": [],
"source": [
"keras.layers.GRUCell.get_initial_state?"
]
},
{
"cell_type": "code",
"execution_count": 622,
"metadata": {},
"outputs": [],
"source": [
"LayerNormalization = keras.layers.experimental.LayerNormalization\n",
"\n",
"class LNSimpleRNNCell(keras.layers.Layer):\n",
" def __init__(self, units, activation=\"tanh\", **kwargs):\n",
" super().__init__(**kwargs)\n",
" self.state_size = units\n",
" self.output_size = units\n",
" self.simple_rnn_cell = keras.layers.SimpleRNNCell(units,\n",
" activation=None)\n",
" self.layer_norm = LayerNormalization()\n",
" self.activation = keras.activations.get(activation)\n",
" def get_initial_state(self, inputs=None, batch_size=None, dtype=None):\n",
" return tf.zeros([batch_size, self.state_size], dtype=dtype)\n",
" def call(self, inputs, states):\n",
" outputs, new_states = self.simple_rnn_cell(inputs, states)\n",
" norm_outputs = self.activation(self.layer_norm(outputs))\n",
" return norm_outputs, [norm_outputs]"
]
},
{
"cell_type": "code",
"execution_count": 623,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
"\n",
"model = keras.models.Sequential([\n",
" keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True),\n",
" keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True),\n",
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
"])\n",
"\n",
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
"history = model.fit(X_train, Y_train, epochs=20,\n",
" validation_data=(X_valid, Y_valid))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# LSTMs"
]
},
{
"cell_type": "code",
"execution_count": 626,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
"\n",
"model = keras.models.Sequential([\n",
" keras.layers.LSTM(20, return_sequences=True),\n",
" keras.layers.LSTM(20, return_sequences=True),\n",
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
"])\n",
"\n",
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
"history = model.fit(X_train, Y_train, epochs=20,\n",
" validation_data=(X_valid, Y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 368,
"metadata": {},
"outputs": [],
"source": [
"model.evaluate(X_valid, y_valid)"
]
},
{
"cell_type": "code",
"execution_count": 369,
"metadata": {},
"outputs": [],
"source": [
"plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 370,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"y_pred = model.predict(X_valid)\n",
"plot_series(X_valid[0, :, 0], y_valid[0, 0], y_valid[0, 0])\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# GRUs"
]
},
{
"cell_type": "code",
"execution_count": 648,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
"\n",
"model = keras.models.Sequential([\n",
" keras.layers.GRU(20, return_sequences=True),\n",
" keras.layers.GRU(20, return_sequences=True),\n",
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
"])\n",
"\n",
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
"history = model.fit(X_train, Y_train, epochs=20,\n",
" validation_data=(X_valid, Y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 372,
"metadata": {},
"outputs": [],
"source": [
"model.evaluate(X_valid, y_valid)"
]
},
{
"cell_type": "code",
"execution_count": 373,
"metadata": {},
"outputs": [],
"source": [
"plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 374,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"y_pred = model.predict(X_valid)\n",
"plot_series(X_valid[0, :, 0], y_valid[0, 0], y_valid[0, 0])\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using One-Dimensional Convolutional Layers to Process Sequences"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```\n",
"1D conv layer with kernel size 4, stride 2, VALID padding:\n",
"\n",
" |-----2----| |-----5---... |----23-----|\n",
" |-----1----| |-----4-----| ... |-----22----|\n",
" |-----0----| |-----3----| |---...-21---|\n",
"X: 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 43 44 45 46 47 48 49\n",
"Y: 10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n",
"\n",
"Output:\n",
"\n",
"X: 0 1 2 3 4 5 ... 19 20 21 22 23\n",
"Y: 13 15 17 19 21 23 ... 51 53 55 57 59\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 638,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
"\n",
"def last_5_time_steps_mse(Y_true, Y_pred):\n",
" return keras.metrics.mean_squared_error(Y_true[:, -5:], Y_pred[:, -5:])\n",
"\n",
"model = keras.models.Sequential([\n",
" keras.layers.Conv1D(filters=20, kernel_size=4, strides=2, padding=\"VALID\"),\n",
" keras.layers.GRU(20, return_sequences=True),\n",
" keras.layers.GRU(20, return_sequences=True),\n",
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
"])\n",
"\n",
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_5_time_steps_mse])\n",
"history = model.fit(X_train, Y_train[:, 3::2], epochs=20,\n",
" validation_data=(X_valid, Y_valid[:, 3::2]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## WaveNet"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```\n",
"C2 /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\.../\\ /\\ /\\ /\\ /\\ /\\ \n",
" / \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\\n",
" / \\ / \\ / \\ / \\\n",
"C1 /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /.../\\ /\\ /\\ /\\ /\\ /\\ /\\\n",
"X: 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 43 44 45 46 47 48 49\n",
"Y: 10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n",
"\n",
"Output:\n",
"\n",
"X: 0 1 2 3 4 5 ... 19 20 21 22 23\n",
"Y: 13 15 17 19 21 23 ... 51 53 55 57 59\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 671,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential()\n",
"for rate in (1, 2, 4, 8) * 2:\n",
" activation = \"relu\" if len(model.layers) < 7 else None\n",
" model.add(keras.layers.Conv1D(filters=20, kernel_size=2, padding=\"VALID\",\n",
" activation=activation, dilation_rate=rate))\n",
"\n",
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
"history = model.fit(X_train, Y_train[:, 30:], epochs=20,\n",
" validation_data=(X_valid, Y_valid[:, 30:]))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here is the original WaveNet defined in the paper: it uses Gated Activation Units instead of ReLU and parametrized skip connections, plus it pads with zeros on the left to avoid getting shorter and shorter sequences:"
]
},
{
"cell_type": "code",
"execution_count": 734,
"metadata": {},
"outputs": [],
"source": [
"from tensorflow import keras\n",
"\n",
"class GatedActivationUnit(keras.layers.Layer):\n",
" def __init__(self, activation=\"tanh\", **kwargs):\n",
" super().__init__(**kwargs)\n",
" self.activation = keras.activations.get(activation)\n",
" def call(self, inputs):\n",
" n_filters = inputs.shape[-1] // 2\n",
" linear_output = self.activation(inputs[..., :n_filters])\n",
" gate = keras.activations.sigmoid(inputs[..., n_filters:])\n",
" return self.activation(linear_output) * gate"
]
},
{
"cell_type": "code",
"execution_count": 735,
"metadata": {},
"outputs": [],
"source": [
"def wavenet_residual_block(inputs, n_filters, dilation_rate):\n",
" z = keras.backend.temporal_padding(inputs, (dilation_rate, 0))\n",
" z = keras.layers.Conv1D(2 * n_filters, kernel_size=2,\n",
" dilation_rate=dilation_rate)(z)\n",
" z = GatedActivationUnit()(z)\n",
" z = keras.layers.Conv1D(n_filters, kernel_size=1)(z)\n",
" return keras.layers.Add()([z, inputs]), z"
]
},
{
"cell_type": "code",
"execution_count": 736,
"metadata": {},
"outputs": [],
"source": [
"inputs = keras.layers.Input(shape=[10000, 1])\n",
"skip_to_last = []\n",
"n_filters = 128\n",
"z = keras.backend.temporal_padding(inputs, (1, 0))\n",
"z = keras.layers.Conv1D(n_filters, kernel_size=2, kernel_size=1)(z)\n",
"for dilation_rate in [2**i for i in range(10)] * 3:\n",
" z, skip = wavenet_residual_block(z, 128, dilation_rate)\n",
" skip_to_last.append(skip)\n",
"z = keras.activations.relu(keras.layers.Add()(skip_to_last))\n",
"z = keras.layers.Conv1D(128, kernel_size=1, activation=\"relu\")(z)\n",
"Y_proba = keras.layers.Conv1D(256, kernel_size=1, activation=\"softmax\")(z)\n",
"\n",
"model = keras.models.Model(inputs=[inputs], outputs=[Y_proba])"
]
},
{
"cell_type": "code",
"execution_count": 732,
"metadata": {},
"outputs": [],
"source": [
"seq_length = 10000\n",
"n_layers_per_block = 10\n",
"n_blocks = 3\n",
"n_filters = 128\n",
"n_outputs = 256\n",
"\n",
"inputs = keras.layers.Input(shape=[seq_length, 1])\n",
"skip_to_last = []\n",
"z = inputs\n",
"for dilation_rate in [2**i for i in range(n_layers_per_block)] * n_blocks:\n",
" z, skip = wavenet_residual_block(z, n_filters, dilation_rate)\n",
" skip_to_last.append(skip)\n",
"z = keras.activations.relu(keras.layers.Add()(skip_to_last))\n",
"z = keras.layers.Conv1D(n_filters, kernel_size=1, activation=\"relu\")(z)\n",
"Y_proba = keras.layers.Conv1D(n_outputs, kernel_size=1, activation=\"softmax\")(z)\n",
"\n",
"model = keras.models.Model(inputs=[inputs], outputs=[Y_proba])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Low-Level RNN API"
]
},
{
"cell_type": "code",
"execution_count": 611,
"metadata": {},
"outputs": [],
"source": [
"class MyRNN(keras.layers.Layer):\n",
" def __init__(self, cell, return_sequences=False, **kwargs):\n",
" super().__init__(**kwargs)\n",
" self.cell = cell\n",
" self.return_sequences = return_sequences\n",
" try:\n",
" self.initial_state = self.cell.get_initial_state()\n",
" except AttributeError:\n",
" self.initial_state = [tf.zeros(shape=[size], dtype=inputs.dtype)\n",
" for size in self.cell.states_size]\n",
" def call(self, inputs):\n",
" n_steps = tf.shape(inputs)[1]\n",
" if self.return_sequences:\n",
" sequences = tf.TensorArray(inputs.dtype, size=n_steps)\n",
" for step in tf.range(n_steps):\n",
" outputs, states = self.cell(inputs[:, step], states)\n",
" if self.return_sequences:\n",
" sequences.write(step, outputs)\n",
" if self.return_sequences:\n",
" return sequences.stack(), states\n",
" else:\n",
" return outputs, states"
]
},
{
"cell_type": "code",
"execution_count": 612,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
"\n",
"model = keras.models.Sequential([\n",
" MyRNN(LNSimpleRNNCell(20), return_sequences=True),\n",
" MyRNN(LNSimpleRNNCell(20), return_sequences=True),\n",
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
"])\n",
"\n",
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
"history = model.fit(X_train, Y_train, epochs=20,\n",
" validation_data=(X_valid, Y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"batch_size = 32\n",
"\n",
"X_batch = X_train[:batch_size]\n",
"y_batch = y_train[:batch_size]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"n_neurons = 10\n",
"cell = keras.layers.SimpleRNNCell(n_neurons)\n",
"\n",
"states = [tf.zeros((batch_size, n_neurons))]\n",
"for step in range(n_steps):\n",
" output, states = cell(X_batch[:, step], states)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Splitting a sequence into batches of shuffled windows"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For example, let's split the sequence 0 to 14 into windows of length 5, each shifted by 2 (e.g.,`[0, 1, 2, 3, 4]`, `[2, 3, 4, 5, 6]`, etc.), then shuffle them, and split them into inputs (the first 4 steps) and targets (the last 4 steps) (e.g., `[2, 3, 4, 5, 6]` would be split into `[[2, 3, 4, 5], [3, 4, 5, 6]]`), then create batches of 3 such input/target pairs:"
]
},
{
"cell_type": "code",
"execution_count": 467,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"n_steps = 5\n",
"dataset = tf.data.Dataset.from_tensor_slices(tf.range(15))\n",
"dataset = dataset.window(n_steps, shift=2, drop_remainder=True)\n",
"dataset = dataset.flat_map(lambda window: window.batch(n_steps))\n",
"dataset = dataset.shuffle(10).map(lambda window: (window[:-1], window[1:]))\n",
"dataset = dataset.batch(3).prefetch(1)\n",
"for index, (X_batch, y_batch) in enumerate(dataset):\n",
" print(\"_\" * 20, \"Batch\", index, \"\\nX_batch\")\n",
" print(X_batch.numpy())\n",
" print(\"=\" * 5, \"\\nY_batch\")\n",
" print(y_batch.numpy())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Char-RNN"
]
},
{
"cell_type": "code",
"execution_count": 452,
"metadata": {},
"outputs": [],
"source": [
"shakespeare_url = \"https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt\"\n",
"filepath = keras.utils.get_file(\"shakespeare.txt\", shakespeare_url)\n",
"with open(filepath) as f:\n",
" shakespeare_text = f.read()"
]
},
{
"cell_type": "code",
"execution_count": 453,
"metadata": {},
"outputs": [],
"source": [
"print(shakespeare_text[:148])"
]
},
{
"cell_type": "code",
"execution_count": 454,
"metadata": {},
"outputs": [],
"source": [
"\"\".join(sorted(set(shakespeare_text.lower())))"
]
},
{
"cell_type": "code",
"execution_count": 468,
"metadata": {},
"outputs": [],
"source": [
"tokenizer = keras.preprocessing.text.Tokenizer(char_level=True)\n",
"tokenizer.fit_on_texts(shakespeare_text)"
]
},
{
"cell_type": "code",
"execution_count": 469,
"metadata": {},
"outputs": [],
"source": [
"tokenizer.texts_to_sequences([\"First\"])"
]
},
{
"cell_type": "code",
"execution_count": 470,
"metadata": {},
"outputs": [],
"source": [
"tokenizer.sequences_to_texts([[20, 6, 9, 8, 3]])"
]
},
{
"cell_type": "code",
"execution_count": 471,
"metadata": {},
"outputs": [],
"source": [
"max_id = len(tokenizer.word_index) # number of distinct characters\n",
"dataset_size = tokenizer.document_count # total number of characters"
]
},
{
"cell_type": "code",
"execution_count": 472,
"metadata": {},
"outputs": [],
"source": [
"[encoded] = np.array(tokenizer.texts_to_sequences([shakespeare_text]))\n",
"train_size = dataset_size * 90 // 100\n",
"dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])"
]
},
{
"cell_type": "code",
"execution_count": 473,
"metadata": {},
"outputs": [],
"source": [
"n_steps = 100 + 1 # 100 input characters, 1 target\n",
"dataset = dataset.repeat().window(n_steps, shift=1, drop_remainder=True)"
]
},
{
"cell_type": "code",
"execution_count": 474,
"metadata": {},
"outputs": [],
"source": [
"dataset = dataset.flat_map(lambda window: window.batch(n_steps))"
]
},
{
"cell_type": "code",
"execution_count": 475,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
2019-04-05 11:04:38 +02:00
"dataset = dataset.shuffle(10000).map(lambda window: (window[:-1], window[1:]))"
]
},
{
"cell_type": "code",
"execution_count": 476,
"metadata": {},
"outputs": [],
"source": [
"dataset = dataset.map(\n",
" lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))"
]
},
{
"cell_type": "code",
"execution_count": 477,
"metadata": {},
"outputs": [],
"source": [
"batch_size = 32\n",
"dataset = dataset.batch(batch_size).prefetch(1)"
]
},
{
"cell_type": "code",
"execution_count": 478,
"metadata": {},
"outputs": [],
"source": [
"for X_batch, Y_batch in dataset.take(1):\n",
" print(X_batch.shape, Y_batch.shape)"
]
},
{
"cell_type": "code",
"execution_count": 482,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential([\n",
" keras.layers.GRU(128, return_sequences=True),\n",
" keras.layers.GRU(128, return_sequences=True),\n",
" keras.layers.GRU(max_id, return_sequences=True, activation=\"softmax\"),\n",
"])\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\")\n",
"history = model.fit(dataset, steps_per_epoch=train_size // batch_size,\n",
" epochs=20)"
]
},
{
"cell_type": "code",
"execution_count": 490,
"metadata": {},
"outputs": [],
"source": [
"m = keras.models.Sequential([\n",
" keras.layers.LSTM(128, return_sequences=True),\n",
" keras.layers.LSTM(3, return_sequences=True, activation=\"softmax\"),\n",
" #keras.layers.TimeDistributed(keras.layers.Dense(3, activation=\"softmax\")),\n",
"])\n",
"m.predict(np.random.rand(1, 10, 20)).sum(axis=-1)"
]
},
{
"cell_type": "code",
"execution_count": 222,
"metadata": {},
"outputs": [],
"source": [
"def preprocess(texts):\n",
"    \"\"\"One-hot encode `texts` using the notebook's `tokenizer` and `max_id`.\"\"\"\n",
"    # NOTE(review): Keras Tokenizer IDs usually start at 1, so depth=max_id may\n",
"    # map the largest ID to an all-zero vector -- confirm how max_id is defined.\n",
"    token_ids = tokenizer.texts_to_sequences(texts)\n",
"    return tf.one_hot(np.array(token_ids), max_id)"
]
},
{
"cell_type": "code",
"execution_count": 224,
"metadata": {},
"outputs": [],
"source": [
"X_new = preprocess([\"How are yo\"])\n",
"y_pred = model.predict_classes(X_new)\n",
"tokenizer.sequences_to_texts([y_pred])"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [],
"source": [
"model.layers[-1].weights[1].shape"
]
},
{
"cell_type": "code",
"execution_count": 232,
"metadata": {},
"outputs": [],
"source": [
"def next_char(texts, temperature=1):\n",
"    \"\"\"Sample one next character for each text in `texts`.\n",
"\n",
"    Assumes the global `model` outputs one probability vector per time step\n",
"    (its last layer uses return_sequences=True), so only the last time step\n",
"    is kept: tf.random.categorical() expects 2D logits of shape\n",
"    [batch_size, num_classes]. The log-probabilities are divided by\n",
"    `temperature` before sampling.\n",
"\n",
"    Returns a list with one sampled string per input text.\n",
"    \"\"\"\n",
"    X_new = preprocess(texts)\n",
"    y_proba = model.predict(X_new)[:, -1, :]  # keep the last time step only\n",
"    logits = tf.math.log(y_proba) / temperature\n",
"    char_id = tf.random.categorical(logits, num_samples=1)\n",
"    return tokenizer.sequences_to_texts(char_id.numpy())"
]
},
{
"cell_type": "code",
"execution_count": 234,
"metadata": {},
"outputs": [],
"source": [
"def complete_text(text, n_chars=50, temperature=1):\n",
" for _ in range(n_chars):\n",
" text += next_char([text], temperature)[0]\n",
" return text"
]
},
{
"cell_type": "code",
"execution_count": 238,
"metadata": {},
"outputs": [],
"source": [
"print(complete_text(\"W\", temperature=0.001))"
]
},
{
"cell_type": "code",
"execution_count": 365,
"metadata": {},
"outputs": [],
"source": [
"print(complete_text(\"W\", temperature=0.5))"
]
},
{
"cell_type": "code",
"execution_count": 240,
"metadata": {},
"outputs": [],
"source": [
"print(complete_text(\"W\", temperature=1000))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Handling Sequences of Different Sizes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's shorten each time series by chopping off a random number of time steps (from the start, so we don't need to change the targets):"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"def shorten_series(X):\n",
"    \"\"\"Randomly chop time steps off the start of each series in `X`.\n",
"\n",
"    Each row keeps its last `length` steps, with `length` drawn between 10 and\n",
"    the full series length (inclusive). The upper bound is read from the shape\n",
"    of `X` rather than from the global `n_steps`, which other sections of this\n",
"    notebook reassign to unrelated values.\n",
"\n",
"    Returns a tf.RaggedTensor with one variable-length row per series.\n",
"    \"\"\"\n",
"    max_length = np.shape(X)[1]\n",
"    row_lengths = np.random.randint(10, max_length + 1, size=len(X))\n",
"    X_values = np.concatenate([row[-length:] for row, length in zip(X, row_lengths)])\n",
"    return tf.RaggedTensor.from_row_lengths(X_values, row_lengths)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
2016-09-27 23:31:21 +02:00
"\n",
2019-04-05 11:04:38 +02:00
"X_train_ragged = shorten_series(X_train)\n",
"X_valid_ragged = shorten_series(X_valid)\n",
"X_test_ragged = shorten_series(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"X_train_ragged.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The lengths of the first 10 series:"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"[len(series) for series in X_train_ragged[:10]]"
]
},
{
"cell_type": "code",
"execution_count": 148,
"metadata": {},
"outputs": [],
"source": [
"mask_value = 1000.\n",
"X_train_padded = X_train_ragged.to_tensor(default_value=mask_value)\n",
"X_valid_padded = X_valid_ragged.to_tensor(default_value=mask_value)\n",
"X_test_padded = X_test_ragged.to_tensor(default_value=mask_value)"
]
},
{
"cell_type": "code",
"execution_count": 149,
"metadata": {},
"outputs": [],
"source": [
"masking_layer = keras.layers.Masking(mask_value)\n",
"series = np.array([[[1.], [2.], [mask_value], [mask_value]],\n",
" [[3.], [4.], [5.], [mask_value]]])\n",
"masking_layer(series)"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {},
"outputs": [],
"source": [
"masking_layer.compute_mask(series)"
]
},
{
"cell_type": "code",
"execution_count": 165,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
2016-09-27 23:31:21 +02:00
"\n",
2019-04-05 11:04:38 +02:00
"model = keras.models.Sequential([\n",
" keras.layers.Masking(mask_value, input_shape=[50, 1]),\n",
" keras.layers.SimpleRNN(10, return_sequences=True),\n",
" keras.layers.SimpleRNN(10, return_sequences=True),\n",
" keras.layers.SimpleRNN(1, return_sequences=True),\n",
"])\n",
"model(X_train_padded[:1])"
]
},
{
"cell_type": "code",
"execution_count": 170,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
2016-09-27 23:31:21 +02:00
"\n",
2019-04-05 11:04:38 +02:00
"model = keras.models.Sequential([\n",
" keras.layers.Masking(mask_value, input_shape=[50, 1]),\n",
" keras.layers.SimpleRNN(10, return_sequences=True),\n",
" keras.layers.SimpleRNN(100),\n",
"])\n",
"model(X_train_padded[:1])"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
2016-09-27 23:31:21 +02:00
"\n",
2019-04-05 11:04:38 +02:00
"model = keras.models.Sequential([\n",
"    # Mask the value used to pad the ragged series (mask_value); the Masking\n",
"    # layer's default of 0 would leave the padding unmasked.\n",
"    keras.layers.Masking(mask_value, input_shape=[50, 1]),\n",
"    keras.layers.SimpleRNN(10, return_sequences=True),\n",
"    keras.layers.SimpleRNN(10, return_sequences=True),\n",
"    keras.layers.SimpleRNN(1, activation=None)\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"optimizer = keras.optimizers.SGD(lr=1e-4, momentum=0.95, nesterov=True)\n",
"model.compile(loss=\"mse\", optimizer=optimizer)\n",
"history = model.fit(X_train_padded, tf.constant(y_train), epochs=20,\n",
" validation_data=(X_valid_padded, tf.constant(y_valid)))"
2016-09-27 23:31:21 +02:00
]
},
2019-04-05 11:04:38 +02:00
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
2016-09-27 23:31:21 +02:00
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
2019-04-05 11:04:38 +02:00
"# Sketch RNN"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
2019-04-05 11:04:38 +02:00
"execution_count": 1,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
2019-04-05 11:04:38 +02:00
"import tensorflow_datasets as tfds"
2016-09-27 23:31:21 +02:00
]
},
{
2019-04-05 11:04:38 +02:00
"cell_type": "code",
"execution_count": 9,
"metadata": {},
2019-04-05 11:04:38 +02:00
"outputs": [],
2016-09-27 23:31:21 +02:00
"source": [
2019-04-05 11:04:38 +02:00
"datasets = tfds.load(\"quickdraw_sketch_rnn\", as_supervised=True)"
2016-09-27 23:31:21 +02:00
]
},
{
2019-04-05 11:04:38 +02:00
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"train_set = datasets[\"train\"]\n",
"valid_set = datasets[\"validation\"]\n",
"test_set = datasets[\"test\"]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
2019-04-05 11:04:38 +02:00
"outputs": [],
2016-09-27 23:31:21 +02:00
"source": [
2019-04-05 11:04:38 +02:00
"for X_batch, y_batch in train_set.take(2):\n",
" print(X_batch.shape)"
2016-09-27 23:31:21 +02:00
]
},
2019-04-05 11:04:38 +02:00
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
2016-09-27 23:31:21 +02:00
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"reset_graph()\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_inputs = 3\n",
"n_neurons = 5\n",
"\n",
"X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
"X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
"\n",
"Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons],dtype=tf.float32))\n",
"Wy = tf.Variable(tf.random_normal(shape=[n_neurons,n_neurons],dtype=tf.float32))\n",
2016-09-27 23:31:21 +02:00
"b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))\n",
"\n",
"Y0 = tf.tanh(tf.matmul(X0, Wx) + b)\n",
"Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)\n",
"\n",
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
2016-09-27 23:31:21 +02:00
"X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0\n",
"X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"print(Y0_val)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"print(Y1_val)"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
"## Using `static_rnn()`"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Note**: `tf.contrib.rnn` was partially moved to the core API in TensorFlow 1.2. Most of the `*Cell` and `*Wrapper` classes are now available in `tf.nn.rnn_cell`, and the `tf.contrib.rnn.static_rnn()` function is available as `tf.nn.static_rnn()`."
]
},
2016-09-27 23:31:21 +02:00
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"n_inputs = 3\n",
"n_neurons = 5"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
2016-09-27 23:31:21 +02:00
"X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
"X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
"\n",
"basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
"output_seqs, states = tf.nn.static_rnn(basic_cell, [X0, X1],\n",
" dtype=tf.float32)\n",
"Y0, Y1 = output_seqs"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n",
"X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"Y0_val"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"Y1_val"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"from tensorflow_graph_in_jupyter import show_graph"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"show_graph(tf.get_default_graph())"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
"## Packing sequences"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"n_steps = 2\n",
"n_inputs = 3\n",
"n_neurons = 5"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
2016-09-27 23:31:21 +02:00
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
2017-02-17 11:51:26 +01:00
"X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))\n",
2016-09-27 23:31:21 +02:00
"\n",
"basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
"output_seqs, states = tf.nn.static_rnn(basic_cell, X_seqs,\n",
" dtype=tf.float32)\n",
"outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"X_batch = np.array([\n",
" # t = 0 t = 1 \n",
" [[0, 1, 2], [9, 8, 7]], # instance 1\n",
" [[3, 4, 5], [0, 0, 0]], # instance 2\n",
" [[6, 7, 8], [6, 5, 4]], # instance 3\n",
" [[9, 0, 1], [3, 2, 1]], # instance 4\n",
" ])\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" outputs_val = outputs.eval(feed_dict={X: X_batch})"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"print(outputs_val)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"print(np.transpose(outputs_val, axes=[1, 0, 2])[1])"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
"## Using `dynamic_rnn()`"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"n_steps = 2\n",
"n_inputs = 3\n",
"n_neurons = 5"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
2016-09-27 23:31:21 +02:00
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"\n",
"basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
"outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"X_batch = np.array([\n",
" [[0, 1, 2], [9, 8, 7]], # instance 1\n",
" [[3, 4, 5], [0, 0, 0]], # instance 2\n",
" [[6, 7, 8], [6, 5, 4]], # instance 3\n",
" [[9, 0, 1], [3, 2, 1]], # instance 4\n",
" ])\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" outputs_val = outputs.eval(feed_dict={X: X_batch})"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"print(outputs_val)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"show_graph(tf.get_default_graph())"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
"## Setting the sequence lengths"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"n_steps = 2\n",
"n_inputs = 3\n",
"n_neurons = 5\n",
"\n",
"reset_graph()\n",
"\n",
2016-09-27 23:31:21 +02:00
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"seq_length = tf.placeholder(tf.int32, [None])\n",
"outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32,\n",
" sequence_length=seq_length)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"X_batch = np.array([\n",
" # step 0 step 1\n",
" [[0, 1, 2], [9, 8, 7]], # instance 1\n",
" [[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)\n",
" [[6, 7, 8], [6, 5, 4]], # instance 3\n",
" [[9, 0, 1], [3, 2, 1]], # instance 4\n",
" ])\n",
"seq_length_batch = np.array([2, 1, 2, 2])"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"with tf.Session() as sess:\n",
" init.run()\n",
" outputs_val, states_val = sess.run(\n",
" [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"print(outputs_val)"
]
},
{
"cell_type": "code",
"execution_count": 33,
2019-04-05 11:04:38 +02:00
"metadata": {
"scrolled": true
},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"print(states_val)"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
"## Training a sequence classifier"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function. The main differences relevant to this chapter are:\n",
"* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n",
"* the default `activation` is now `None` rather than `tf.nn.relu`."
]
},
2016-09-27 23:31:21 +02:00
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"reset_graph()\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_steps = 28\n",
"n_inputs = 28\n",
"n_neurons = 150\n",
"n_outputs = 10\n",
"\n",
"learning_rate = 0.001\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.int32, [None])\n",
"\n",
"basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
"outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
2016-09-27 23:31:21 +02:00
"\n",
"logits = tf.layers.dense(states, n_outputs)\n",
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,\n",
" logits=logits)\n",
2016-09-27 23:31:21 +02:00
"loss = tf.reduce_mean(xentropy)\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"correct = tf.nn.in_top_k(logits, y, 1)\n",
"accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
"\n",
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Warning**: `tf.examples.tutorials.mnist` is deprecated. We will use `tf.keras.datasets.mnist` instead."
]
},
2016-09-27 23:31:21 +02:00
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()\n",
"X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0\n",
"X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0\n",
"y_train = y_train.astype(np.int32)\n",
"y_test = y_test.astype(np.int32)\n",
"X_valid, X_train = X_train[:5000], X_train[5000:]\n",
"y_valid, y_train = y_train[:5000], y_train[5000:]"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"def shuffle_batch(X, y, batch_size):\n",
" rnd_idx = np.random.permutation(len(X))\n",
" n_batches = len(X) // batch_size\n",
" for batch_idx in np.array_split(rnd_idx, n_batches):\n",
" X_batch, y_batch = X[batch_idx], y[batch_idx]\n",
" yield X_batch, y_batch"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"X_test = X_test.reshape((-1, n_steps, n_inputs))"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
2016-09-27 23:31:21 +02:00
"source": [
"n_epochs = 100\n",
"batch_size = 150\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):\n",
2016-09-27 23:31:21 +02:00
" X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
2016-09-27 23:31:21 +02:00
" acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
" print(epoch, \"Last batch accuracy:\", acc_batch, \"Test accuracy:\", acc_test)"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
"# Multi-layer RNN"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"reset_graph()\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_steps = 28\n",
"n_inputs = 28\n",
"n_outputs = 10\n",
"\n",
"learning_rate = 0.001\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.int32, [None])"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"n_neurons = 100\n",
"n_layers = 3\n",
2016-09-27 23:31:21 +02:00
"\n",
"layers = [tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons,\n",
" activation=tf.nn.relu)\n",
" for layer in range(n_layers)]\n",
"multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell(layers)\n",
"outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"states_concat = tf.concat(axis=1, values=states)\n",
"logits = tf.layers.dense(states_concat, n_outputs)\n",
2017-02-17 11:51:26 +01:00
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
2016-09-27 23:31:21 +02:00
"loss = tf.reduce_mean(xentropy)\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"correct = tf.nn.in_top_k(logits, y, 1)\n",
"accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
"\n",
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"n_epochs = 10\n",
2016-09-27 23:31:21 +02:00
"batch_size = 150\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):\n",
2016-09-27 23:31:21 +02:00
" X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
2016-09-27 23:31:21 +02:00
" acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
" print(epoch, \"Last batch accuracy:\", acc_batch, \"Test accuracy:\", acc_test)"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
"# Time series"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"t_min, t_max = 0, 30\n",
"resolution = 0.1\n",
"\n",
"def time_series(t):\n",
" return t * np.sin(t) / 3 + 2 * np.sin(t*5)\n",
"\n",
"def next_batch(batch_size, n_steps):\n",
" t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)\n",
" Ts = t0 + np.arange(0., n_steps + 1) * resolution\n",
" ys = time_series(Ts)\n",
" return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"t = np.linspace(t_min, t_max, int((t_max - t_min) / resolution))\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_steps = 20\n",
"# n_steps + 1 points spaced exactly `resolution` apart (n_steps inputs plus the\n",
"# final target), matching the spacing used by next_batch()\n",
"t_instance = np.linspace(12.2, 12.2 + resolution * n_steps, n_steps + 1)\n",
"\n",
"plt.figure(figsize=(11,4))\n",
"plt.subplot(121)\n",
"plt.title(\"A time series (generated)\", fontsize=14)\n",
"plt.plot(t, time_series(t), label=r\"$t . \\sin(t) / 3 + 2 . \\sin(5t)$\")\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"b-\", linewidth=3, label=\"A training instance\")\n",
"plt.legend(loc=\"lower left\", fontsize=14)\n",
"plt.axis([0, 30, -17, 13])\n",
"plt.xlabel(\"Time\")\n",
"plt.ylabel(\"Value\")\n",
"\n",
"plt.subplot(122)\n",
"plt.title(\"A training instance\", fontsize=14)\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
"plt.legend(loc=\"upper left\")\n",
"plt.xlabel(\"Time\")\n",
"\n",
"\n",
"save_fig(\"time_series_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"X_batch, y_batch = next_batch(1, n_steps)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"np.c_[X_batch[0], y_batch[0]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
"## Using an `OutputProjectionWrapper`"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's create the RNN. It will contain 100 recurrent neurons and we will unroll it over 20 time steps since each training instance will be 20 inputs long. Each input will contain only one feature (the value at that time). The targets are also sequences of 20 inputs, each containing a single value:"
]
},
2016-09-27 23:31:21 +02:00
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"reset_graph()\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_steps = 20\n",
"n_inputs = 1\n",
"n_neurons = 100\n",
"n_outputs = 1\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
"\n",
"cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
"outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
"At each time step we now have an output vector of size 100. But what we actually want is a single output value at each time step. The simplest solution is to wrap the cell in an `OutputProjectionWrapper`."
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"n_steps = 20\n",
"n_inputs = 1\n",
"n_neurons = 100\n",
"n_outputs = 1\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"cell = tf.contrib.rnn.OutputProjectionWrapper(\n",
" tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),\n",
" output_size=n_outputs)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"learning_rate = 0.001\n",
"\n",
"loss = tf.reduce_mean(tf.square(outputs - y)) # MSE\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
"init = tf.global_variables_initializer()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"n_iterations = 1500\n",
"batch_size = 50\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" if iteration % 100 == 0:\n",
" mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
" print(iteration, \"\\tMSE:\", mse)\n",
" \n",
" saver.save(sess, \"./my_time_series_model\") # not shown in the book"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"with tf.Session() as sess: # not shown in the book\n",
" saver.restore(sess, \"./my_time_series_model\") # not shown\n",
"\n",
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new})"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"y_pred"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"plt.title(\"Testing the model\", fontsize=14)\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
"plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
"plt.legend(loc=\"upper left\")\n",
"plt.xlabel(\"Time\")\n",
"\n",
"save_fig(\"time_series_pred_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
"## Without using an `OutputProjectionWrapper`"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"reset_graph()\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_steps = 20\n",
"n_inputs = 1\n",
"n_neurons = 100\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
"rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"n_outputs = 1\n",
"learning_rate = 0.001"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"loss = tf.reduce_mean(tf.square(outputs - y))\n",
2016-09-27 23:31:21 +02:00
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"n_iterations = 1500\n",
2016-09-27 23:31:21 +02:00
"batch_size = 50\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" if iteration % 100 == 0:\n",
" mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
" print(iteration, \"\\tMSE:\", mse)\n",
" \n",
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
" \n",
" saver.save(sess, \"./my_time_series_model\")"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"y_pred"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"plt.title(\"Testing the model\", fontsize=14)\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
"plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
"plt.legend(loc=\"upper left\")\n",
"plt.xlabel(\"Time\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
"## Generating a creative new sequence"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"with tf.Session() as sess: # not shown in the book\n",
" saver.restore(sess, \"./my_time_series_model\") # not shown\n",
"\n",
" sequence = [0.] * n_steps\n",
" for iteration in range(300):\n",
" X_batch = np.array(sequence[-n_steps:]).reshape(1, n_steps, 1)\n",
" y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
" sequence.append(y_pred[0, -1, 0])"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(8,4))\n",
"plt.plot(np.arange(len(sequence)), sequence, \"b-\")\n",
"plt.plot(t[:n_steps], sequence[:n_steps], \"b-\", linewidth=3)\n",
"plt.xlabel(\"Time\")\n",
"plt.ylabel(\"Value\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
" saver.restore(sess, \"./my_time_series_model\")\n",
2016-09-27 23:31:21 +02:00
"\n",
" sequence1 = [0. for i in range(n_steps)]\n",
" for iteration in range(len(t) - n_steps):\n",
" X_batch = np.array(sequence1[-n_steps:]).reshape(1, n_steps, 1)\n",
" y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
" sequence1.append(y_pred[0, -1, 0])\n",
"\n",
" sequence2 = [time_series(i * resolution + t_min + (t_max-t_min/3)) for i in range(n_steps)]\n",
" for iteration in range(len(t) - n_steps):\n",
" X_batch = np.array(sequence2[-n_steps:]).reshape(1, n_steps, 1)\n",
" y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
" sequence2.append(y_pred[0, -1, 0])\n",
"\n",
"plt.figure(figsize=(11,4))\n",
"plt.subplot(121)\n",
"plt.plot(t, sequence1, \"b-\")\n",
"plt.plot(t[:n_steps], sequence1[:n_steps], \"b-\", linewidth=3)\n",
"plt.xlabel(\"Time\")\n",
"plt.ylabel(\"Value\")\n",
"\n",
"plt.subplot(122)\n",
"plt.plot(t, sequence2, \"b-\")\n",
"plt.plot(t[:n_steps], sequence2[:n_steps], \"b-\", linewidth=3)\n",
"plt.xlabel(\"Time\")\n",
"save_fig(\"creative_sequence_plot\")\n",
2016-09-27 23:31:21 +02:00
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Deep RNN"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## MultiRNNCell"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"n_inputs = 2\n",
"n_steps = 5\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"n_neurons = 100\n",
"n_layers = 3\n",
"\n",
"layers = [tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell(layers)\n",
"outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
"X_batch = np.random.rand(2, n_steps, n_inputs)"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
" init.run()\n",
" outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch})"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
"outputs_val.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Distributing a Deep RNN Across Multiple GPUs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Do **NOT** do this:"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"with tf.device(\"/gpu:0\"): # BAD! This is ignored.\n",
" layer1 = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
"\n",
"with tf.device(\"/gpu:1\"): # BAD! Ignored again.\n",
" layer2 = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Instead, you need a `DeviceCellWrapper`:"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"class DeviceCellWrapper(tf.nn.rnn_cell.RNNCell):\n",
" def __init__(self, device, cell):\n",
" self._cell = cell\n",
" self._device = device\n",
"\n",
" @property\n",
" def state_size(self):\n",
" return self._cell.state_size\n",
"\n",
" @property\n",
" def output_size(self):\n",
" return self._cell.output_size\n",
"\n",
" def __call__(self, inputs, state, scope=None):\n",
" with tf.device(self._device):\n",
" return self._cell(inputs, state, scope)"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"n_inputs = 5\n",
"n_steps = 20\n",
"n_neurons = 100\n",
"\n",
"X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"devices = [\"/cpu:0\", \"/cpu:0\", \"/cpu:0\"] # replace with [\"/gpu:0\", \"/gpu:1\", \"/gpu:2\"] if you have 3 GPUs\n",
"cells = [DeviceCellWrapper(dev,tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons))\n",
" for dev in devices]\n",
"multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell(cells)\n",
"outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Alternatively, since TensorFlow 1.1, you can use the `tf.contrib.rnn.DeviceWrapper` class (alias `tf.nn.rnn_cell.DeviceWrapper` since TF 1.2)."
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {
"scrolled": true
2017-02-17 11:51:26 +01:00
},
"outputs": [],
2016-09-27 23:31:21 +02:00
"source": [
"with tf.Session() as sess:\n",
" init.run()\n",
" print(sess.run(outputs, feed_dict={X: np.random.rand(2, n_steps, n_inputs)}))"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
"## Dropout"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"reset_graph()\n",
2016-09-27 23:31:21 +02:00
"\n",
"n_inputs = 1\n",
2016-09-27 23:31:21 +02:00
"n_neurons = 100\n",
"n_layers = 3\n",
"n_steps = 20\n",
"n_outputs = 1"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: the `input_keep_prob` parameter can be a placeholder, making it possible to set it to any value you want during training, and to 1.0 during testing (effectively turning dropout off). This is a much more elegant solution than what was recommended in earlier versions of the book (i.e., writing your own wrapper class or having a separate model for training and testing). Thanks to Shen Cheng for bringing this to my attention."
]
},
2016-09-27 23:31:21 +02:00
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"keep_prob = tf.placeholder_with_default(1.0, shape=())\n",
"cells = [tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"cells_drop = [tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
" for cell in cells]\n",
"multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell(cells_drop)\n",
"rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"learning_rate = 0.01\n",
"\n",
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
"\n",
"loss = tf.reduce_mean(tf.square(outputs - y))\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
]
},
2016-09-27 23:31:21 +02:00
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"n_iterations = 1500\n",
"batch_size = 50\n",
"train_keep_prob = 0.5\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for iteration in range(n_iterations):\n",
" X_batch, y_batch = next_batch(batch_size, n_steps)\n",
" _, mse = sess.run([training_op, loss],\n",
" feed_dict={X: X_batch, y: y_batch,\n",
" keep_prob: train_keep_prob})\n",
" if iteration % 100 == 0: # not shown in the book\n",
" print(iteration, \"Training MSE:\", mse) # not shown\n",
" \n",
" saver.save(sess, \"./my_dropout_time_series_model\")"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
" saver.restore(sess, \"./my_dropout_time_series_model\")\n",
2016-09-27 23:31:21 +02:00
"\n",
" X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
" y_pred = sess.run(outputs, feed_dict={X: X_new})"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"plt.title(\"Testing the model\", fontsize=14)\n",
"plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
"plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
"plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
"plt.legend(loc=\"upper left\")\n",
"plt.xlabel(\"Time\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Oops, it seems that Dropout does not help at all in this particular case. :/"
]
},
2016-09-27 23:31:21 +02:00
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
"# LSTM"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=n_neurons)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"n_steps = 28\n",
"n_inputs = 28\n",
"n_neurons = 150\n",
"n_outputs = 10\n",
"n_layers = 3\n",
2016-09-27 23:31:21 +02:00
"\n",
"learning_rate = 0.001\n",
"\n",
"X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
"y = tf.placeholder(tf.int32, [None])\n",
"\n",
"lstm_cells = [tf.nn.rnn_cell.BasicLSTMCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"multi_cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)\n",
2016-09-27 23:31:21 +02:00
"outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n",
"top_layer_h_state = states[-1][1]\n",
"logits = tf.layers.dense(top_layer_h_state, n_outputs, name=\"softmax\")\n",
2017-02-17 11:51:26 +01:00
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
2016-09-27 23:31:21 +02:00
"loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"correct = tf.nn.in_top_k(logits, y, 1)\n",
"accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
" \n",
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"states"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"top_layer_h_state"
]
},
{
"cell_type": "code",
"execution_count": 92,
2016-09-27 23:31:21 +02:00
"metadata": {
"scrolled": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"n_epochs = 10\n",
"batch_size = 150\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):\n",
" X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
2016-09-27 23:31:21 +02:00
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
2016-09-27 23:31:21 +02:00
" acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
" print(epoch, \"Last batch accuracy:\", acc_batch, \"Test accuracy:\", acc_test)"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=n_neurons, use_peepholes=True)"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
"source": [
"gru_cell = tf.nn.rnn_cell.GRUCell(num_units=n_neurons)"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-11-24 17:23:11 +01:00
"source": [
"# Embeddings"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-11-24 17:23:11 +01:00
"source": [
"This section is based on TensorFlow's [Word2Vec tutorial](https://www.tensorflow.org/versions/r0.11/tutorials/word2vec/index.html)."
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-11-24 17:23:11 +01:00
"source": [
"## Fetch the data"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
2016-09-27 23:31:21 +02:00
"source": [
2016-11-24 17:23:11 +01:00
"from six.moves import urllib\n",
"\n",
"import errno\n",
2016-11-24 17:23:11 +01:00
"import os\n",
"import zipfile\n",
"\n",
"WORDS_PATH = \"datasets/words\"\n",
"WORDS_URL = 'http://mattmahoney.net/dc/text8.zip'\n",
"\n",
"def mkdir_p(path):\n",
" \"\"\"Create directories, ok if they already exist.\n",
" \n",
" This is for python 2 support. In python >=3.2, simply use:\n",
" >>> os.makedirs(path, exist_ok=True)\n",
" \"\"\"\n",
" try:\n",
" os.makedirs(path)\n",
" except OSError as exc:\n",
" if exc.errno == errno.EEXIST and os.path.isdir(path):\n",
" pass\n",
" else:\n",
" raise\n",
"\n",
2016-11-24 17:23:11 +01:00
"def fetch_words_data(words_url=WORDS_URL, words_path=WORDS_PATH):\n",
" os.makedirs(words_path, exist_ok=True)\n",
" zip_path = os.path.join(words_path, \"words.zip\")\n",
" if not os.path.exists(zip_path):\n",
" urllib.request.urlretrieve(words_url, zip_path)\n",
" with zipfile.ZipFile(zip_path) as f:\n",
" data = f.read(f.namelist()[0])\n",
" return data.decode(\"ascii\").split()"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"words = fetch_words_data()"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"words[:5]"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-09-27 23:31:21 +02:00
"source": [
2016-11-24 17:23:11 +01:00
"## Build the dictionary"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"from collections import Counter\n",
"\n",
"vocabulary_size = 50000\n",
"\n",
"vocabulary = [(\"UNK\", None)] + Counter(words).most_common(vocabulary_size - 1)\n",
"vocabulary = np.array([word for word, _ in vocabulary])\n",
"dictionary = {word: code for code, word in enumerate(vocabulary)}\n",
"data = np.array([dictionary.get(word, 0) for word in words])"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"\" \".join(words[:9]), data[:9]"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"\" \".join([vocabulary[word_index] for word_index in [5241, 3081, 12, 6, 195, 2, 3134, 46, 59]])"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"words[24], data[24]"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-11-24 17:23:11 +01:00
"source": [
"## Generate batches"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"from collections import deque\n",
"\n",
"def generate_batch(batch_size, num_skips, skip_window):\n",
" global data_index\n",
" assert batch_size % num_skips == 0\n",
" assert num_skips <= 2 * skip_window\n",
" batch = np.ndarray(shape=[batch_size], dtype=np.int32)\n",
" labels = np.ndarray(shape=[batch_size, 1], dtype=np.int32)\n",
2016-11-24 17:23:11 +01:00
" span = 2 * skip_window + 1 # [ skip_window target skip_window ]\n",
" buffer = deque(maxlen=span)\n",
" for _ in range(span):\n",
" buffer.append(data[data_index])\n",
" data_index = (data_index + 1) % len(data)\n",
" for i in range(batch_size // num_skips):\n",
" target = skip_window # target label at the center of the buffer\n",
" targets_to_avoid = [ skip_window ]\n",
" for j in range(num_skips):\n",
" while target in targets_to_avoid:\n",
" target = np.random.randint(0, span)\n",
2016-11-24 17:23:11 +01:00
" targets_to_avoid.append(target)\n",
" batch[i * num_skips + j] = buffer[skip_window]\n",
" labels[i * num_skips + j, 0] = buffer[target]\n",
" buffer.append(data[data_index])\n",
" data_index = (data_index + 1) % len(data)\n",
" return batch, labels"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"np.random.seed(42)"
2016-11-24 17:23:11 +01:00
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"data_index = 0\n",
"batch, labels = generate_batch(8, 2, 1)"
2016-11-24 17:23:11 +01:00
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"batch, [vocabulary[word] for word in batch]"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [],
2016-11-24 17:23:11 +01:00
"source": [
"labels, [vocabulary[word] for word in labels[:, 0]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-11-24 17:23:11 +01:00
"source": [
"## Build the model"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"batch_size = 128\n",
"embedding_size = 128 # Dimension of the embedding vector.\n",
"skip_window = 1 # How many words to consider left and right.\n",
"num_skips = 2 # How many times to reuse an input to generate a label.\n",
"\n",
"# We pick a random validation set to sample nearest neighbors. Here we limit the\n",
"# validation samples to the words that have a low numeric ID, which by\n",
"# construction are also the most frequent.\n",
"valid_size = 16 # Random set of words to evaluate similarity on.\n",
"valid_window = 100 # Only pick dev samples in the head of the distribution.\n",
"valid_examples = np.random.choice(valid_window, valid_size, replace=False)\n",
2016-11-24 17:23:11 +01:00
"num_sampled = 64 # Number of negative examples to sample.\n",
"\n",
"learning_rate = 0.01"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"reset_graph()\n",
2016-11-24 17:23:11 +01:00
"\n",
"# Input data.\n",
"train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])\n",
"valid_dataset = tf.constant(valid_examples, dtype=tf.int32)"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
"vocabulary_size = 50000\n",
"embedding_size = 150\n",
2016-11-24 17:23:11 +01:00
"\n",
"# Look up embeddings for inputs.\n",
"init_embeds = tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)\n",
"embeddings = tf.Variable(init_embeds)"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
"train_inputs = tf.placeholder(tf.int32, shape=[None])\n",
"embed = tf.nn.embedding_lookup(embeddings, train_inputs)"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
2016-11-24 17:23:11 +01:00
"# Construct the variables for the NCE loss\n",
"nce_weights = tf.Variable(\n",
" tf.truncated_normal([vocabulary_size, embedding_size],\n",
" stddev=1.0 / np.sqrt(embedding_size)))\n",
"nce_biases = tf.Variable(tf.zeros([vocabulary_size]))\n",
"\n",
"# Compute the average NCE loss for the batch.\n",
"# tf.nce_loss automatically draws a new sample of the negative labels each\n",
"# time we evaluate the loss.\n",
"loss = tf.reduce_mean(\n",
2017-02-17 11:51:26 +01:00
" tf.nn.nce_loss(nce_weights, nce_biases, train_labels, embed,\n",
2016-11-24 17:23:11 +01:00
" num_sampled, vocabulary_size))\n",
"\n",
"# Construct the Adam optimizer\n",
"optimizer = tf.train.AdamOptimizer(learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
"# Compute the cosine similarity between minibatch examples and all embeddings.\n",
"norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), axis=1, keepdims=True))\n",
2016-11-24 17:23:11 +01:00
"normalized_embeddings = embeddings / norm\n",
"valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)\n",
"similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True)\n",
"\n",
"# Add variable initializer.\n",
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-11-24 17:23:11 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-11-24 17:23:11 +01:00
"source": [
"## Train the model"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"num_steps = 10001\n",
2016-11-24 17:23:11 +01:00
"\n",
"with tf.Session() as session:\n",
" init.run()\n",
"\n",
" average_loss = 0\n",
" for step in range(num_steps):\n",
" print(\"\\rIteration: {}\".format(step), end=\"\\t\")\n",
" batch_inputs, batch_labels = generate_batch(batch_size, num_skips, skip_window)\n",
" feed_dict = {train_inputs : batch_inputs, train_labels : batch_labels}\n",
"\n",
" # We perform one update step by evaluating the training op (including it\n",
" # in the list of returned values for session.run()\n",
" _, loss_val = session.run([training_op, loss], feed_dict=feed_dict)\n",
" average_loss += loss_val\n",
"\n",
" if step % 2000 == 0:\n",
" if step > 0:\n",
" average_loss /= 2000\n",
" # The average loss is an estimate of the loss over the last 2000 batches.\n",
" print(\"Average loss at step \", step, \": \", average_loss)\n",
" average_loss = 0\n",
"\n",
" # Note that this is expensive (~20% slowdown if computed every 500 steps)\n",
" if step % 10000 == 0:\n",
" sim = similarity.eval()\n",
" for i in range(valid_size):\n",
" valid_word = vocabulary[valid_examples[i]]\n",
" top_k = 8 # number of nearest neighbors\n",
" nearest = (-sim[i, :]).argsort()[1:top_k+1]\n",
" log_str = \"Nearest to %s:\" % valid_word\n",
" for k in range(top_k):\n",
" close_word = vocabulary[nearest[k]]\n",
" log_str = \"%s %s,\" % (log_str, close_word)\n",
" print(log_str)\n",
"\n",
" final_embeddings = normalized_embeddings.eval()"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-11-24 17:23:11 +01:00
"source": [
"Let's save the final embeddings (of course you can use a TensorFlow `Saver` if you prefer):"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
2017-02-17 11:51:26 +01:00
"np.save(\"./my_final_embeddings.npy\", final_embeddings)"
2016-11-24 17:23:11 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-11-24 17:23:11 +01:00
"source": [
"## Plot the embeddings"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
2016-09-27 23:31:21 +02:00
"outputs": [],
2016-11-24 17:23:11 +01:00
"source": [
"def plot_with_labels(low_dim_embs, labels):\n",
" assert low_dim_embs.shape[0] >= len(labels), \"More labels than embeddings\"\n",
" plt.figure(figsize=(18, 18)) #in inches\n",
" for i, label in enumerate(labels):\n",
" x, y = low_dim_embs[i,:]\n",
" plt.scatter(x, y)\n",
" plt.annotate(label,\n",
" xy=(x, y),\n",
" xytext=(5, 2),\n",
" textcoords='offset points',\n",
" ha='right',\n",
" va='bottom')"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"from sklearn.manifold import TSNE\n",
"\n",
"tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)\n",
"plot_only = 500\n",
"low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:])\n",
"labels = [vocabulary[i] for i in range(plot_only)]\n",
"plot_with_labels(low_dim_embs, labels)"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-11-24 17:23:11 +01:00
"source": [
"# Machine Translation"
]
},
{
"cell_type": "markdown",
"metadata": {},
2016-11-24 17:23:11 +01:00
"source": [
"The `basic_rnn_seq2seq()` function creates a simple Encoder/Decoder model: it first runs an RNN to encode `encoder_inputs` into a state vector, then runs a decoder initialized with the last encoder state on `decoder_inputs`. Encoder and decoder use the same RNN cell type but they don't share parameters."
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"import tensorflow as tf\n",
"reset_graph()\n",
2016-11-24 17:23:11 +01:00
"\n",
"n_steps = 50\n",
"n_neurons = 200\n",
"n_layers = 3\n",
"num_encoder_symbols = 20000\n",
"num_decoder_symbols = 20000\n",
"embedding_size = 150\n",
"learning_rate = 0.01\n",
"\n",
"X = tf.placeholder(tf.int32, [None, n_steps]) # English sentences\n",
"Y = tf.placeholder(tf.int32, [None, n_steps]) # French translations\n",
"W = tf.placeholder(tf.float32, [None, n_steps - 1, 1])\n",
"Y_input = Y[:, :-1]\n",
"Y_target = Y[:, 1:]\n",
"\n",
2017-02-17 11:51:26 +01:00
"encoder_inputs = tf.unstack(tf.transpose(X)) # list of 1D tensors\n",
"decoder_inputs = tf.unstack(tf.transpose(Y_input)) # list of 1D tensors\n",
2016-11-24 17:23:11 +01:00
"\n",
"lstm_cells = [tf.nn.rnn_cell.BasicLSTMCell(num_units=n_neurons)\n",
" for layer in range(n_layers)]\n",
"cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)\n",
2016-11-24 17:23:11 +01:00
"\n",
2017-02-17 11:51:26 +01:00
"output_seqs, states = tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq(\n",
2016-11-24 17:23:11 +01:00
" encoder_inputs,\n",
" decoder_inputs,\n",
" cell,\n",
" num_encoder_symbols,\n",
" num_decoder_symbols,\n",
" embedding_size)\n",
"\n",
2017-02-17 11:51:26 +01:00
"logits = tf.transpose(tf.unstack(output_seqs), perm=[1, 0, 2])"
2016-11-24 17:23:11 +01:00
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
2016-11-24 17:23:11 +01:00
"outputs": [],
"source": [
"logits_flat = tf.reshape(logits, [-1, num_decoder_symbols])\n",
"Y_target_flat = tf.reshape(Y_target, [-1])\n",
"W_flat = tf.reshape(W, [-1])\n",
2017-02-17 11:51:26 +01:00
"xentropy = W_flat * tf.nn.sparse_softmax_cross_entropy_with_logits(labels=Y_target_flat, logits=logits_flat)\n",
2016-11-24 17:23:11 +01:00
"loss = tf.reduce_mean(xentropy)\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()"
2016-11-24 17:23:11 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
2016-11-24 17:23:11 +01:00
},
"source": [
"# Exercise solutions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. to 6."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"See Appendix A."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 7. Embedded Reber Grammars"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First we need to build a function that generates strings based on a grammar. The grammar will be represented as a list of possible transitions for each state. A transition specifies the string to output (or a grammar to generate it) and the next state."
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"\n",
"default_reber_grammar = [\n",
" [(\"B\", 1)], # (state 0) =B=>(state 1)\n",
" [(\"T\", 2), (\"P\", 3)], # (state 1) =T=>(state 2) or =P=>(state 3)\n",
" [(\"S\", 2), (\"X\", 4)], # (state 2) =S=>(state 2) or =X=>(state 4)\n",
" [(\"T\", 3), (\"V\", 5)], # and so on...\n",
" [(\"X\", 3), (\"S\", 6)],\n",
" [(\"P\", 4), (\"V\", 6)],\n",
" [(\"E\", None)]] # (state 6) =E=>(terminal state)\n",
"\n",
"embedded_reber_grammar = [\n",
" [(\"B\", 1)],\n",
" [(\"T\", 2), (\"P\", 3)],\n",
" [(default_reber_grammar, 4)],\n",
" [(default_reber_grammar, 5)],\n",
" [(\"T\", 6)],\n",
" [(\"P\", 6)],\n",
" [(\"E\", None)]]\n",
"\n",
"def generate_string(grammar):\n",
" state = 0\n",
" output = []\n",
" while state is not None:\n",
" index = np.random.randint(len(grammar[state]))\n",
" production, state = grammar[state][index]\n",
" if isinstance(production, list):\n",
" production = generate_string(grammar=production)\n",
" output.append(production)\n",
" return \"\".join(output)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's generate a few strings based on the default Reber grammar:"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [],
"source": [
"for _ in range(25):\n",
" print(generate_string(default_reber_grammar), end=\" \")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Looks good. Now let's generate a few strings based on the embedded Reber grammar:"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [],
"source": [
"for _ in range(25):\n",
" print(generate_string(embedded_reber_grammar), end=\" \")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Okay, now we need a function to generate strings that do not respect the grammar. We could generate a random string, but the task would be a bit too easy, so instead we will generate a string that respects the grammar, and we will corrupt it by changing just one character:"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [],
"source": [
"def generate_corrupted_string(grammar, chars=\"BEPSTVX\"):\n",
" good_string = generate_string(grammar)\n",
" index = np.random.randint(len(good_string))\n",
" good_char = good_string[index]\n",
" bad_char = np.random.choice(sorted(set(chars) - set(good_char)))\n",
" return good_string[:index] + bad_char + good_string[index + 1:]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's look at a few corrupted strings:"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [],
"source": [
"for _ in range(25):\n",
" print(generate_corrupted_string(embedded_reber_grammar), end=\" \")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It's not possible to feed a string directly to an RNN: we need to convert it to a sequence of vectors, first. Each vector will represent a single letter, using a one-hot encoding. For example, the letter \"B\" will be represented as the vector `[1, 0, 0, 0, 0, 0, 0]`, the letter \"E\" will be represented as `[0, 1, 0, 0, 0, 0, 0]` and so on. Let's write a function that converts a string to a sequence of such one-hot vectors. Note that if the string is shorter than `n_steps`, it will be padded with zero vectors (later, we will tell TensorFlow how long each string actually is using the `sequence_length` parameter)."
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [],
"source": [
"def string_to_one_hot_vectors(string, n_steps, chars=\"BEPSTVX\"):\n",
"    \"\"\"One-hot encode `string` as an int32 array of shape (n_steps, len(chars)).\n",
"\n",
"    Row i holds a single 1 in the column matching the position of string[i]\n",
"    in `chars`; rows beyond the end of `string` stay all-zero (padding).\n",
"    \"\"\"\n",
"    column_of = {char: column for column, char in enumerate(chars)}\n",
"    one_hot = np.zeros((n_steps, len(chars)), dtype=np.int32)\n",
"    columns = np.array([column_of[char] for char in string], dtype=np.intp)\n",
"    # Set every (row, column) pair in one assignment instead of looping over rows\n",
"    one_hot[np.arange(len(string)), columns] = 1\n",
"    return one_hot"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [],
"source": [
"string_to_one_hot_vectors(\"BTBTXSETE\", 12)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can now generate the dataset, with 50% good strings, and 50% bad strings:"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [],
"source": [
"def generate_dataset(size):\n",
"    \"\"\"Build a shuffled dataset of `size` one-hot encoded strings.\n",
"\n",
"    The first `size // 2` strings are generated from the embedded Reber\n",
"    grammar (label 1); the remaining ones are corrupted strings (label 0).\n",
"\n",
"    Returns a tuple (X, seq_length, y): the encoded strings, zero-padded to\n",
"    the length of the longest one, the unpadded length of each string, and\n",
"    a column of labels, all three shuffled with the same permutation.\n",
"    \"\"\"\n",
"    n_good = size // 2\n",
"    n_bad = size - n_good\n",
"    good_strings = [generate_string(embedded_reber_grammar)\n",
"                    for _ in range(n_good)]\n",
"    bad_strings = [generate_corrupted_string(embedded_reber_grammar)\n",
"                   for _ in range(n_bad)]\n",
"    all_strings = good_strings + bad_strings\n",
"    lengths = [len(string) for string in all_strings]\n",
"    # Every string is padded up to the length of the longest one\n",
"    n_steps = max(lengths)\n",
"    X = np.array([string_to_one_hot_vectors(string, n_steps)\n",
"                  for string in all_strings])\n",
"    seq_length = np.array(lengths)\n",
"    y = np.array([[1]] * n_good + [[0]] * n_bad)\n",
"    shuffled = np.random.permutation(size)\n",
"    return X[shuffled], seq_length[shuffled], y[shuffled]"
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {},
"outputs": [],
"source": [
"X_train, l_train, y_train = generate_dataset(10000)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's take a look at the first training instance:"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {},
"outputs": [],
"source": [
"X_train[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It's padded with a lot of zeros because the longest string in the dataset is that long. How long is this particular string?"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {},
"outputs": [],
"source": [
"l_train[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"What class is it?"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {},
"outputs": [],
"source": [
"y_train[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Perfect! We are ready to create the RNN to identify good strings. We build a sequence classifier very similar to the one we built earlier to classify MNIST images, with two main differences:\n",
"* First, the input strings have variable length, so we need to specify the `sequence_length` when calling the `dynamic_rnn()` function.\n",
"* Second, this is a binary classifier, so we only need one output neuron that will output, for each input string, a logit (the log-odds that it is a good string; applying the sigmoid function to it gives the estimated probability). For multiclass classification, we used `sparse_softmax_cross_entropy_with_logits()` but for binary classification we use `sigmoid_cross_entropy_with_logits()`.\n"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"possible_chars = \"BEPSTVX\"\n",
"n_inputs = len(possible_chars)\n",
"n_neurons = 30\n",
"n_outputs = 1\n",
"\n",
"learning_rate = 0.02\n",
"momentum = 0.95\n",
"\n",
"X = tf.placeholder(tf.float32, [None, None, n_inputs], name=\"X\")\n",
"seq_length = tf.placeholder(tf.int32, [None], name=\"seq_length\")\n",
"y = tf.placeholder(tf.float32, [None, 1], name=\"y\")\n",
"\n",
"gru_cell = tf.nn.rnn_cell.GRUCell(num_units=n_neurons)\n",
"outputs, states = tf.nn.dynamic_rnn(gru_cell, X, dtype=tf.float32,\n",
" sequence_length=seq_length)\n",
"\n",
"logits = tf.layers.dense(states, n_outputs, name=\"logits\")\n",
"y_pred = tf.cast(tf.greater(logits, 0.), tf.float32, name=\"y_pred\")\n",
"y_proba = tf.nn.sigmoid(logits, name=\"y_proba\")\n",
"\n",
"xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)\n",
"loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
"optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,\n",
" momentum=momentum,\n",
" use_nesterov=True)\n",
"training_op = optimizer.minimize(loss)\n",
"\n",
"correct = tf.equal(y_pred, y, name=\"correct\")\n",
"accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name=\"accuracy\")\n",
"\n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's generate a validation set so we can track progress during training:"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {},
"outputs": [],
2016-11-24 17:23:11 +01:00
"source": [
"X_val, l_val, y_val = generate_dataset(5000)"
2016-11-24 17:23:11 +01:00
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {},
"outputs": [],
"source": [
"n_epochs = 50\n",
"batch_size = 50\n",
"\n",
"# The training set is not reshuffled between epochs, so the mini-batches are\n",
"# the same every time: split the arrays once instead of once per epoch.\n",
"n_batches = len(X_train) // batch_size\n",
"X_batches = np.array_split(X_train, n_batches)\n",
"l_batches = np.array_split(l_train, n_batches)\n",
"y_batches = np.array_split(y_train, n_batches)\n",
"\n",
"with tf.Session() as sess:\n",
"    init.run()\n",
"    for epoch in range(n_epochs):\n",
"        for X_batch, l_batch, y_batch in zip(X_batches, l_batches, y_batches):\n",
"            loss_val, _ = sess.run(\n",
"                [loss, training_op],\n",
"                feed_dict={X: X_batch, seq_length: l_batch, y: y_batch})\n",
"        # Training loss and accuracy are reported for the last mini-batch only\n",
"        acc_train = accuracy.eval(feed_dict={X: X_batch, seq_length: l_batch, y: y_batch})\n",
"        acc_val = accuracy.eval(feed_dict={X: X_val, seq_length: l_val, y: y_val})\n",
"        print(\"{:4d} Train loss: {:.4f}, accuracy: {:.2f}% Validation accuracy: {:.2f}%\".format(\n",
"            epoch, loss_val, 100 * acc_train, 100 * acc_val))\n",
"    # Save the trained parameters so that later cells can restore them\n",
"    saver.save(sess, \"./my_reber_classifier\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's test our RNN on two tricky strings: the first one is bad while the second one is good. They only differ by the second to last character. If the RNN gets this right, it shows that it managed to notice the pattern that the second letter should always be equal to the second to last letter. That requires a fairly long short-term memory (which is the reason why we used a GRU cell)."
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {},
"outputs": [],
"source": [
"test_strings = [\n",
" \"BPBTSSSSSSSXXTTVPXVPXTTTTTVVETE\",\n",
" \"BPBTSSSSSSSXXTTVPXVPXTTTTTVVEPE\"]\n",
"l_test = np.array([len(s) for s in test_strings])\n",
"max_length = l_test.max()\n",
"X_test = [string_to_one_hot_vectors(s, n_steps=max_length)\n",
" for s in test_strings]\n",
"\n",
"with tf.Session() as sess:\n",
" saver.restore(sess, \"./my_reber_classifier\")\n",
" y_proba_val = y_proba.eval(feed_dict={X: X_test, seq_length: l_test})\n",
"\n",
"print()\n",
"print(\"Estimated probability that these are Reber strings:\")\n",
"for index, string in enumerate(test_strings):\n",
" print(\"{}: {:.2f}%\".format(string, 100 * y_proba_val[index][0]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ta-da! It worked fine. The RNN found the correct answers with high confidence. :)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 8. and 9."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Coming soon..."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
2016-09-27 23:31:21 +02:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
2016-09-27 23:31:21 +02:00
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2019-04-05 11:04:38 +02:00
"version": "3.6.8"
2016-09-27 23:31:21 +02:00
},
"nav_menu": {},
"toc": {
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 6,
2016-09-27 23:31:21 +02:00
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 1
2016-09-27 23:31:21 +02:00
}