{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Chapter 15 – Recurrent Neural Networks**"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "_This notebook contains all the sample code in chapter 15._"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First, let's import a few common modules, ensure Matplotlib plots figures inline, and prepare a function to save the figures. We also check that Python 3.5 or later is installed (Python 2.x may still work, but it is deprecated, so we strongly recommend using Python 3 instead), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0-preview."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Python ≥3.5 is required\n",
    "import sys\n",
    "assert sys.version_info >= (3, 5)\n",
    "\n",
    "# Scikit-Learn ≥0.20 is required\n",
    "import sklearn\n",
    "assert sklearn.__version__ >= \"0.20\"\n",
    "\n",
    "# TensorFlow ≥2.0-preview is required\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "assert tf.__version__ >= \"2.0\"\n",
    "\n",
    "# Common imports\n",
    "import numpy as np\n",
    "import os\n",
    "\n",
    "# to make this notebook's output stable across runs\n",
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "# To plot pretty figures\n",
    "%matplotlib inline\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "mpl.rc('axes', labelsize=14)\n",
    "mpl.rc('xtick', labelsize=12)\n",
    "mpl.rc('ytick', labelsize=12)\n",
    "\n",
    "# Where to save the figures\n",
    "PROJECT_ROOT_DIR = \".\"\n",
    "CHAPTER_ID = \"rnn\"\n",
    "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
    "os.makedirs(IMAGES_PATH, exist_ok=True)\n",
    "\n",
    "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n",
    "    path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n",
    "    print(\"Saving figure\", fig_id)\n",
    "    if tight_layout:\n",
    "        plt.tight_layout()\n",
    "    plt.savefig(path, format=fig_extension, dpi=resolution)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Basic RNNs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Generate the Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 493,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_time_series(batch_size, n_steps):\n",
    "    freq1, freq2, offsets1, offsets2 = np.random.rand(4, batch_size, 1)\n",
    "    time = np.linspace(0, 1, n_steps)\n",
    "    series = 0.5 * np.sin((time - offsets1) * (freq1 * 10 + 10))  #   wave 1\n",
    "    series += 0.2 * np.sin((time - offsets2) * (freq2 * 20 + 20))  # + wave 2\n",
    "    series += 0.1 * (np.random.rand(batch_size, n_steps) - 0.5)    # + noise\n",
    "    return series[..., np.newaxis].astype(np.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 494,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "\n",
    "n_steps = 50\n",
    "series = generate_time_series(10000, n_steps + 1)\n",
    "X_train, y_train = series[:7000, :n_steps], series[:7000, -1]\n",
    "X_valid, y_valid = series[7000:9000, :n_steps], series[7000:9000, -1]\n",
    "X_test, y_test = series[9000:, :n_steps], series[9000:, -1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 495,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train.shape, y_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 496,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_series(series, y=None, y_pred=None, x_label=\"$t$\", y_label=\"$x(t)$\"):\n",
    "    plt.plot(series, \".-\")\n",
    "    if y is not None:\n",
    "        plt.plot(n_steps, y, \"bx\", markersize=10)\n",
    "    if y_pred is not None:\n",
    "        plt.plot(n_steps, y_pred, \"ro\")\n",
    "    plt.grid(True)\n",
    "    if x_label:\n",
    "        plt.xlabel(x_label, fontsize=16)\n",
    "    if y_label:\n",
    "        plt.ylabel(y_label, fontsize=16, rotation=0)\n",
    "    plt.hlines(0, 0, 100, linewidth=1)\n",
    "    plt.axis([0, n_steps + 1, -1, 1])\n",
    "\n",
    "fig, axes = plt.subplots(nrows=1, ncols=3, sharey=True, figsize=(12, 4))\n",
    "for col in range(3):\n",
    "    plt.sca(axes[col])\n",
    "    plot_series(X_valid[col, :, 0], y_valid[col, 0],\n",
    "                y_label=(\"$x(t)$\" if col==0 else None))\n",
    "save_fig(\"time_series_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Computing Some Baselines"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Naive predictions (just predict the last observed value):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 497,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = X_valid[:, -1]\n",
    "np.mean(keras.losses.mean_squared_error(y_valid, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 498,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Linear predictions:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 499,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(),\n",
    "    keras.layers.Dense(1)\n",
    "])\n",
    "\n",
    "model.compile(loss=\"mse\", optimizer=\"adam\")\n",
    "history = model.fit(X_train, y_train, epochs=20,\n",
    "                    validation_data=(X_valid, y_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 500,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.evaluate(X_valid, y_valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 501,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_learning_curves(loss, val_loss):\n",
    "    plt.plot(np.arange(len(loss)) + 0.5, loss, \"b.-\", label=\"Training loss\")\n",
    "    plt.plot(np.arange(len(val_loss)) + 1, val_loss, \"r.-\", label=\"Validation loss\")\n",
    "    plt.gca().xaxis.set_major_locator(mpl.ticker.MaxNLocator(integer=True))\n",
    "    plt.axis([1, 20, 0, 0.05])\n",
    "    plt.legend(fontsize=14)\n",
    "    plt.xlabel(\"Epochs\")\n",
    "    plt.ylabel(\"Loss\")\n",
    "    plt.grid(True)\n",
    "\n",
    "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 502,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_valid)\n",
    "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using a Simple RNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 503,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "model = keras.models.Sequential([keras.layers.SimpleRNN(1)])\n",
    "\n",
    "model.compile(loss=\"mse\", optimizer=\"adam\")\n",
    "history = model.fit(X_train, y_train, epochs=20,\n",
    "                    validation_data=(X_valid, y_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 504,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.evaluate(X_valid, y_valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 505,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 506,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_valid)\n",
    "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Deep RNNs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 513,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
    "    keras.layers.SimpleRNN(1)\n",
    "])\n",
    "\n",
    "model.compile(loss=\"mse\", optimizer=\"adam\")\n",
    "history = model.fit(X_train, y_train, epochs=20,\n",
    "                    validation_data=(X_valid, y_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 514,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.evaluate(X_valid, y_valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 515,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 516,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_valid)\n",
    "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 517,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
    "    keras.layers.SimpleRNN(20),\n",
    "    keras.layers.Dense(1)\n",
    "])\n",
    "\n",
    "model.compile(loss=\"mse\", optimizer=\"adam\")\n",
    "history = model.fit(X_train, y_train, epochs=20,\n",
    "                    validation_data=(X_valid, y_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 518,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.evaluate(X_valid, y_valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 519,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 520,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_valid)\n",
    "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Forecasting Several Steps Ahead"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 401,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(43) # not 42, as it would give the first series in the train set\n",
    "\n",
    "series = generate_time_series(1, n_steps + 10)\n",
    "X_new, Y_new = series[:, :n_steps], series[:, n_steps:]\n",
    "X = X_new\n",
    "for step_ahead in range(10):\n",
    "    y_pred_one = model.predict(X[:, step_ahead:])[:, np.newaxis, :]\n",
    "    X = np.concatenate([X, y_pred_one], axis=1)\n",
    "\n",
    "Y_pred = X[:, n_steps:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 390,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_new.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 402,
   "metadata": {},
   "outputs": [],
   "source": [
    "Y_pred.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 403,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_series(X_new[0, :50, 0])\n",
    "plt.plot(np.arange(50, 60), Y_pred[0, :, 0], \"ro-\")\n",
    "plt.plot(np.arange(50, 60), Y_new[0, :, 0], \"bx-\", markersize=10)\n",
    "plt.axis([0, 60, -1, 1])\n",
    "save_fig(\"forecast_ahead_plot\")\n",
    "plt.show()"
   ]
  },
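  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sanity check (this cell is ours, not from the book's text), we can measure the MSE of this iterative 10-step forecast against the true future values of the series we just generated:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# mean squared error over the 10 forecast steps\n",
    "np.mean(keras.losses.mean_squared_error(Y_new, Y_pred))"
   ]
  },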
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now let's create an RNN that predicts all 10 next values at once:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 521,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "\n",
    "n_steps = 50\n",
    "series = generate_time_series(10000, n_steps + 10)\n",
    "X_train, Y_train = series[:7000, :n_steps], series[:7000, -10:]\n",
    "X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, -10:]\n",
    "X_test, Y_test = series[9000:, :n_steps], series[9000:, -10:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 522,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
    "    keras.layers.TimeDistributed(keras.layers.Dense(1)),\n",
    "    keras.layers.Lambda(lambda Y_pred: Y_pred[:, -10:])\n",
    "])\n",
    "\n",
    "model.compile(loss=\"mse\", optimizer=\"adam\")\n",
    "history = model.fit(X_train, Y_train, epochs=20,\n",
    "                    validation_data=(X_valid, Y_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 523,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(43)\n",
    "\n",
    "series = generate_time_series(1, 50 + 10)\n",
    "X_new, Y_new = series[:, :50, :], series[:, -10:, :]\n",
    "Y_pred = model.predict(X_new)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 524,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_series(X_new[0, :50, 0])\n",
    "plt.plot(np.arange(50, 60), Y_pred[0, :, 0], \"ro-\")\n",
    "plt.plot(np.arange(50, 60), Y_new[0, :, 0], \"bx-\", markersize=10)\n",
    "plt.axis([0, 60, -1, 1])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now let's create an RNN that predicts the input sequence, shifted 10 steps into the future:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 531,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "\n",
    "n_steps = 50\n",
    "series = generate_time_series(10000, n_steps + 10)\n",
    "X_train, Y_train = series[:7000, :n_steps], series[:7000, 10:]\n",
    "X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, 10:]\n",
    "X_test, Y_test = series[9000:, :n_steps], series[9000:, 10:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 532,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train.shape, Y_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 527,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
    "    keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
    "])\n",
    "\n",
    "def last_10_time_steps_mse(Y_true, Y_pred):\n",
    "    return keras.metrics.mean_squared_error(Y_true[:, -10:], Y_pred[:, -10:])\n",
    "\n",
    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
    "history = model.fit(X_train, Y_train, epochs=20,\n",
    "                    validation_data=(X_valid, Y_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 409,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(43)\n",
    "\n",
    "series = generate_time_series(1, 50 + 10)\n",
    "X_new, Y_new = series[:, :50, :], series[:, 50:, :]\n",
    "Y_pred = model.predict(X_new)[:, -10:, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 410,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_series(X_new[0, :50, 0])\n",
    "plt.plot(np.arange(50, 60), Y_pred[0, :, 0], \"ro-\")\n",
    "plt.plot(np.arange(50, 60), Y_new[0, :, 0], \"bx-\", markersize=10)\n",
    "plt.axis([0, 60, -1, 1])\n",
    "save_fig(\"forecast_ahead_multi_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Deep RNN with Batch Norm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 534,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.BatchNormalization(),\n",
    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
    "    keras.layers.BatchNormalization(),\n",
    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
    "    keras.layers.BatchNormalization(),\n",
    "    keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
    "])\n",
    "\n",
    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
    "history = model.fit(X_train, Y_train, epochs=20,\n",
    "                    validation_data=(X_valid, Y_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 365,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Deep RNNs with Layer Norm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 619,
   "metadata": {},
   "outputs": [],
   "source": [
    "keras.layers.GRUCell.get_initial_state?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 622,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    LayerNormalization = keras.layers.LayerNormalization\n",
    "except AttributeError:  # TF 2.0 preview builds\n",
    "    LayerNormalization = keras.layers.experimental.LayerNormalization\n",
    "\n",
    "class LNSimpleRNNCell(keras.layers.Layer):\n",
    "    def __init__(self, units, activation=\"tanh\", **kwargs):\n",
    "        super().__init__(**kwargs)\n",
    "        self.state_size = units\n",
    "        self.output_size = units\n",
    "        self.simple_rnn_cell = keras.layers.SimpleRNNCell(units,\n",
    "                                                          activation=None)\n",
    "        self.layer_norm = LayerNormalization()\n",
    "        self.activation = keras.activations.get(activation)\n",
    "    def get_initial_state(self, inputs=None, batch_size=None, dtype=None):\n",
    "        return tf.zeros([batch_size, self.state_size], dtype=dtype)\n",
    "    def call(self, inputs, states):\n",
    "        outputs, new_states = self.simple_rnn_cell(inputs, states)\n",
    "        norm_outputs = self.activation(self.layer_norm(outputs))\n",
    "        return norm_outputs, [norm_outputs]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 623,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True),\n",
    "    keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True),\n",
    "    keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
    "])\n",
    "\n",
    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
    "history = model.fit(X_train, Y_train, epochs=20,\n",
    "                    validation_data=(X_valid, Y_valid))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LSTMs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 626,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.LSTM(20, return_sequences=True),\n",
    "    keras.layers.LSTM(20, return_sequences=True),\n",
    "    keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
    "])\n",
    "\n",
    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
    "history = model.fit(X_train, Y_train, epochs=20,\n",
    "                    validation_data=(X_valid, Y_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 368,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.evaluate(X_valid, Y_valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 369,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 370,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_valid)\n",
    "# mark the target and prediction for the last time step (10 steps ahead)\n",
    "plot_series(X_valid[0, :, 0], Y_valid[0, -1, 0], y_pred[0, -1, 0])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GRUs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 648,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.GRU(20, return_sequences=True),\n",
    "    keras.layers.GRU(20, return_sequences=True),\n",
    "    keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
    "])\n",
    "\n",
    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
    "history = model.fit(X_train, Y_train, epochs=20,\n",
    "                    validation_data=(X_valid, Y_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 372,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.evaluate(X_valid, Y_valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 373,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 374,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_valid)\n",
    "# mark the target and prediction for the last time step (10 steps ahead)\n",
    "plot_series(X_valid[0, :, 0], Y_valid[0, -1, 0], y_pred[0, -1, 0])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using One-Dimensional Convolutional Layers to Process Sequences"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```\n",
    "1D conv layer with kernel size 4, stride 2, VALID padding:\n",
    "\n",
    "   |-----2----| |-----5---... |----23-----|\n",
    "  |-----1----| |-----4-----| ... |-----22----|\n",
    " |-----0----| |-----3----| |---...-21---|\n",
    "X: 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 43 44 45 46 47 48 49\n",
    "Y: 10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n",
    "\n",
    "Output:\n",
    "\n",
    "X: 0  1  2  3  4  5  ... 19 20 21 22 23\n",
    "Y: 13 15 17 19 21 23 ... 51 53 55 57 59\n",
    "```"
   ]
  },
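  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick check of the arithmetic in the diagram above (this cell is ours, not from the book's text): with kernel size 4 and stride 2, \"VALID\" padding maps the 50 input steps to (50 - 4) // 2 + 1 = 24 output steps, which is why the targets are downsampled with `Y_train[:, 3::2]` in the next cell:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# output length of the conv layer vs. the downsampled targets\n",
    "(50 - 4) // 2 + 1, Y_train[:, 3::2].shape"
   ]
  },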
  {
   "cell_type": "code",
   "execution_count": 638,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "def last_5_time_steps_mse(Y_true, Y_pred):\n",
    "    return keras.metrics.mean_squared_error(Y_true[:, -5:], Y_pred[:, -5:])\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.Conv1D(filters=20, kernel_size=4, strides=2, padding=\"VALID\"),\n",
    "    keras.layers.GRU(20, return_sequences=True),\n",
    "    keras.layers.GRU(20, return_sequences=True),\n",
    "    keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
    "])\n",
    "\n",
    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_5_time_steps_mse])\n",
    "history = model.fit(X_train, Y_train[:, 3::2], epochs=20,\n",
    "                    validation_data=(X_valid, Y_valid[:, 3::2]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## WaveNet"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```\n",
    "C2 /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\.../\\ /\\ /\\ /\\ /\\ /\\\n",
    "  /  \\ /  \\ /  \\ /  \\ /  \\ /  \\ /  \\ /  \\ /  \\\n",
    " /    \\    /    \\    /    \\    /    \\\n",
    "C1 /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /.../\\ /\\ /\\ /\\ /\\ /\\ /\\\n",
    "X: 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 43 44 45 46 47 48 49\n",
    "Y: 10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n",
    "\n",
    "Output:\n",
    "\n",
    "X: 0  1  2  3  4  5  ... 19 20 21 22 23\n",
    "Y: 13 15 17 19 21 23 ... 51 53 55 57 59\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 671,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = keras.models.Sequential()\n",
    "for rate in (1, 2, 4, 8) * 2:\n",
    "    activation = \"relu\" if len(model.layers) < 7 else None\n",
    "    model.add(keras.layers.Conv1D(filters=20, kernel_size=2, padding=\"VALID\",\n",
    "                                  activation=activation, dilation_rate=rate))\n",
    "# project the 20 filters down to a single forecast value per time step\n",
    "model.add(keras.layers.Conv1D(filters=1, kernel_size=1))\n",
    "\n",
    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
    "history = model.fit(X_train, Y_train[:, 30:], epochs=20,\n",
    "                    validation_data=(X_valid, Y_valid[:, 30:]))\n"
   ]
  },
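  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A back-of-the-envelope check of the receptive field (this cell is ours, not from the book's text): each dilated convolution with kernel size 2 and dilation rate _r_ extends the receptive field by _r_ time steps, so two blocks of rates 1, 2, 4, 8 give 1 + 2 × (1 + 2 + 4 + 8) = 31 steps, which is why 30 time steps are trimmed from the targets above:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# receptive field = 1 + sum of the dilation rates (kernel size 2)\n",
    "1 + sum((1, 2, 4, 8) * 2)"
   ]
  },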
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here is the original WaveNet defined in the paper: it uses Gated Activation Units instead of ReLU, as well as parametrized skip connections, and it pads with zeros on the left to avoid getting shorter and shorter sequences:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 734,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tensorflow import keras\n",
    "\n",
    "class GatedActivationUnit(keras.layers.Layer):\n",
    "    def __init__(self, activation=\"tanh\", **kwargs):\n",
    "        super().__init__(**kwargs)\n",
    "        self.activation = keras.activations.get(activation)\n",
    "    def call(self, inputs):\n",
    "        n_filters = inputs.shape[-1] // 2\n",
    "        linear_output = self.activation(inputs[..., :n_filters])\n",
    "        gate = keras.activations.sigmoid(inputs[..., n_filters:])\n",
    "        return linear_output * gate  # the activation was already applied above"
   ]
  },
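  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal smoke test (ours, not from the book's text): the layer splits its input channels in half, applies the activation to the first half and a sigmoid gate to the second half, and multiplies them, so 8 input channels produce 4 gated output channels:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 8 channels in -> 4 gated channels out\n",
    "GatedActivationUnit()(tf.random.normal([1, 5, 8])).shape"
   ]
  },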
  {
   "cell_type": "code",
   "execution_count": 735,
   "metadata": {},
   "outputs": [],
   "source": [
    "def wavenet_residual_block(inputs, n_filters, dilation_rate):\n",
    "    z = keras.backend.temporal_padding(inputs, (dilation_rate, 0))\n",
    "    z = keras.layers.Conv1D(2 * n_filters, kernel_size=2,\n",
    "                            dilation_rate=dilation_rate)(z)\n",
    "    z = GatedActivationUnit()(z)\n",
    "    z = keras.layers.Conv1D(n_filters, kernel_size=1)(z)\n",
    "    return keras.layers.Add()([z, inputs]), z"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 736,
   "metadata": {},
   "outputs": [],
   "source": [
    "inputs = keras.layers.Input(shape=[10000, 1])\n",
    "skip_to_last = []\n",
    "n_filters = 128\n",
    "z = keras.backend.temporal_padding(inputs, (1, 0))\n",
    "z = keras.layers.Conv1D(n_filters, kernel_size=2)(z)  # causal: left-padded, so the length is preserved\n",
    "for dilation_rate in [2**i for i in range(10)] * 3:\n",
    "    z, skip = wavenet_residual_block(z, 128, dilation_rate)\n",
    "    skip_to_last.append(skip)\n",
    "z = keras.activations.relu(keras.layers.Add()(skip_to_last))\n",
    "z = keras.layers.Conv1D(128, kernel_size=1, activation=\"relu\")(z)\n",
    "Y_proba = keras.layers.Conv1D(256, kernel_size=1, activation=\"softmax\")(z)\n",
    "\n",
    "model = keras.models.Model(inputs=[inputs], outputs=[Y_proba])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 732,
   "metadata": {},
   "outputs": [],
   "source": [
    "seq_length = 10000\n",
    "n_layers_per_block = 10\n",
    "n_blocks = 3\n",
    "n_filters = 128\n",
    "n_outputs = 256\n",
    "\n",
    "inputs = keras.layers.Input(shape=[seq_length, 1])\n",
    "skip_to_last = []\n",
    "z = inputs\n",
    "for dilation_rate in [2**i for i in range(n_layers_per_block)] * n_blocks:\n",
    "    z, skip = wavenet_residual_block(z, n_filters, dilation_rate)\n",
    "    skip_to_last.append(skip)\n",
    "z = keras.activations.relu(keras.layers.Add()(skip_to_last))\n",
    "z = keras.layers.Conv1D(n_filters, kernel_size=1, activation=\"relu\")(z)\n",
    "Y_proba = keras.layers.Conv1D(n_outputs, kernel_size=1, activation=\"softmax\")(z)\n",
    "\n",
    "model = keras.models.Model(inputs=[inputs], outputs=[Y_proba])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Low-Level RNN API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 611,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MyRNN(keras.layers.Layer):\n",
    "    def __init__(self, cell, return_sequences=False, **kwargs):\n",
    "        super().__init__(**kwargs)\n",
    "        self.cell = cell\n",
    "        self.return_sequences = return_sequences\n",
    "    def get_initial_state(self, inputs):\n",
    "        try:\n",
    "            return self.cell.get_initial_state(\n",
    "                batch_size=tf.shape(inputs)[0], dtype=inputs.dtype)\n",
    "        except AttributeError:\n",
    "            return [tf.zeros([tf.shape(inputs)[0], size], dtype=inputs.dtype)\n",
    "                    for size in tf.nest.flatten(self.cell.state_size)]\n",
    "    def call(self, inputs):\n",
    "        states = tf.nest.flatten(self.get_initial_state(inputs))\n",
    "        n_steps = tf.shape(inputs)[1]\n",
    "        if self.return_sequences:\n",
    "            sequences = tf.TensorArray(inputs.dtype, size=n_steps)\n",
    "        # pre-define outputs so autograph can handle the loop\n",
    "        outputs = tf.zeros([tf.shape(inputs)[0], self.cell.output_size],\n",
    "                           dtype=inputs.dtype)\n",
    "        for step in tf.range(n_steps):\n",
    "            outputs, states = self.cell(inputs[:, step], states)\n",
    "            if self.return_sequences:\n",
    "                sequences = sequences.write(step, outputs)  # write() returns the array\n",
    "        if self.return_sequences:\n",
    "            # the TensorArray stacks along the time axis: [time, batch, dim] -> [batch, time, dim]\n",
    "            return tf.transpose(sequences.stack(), [1, 0, 2])\n",
    "        else:\n",
    "            return outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 612,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    MyRNN(LNSimpleRNNCell(20), return_sequences=True),\n",
    "    MyRNN(LNSimpleRNNCell(20), return_sequences=True),\n",
    "    keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
    "])\n",
    "\n",
    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
    "history = model.fit(X_train, Y_train, epochs=20,\n",
    "                    validation_data=(X_valid, Y_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 32\n",
    "\n",
    "X_batch = X_train[:batch_size]\n",
    "y_batch = y_train[:batch_size]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_neurons = 10\n",
    "cell = keras.layers.SimpleRNNCell(n_neurons)\n",
    "\n",
    "states = [tf.zeros((batch_size, n_neurons))]\n",
    "for step in range(n_steps):\n",
    "    output, states = cell(X_batch[:, step], states)"
   ]
  },
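  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A small check we added (not from the book's text): `SimpleRNNCell` returns the output of shape [batch size, units] plus the list of state tensors (for a simple cell, the state is just the output):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# output: [batch_size, n_neurons]; states: a list with one tensor of the same shape\n",
    "output.shape, [s.shape for s in states]"
   ]
  },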
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Splitting a sequence into batches of shuffled windows"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For example, let's split the sequence 0 to 14 into windows of length 5, each shifted by 2 (e.g., `[0, 1, 2, 3, 4]`, `[2, 3, 4, 5, 6]`, etc.), then shuffle them, split them into inputs (the first 4 steps) and targets (the last 4 steps) (e.g., `[2, 3, 4, 5, 6]` would be split into `[[2, 3, 4, 5], [3, 4, 5, 6]]`), and finally create batches of 3 such input/target pairs:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 467,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "n_steps = 5\n",
    "dataset = tf.data.Dataset.from_tensor_slices(tf.range(15))\n",
    "dataset = dataset.window(n_steps, shift=2, drop_remainder=True)\n",
    "dataset = dataset.flat_map(lambda window: window.batch(n_steps))\n",
    "dataset = dataset.shuffle(10).map(lambda window: (window[:-1], window[1:]))\n",
    "dataset = dataset.batch(3).prefetch(1)\n",
    "for index, (X_batch, y_batch) in enumerate(dataset):\n",
    "    print(\"_\" * 20, \"Batch\", index, \"\\nX_batch\")\n",
    "    print(X_batch.numpy())\n",
    "    print(\"=\" * 5, \"\\nY_batch\")\n",
    "    print(y_batch.numpy())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Char-RNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 452,
   "metadata": {},
   "outputs": [],
   "source": [
    "shakespeare_url = \"https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt\"\n",
    "filepath = keras.utils.get_file(\"shakespeare.txt\", shakespeare_url)\n",
    "with open(filepath) as f:\n",
    "    shakespeare_text = f.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 453,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(shakespeare_text[:148])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 454,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\".join(sorted(set(shakespeare_text.lower())))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 468,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer = keras.preprocessing.text.Tokenizer(char_level=True)\n",
    "tokenizer.fit_on_texts(shakespeare_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 469,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer.texts_to_sequences([\"First\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 470,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer.sequences_to_texts([[20, 6, 9, 8, 3]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 471,
   "metadata": {},
   "outputs": [],
   "source": [
    "max_id = len(tokenizer.word_index) # number of distinct characters\n",
    "dataset_size = tokenizer.document_count # total number of characters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 472,
   "metadata": {},
   "outputs": [],
   "source": [
    "# subtract 1 to get ids from 0 to max_id - 1 (the tokenizer's ids start at 1)\n",
    "[encoded] = np.array(tokenizer.texts_to_sequences([shakespeare_text])) - 1\n",
    "train_size = dataset_size * 90 // 100\n",
    "dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 473,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_steps = 100 + 1 # windows of 101 chars: 100 inputs, plus 1 so the targets can be shifted by one\n",
    "dataset = dataset.repeat().window(n_steps, shift=1, drop_remainder=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 474,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = dataset.flat_map(lambda window: window.batch(n_steps))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 475,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = dataset.shuffle(10000).map(lambda window: (window[:-1], window[1:]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 476,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = dataset.map(\n",
    "    lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 477,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 32\n",
    "dataset = dataset.batch(batch_size).prefetch(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 478,
   "metadata": {},
   "outputs": [],
   "source": [
    "for X_batch, Y_batch in dataset.take(1):\n",
    "    print(X_batch.shape, Y_batch.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 482,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = keras.models.Sequential([\n",
    "    keras.layers.GRU(128, return_sequences=True),\n",
    "    keras.layers.GRU(128, return_sequences=True),\n",
    "    keras.layers.GRU(max_id, return_sequences=True, activation=\"softmax\"),\n",
    "])\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\")\n",
    "history = model.fit(dataset, steps_per_epoch=train_size // batch_size,\n",
    "                    epochs=20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 490,
   "metadata": {},
   "outputs": [],
   "source": [
    "m = keras.models.Sequential([\n",
    "    keras.layers.LSTM(128, return_sequences=True),\n",
    "    keras.layers.LSTM(3, return_sequences=True, activation=\"softmax\"),\n",
    "    #keras.layers.TimeDistributed(keras.layers.Dense(3, activation=\"softmax\")),\n",
    "])\n",
    "m.predict(np.random.rand(1, 10, 20)).sum(axis=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 222,
   "metadata": {},
   "outputs": [],
   "source": [
    "def preprocess(texts):\n",
    "    X = np.array(tokenizer.texts_to_sequences(texts)) - 1  # same 0-based ids as in training\n",
    "    return tf.one_hot(X, max_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 224,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_new = preprocess([\"How are yo\"])\n",
    "y_pred = model.predict_classes(X_new)\n",
    "tokenizer.sequences_to_texts(y_pred + 1)  # shift back to the tokenizer's 1-based ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.layers[-1].weights[1].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 232,
   "metadata": {},
   "outputs": [],
   "source": [
    "def next_char(texts, temperature=1):\n",
    "    X_new = preprocess(texts)\n",
    "    y_proba = model.predict(X_new)[:, -1, :]  # probabilities for the last time step\n",
    "    logits = tf.math.log(y_proba) / temperature\n",
    "    char_id = tf.random.categorical(logits, num_samples=1) + 1  # back to 1-based ids\n",
    "    return tokenizer.sequences_to_texts(char_id.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 234,
   "metadata": {},
   "outputs": [],
   "source": [
    "def complete_text(text, n_chars=50, temperature=1):\n",
    "    for _ in range(n_chars):\n",
    "        text += next_char([text], temperature)[0]\n",
    "    return text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 238,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(complete_text(\"W\", temperature=0.001))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 365,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(complete_text(\"W\", temperature=0.5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 240,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(complete_text(\"W\", temperature=1000))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Handling Sequences of Different Sizes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's shorten each time series by chopping off a random number of time steps (from the start, so we don't need to change the targets):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "def shorten_series(X):\n",
    "    row_lengths = np.random.randint(10, n_steps + 1, size=len(X))\n",
    "    X_values = np.concatenate([row[-length:] for row, length in zip(X, row_lengths)])\n",
    "    return tf.RaggedTensor.from_row_lengths(X_values, row_lengths)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "\n",
    "X_train_ragged = shorten_series(X_train)\n",
    "X_valid_ragged = shorten_series(X_valid)\n",
    "X_test_ragged = shorten_series(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train_ragged.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The lengths of the first 10 series:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "[len(series) for series in X_train_ragged[:10]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [],
   "source": [
    "mask_value = 1000.\n",
    "X_train_padded = X_train_ragged.to_tensor(default_value=mask_value)\n",
    "X_valid_padded = X_valid_ragged.to_tensor(default_value=mask_value)\n",
    "X_test_padded = X_test_ragged.to_tensor(default_value=mask_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {},
   "outputs": [],
   "source": [
    "masking_layer = keras.layers.Masking(mask_value)\n",
    "series = np.array([[[1.], [2.], [mask_value], [mask_value]],\n",
    "                   [[3.], [4.], [5.], [mask_value]]])\n",
    "masking_layer(series)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [],
   "source": [
    "masking_layer.compute_mask(series)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.Masking(mask_value, input_shape=[50, 1]),\n",
    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
    "    keras.layers.SimpleRNN(1, return_sequences=True),\n",
    "])\n",
    "model(X_train_padded[:1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.Masking(mask_value, input_shape=[50, 1]),\n",
    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
    "    keras.layers.SimpleRNN(100),\n",
    "])\n",
    "model(X_train_padded[:1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.Masking(mask_value, input_shape=[50, 1]),\n",
    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
    "    keras.layers.SimpleRNN(1, activation=None)\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = keras.optimizers.SGD(lr=1e-4, momentum=0.95, nesterov=True)\n",
    "model.compile(loss=\"mse\", optimizer=optimizer)\n",
    "history = model.fit(X_train_padded, tf.constant(y_train), epochs=20,\n",
    "                    validation_data=(X_valid_padded, tf.constant(y_valid)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sketch RNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow_datasets as tfds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "datasets = tfds.load(\"quickdraw_sketch_rnn\", as_supervised=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_set = datasets[\"train\"]\n",
    "valid_set = datasets[\"validation\"]\n",
    "test_set = datasets[\"test\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "for X_batch, y_batch in train_set.take(2):\n",
    "    print(X_batch.shape)"
   ]
  },
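  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The cells below use the legacy TensorFlow 1.x API (placeholders and sessions). They rely on the `reset_graph()` helper used throughout the book's TF 1.x notebooks; we define it here (assuming a TF 1.x runtime) so the cells are self-contained:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the book's usual TF 1.x helper: clear the default graph and make runs reproducible\n",
    "def reset_graph(seed=42):\n",
    "    tf.reset_default_graph()\n",
    "    tf.set_random_seed(seed)\n",
    "    np.random.seed(seed)"
   ]
  },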
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_inputs = 3\n",
    "n_neurons = 5\n",
    "\n",
    "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
    "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
    "\n",
    "Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons],dtype=tf.float32))\n",
    "Wy = tf.Variable(tf.random_normal(shape=[n_neurons,n_neurons],dtype=tf.float32))\n",
    "b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))\n",
    "\n",
    "Y0 = tf.tanh(tf.matmul(X0, Wx) + b)\n",
    "Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)\n",
    "\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0\n",
    "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(Y0_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(Y1_val)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using `static_rnn()`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Note**: `tf.contrib.rnn` was partially moved to the core API in TensorFlow 1.2. Most of the `*Cell` and `*Wrapper` classes are now available in `tf.nn.rnn_cell`, and the `tf.contrib.rnn.static_rnn()` function is available as `tf.nn.static_rnn()`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_inputs = 3\n",
    "n_neurons = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
    "X1 = tf.placeholder(tf.float32, [None, n_inputs])\n",
    "\n",
    "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
    "output_seqs, states = tf.nn.static_rnn(basic_cell, [X0, X1],\n",
    "                                       dtype=tf.float32)\n",
    "Y0, Y1 = output_seqs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n",
    "X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "Y0_val"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "Y1_val"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tensorflow_graph_in_jupyter import show_graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "show_graph(tf.get_default_graph())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Packing sequences"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_steps = 2\n",
    "n_inputs = 3\n",
    "n_neurons = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))\n",
    "\n",
    "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
    "output_seqs, states = tf.nn.static_rnn(basic_cell, X_seqs,\n",
    "                                       dtype=tf.float32)\n",
    "outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_batch = np.array([\n",
    "        # t = 0      t = 1\n",
    "        [[0, 1, 2], [9, 8, 7]], # instance 1\n",
    "        [[3, 4, 5], [0, 0, 0]], # instance 2\n",
    "        [[6, 7, 8], [6, 5, 4]], # instance 3\n",
    "        [[9, 0, 1], [3, 2, 1]], # instance 4\n",
    "    ])\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    outputs_val = outputs.eval(feed_dict={X: X_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(outputs_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(np.transpose(outputs_val, axes=[1, 0, 2])[1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using `dynamic_rnn()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_steps = 2\n",
    "n_inputs = 3\n",
    "n_neurons = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "\n",
    "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
    "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_batch = np.array([\n",
    "        [[0, 1, 2], [9, 8, 7]], # instance 1\n",
    "        [[3, 4, 5], [0, 0, 0]], # instance 2\n",
    "        [[6, 7, 8], [6, 5, 4]], # instance 3\n",
    "        [[9, 0, 1], [3, 2, 1]], # instance 4\n",
    "    ])\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    outputs_val = outputs.eval(feed_dict={X: X_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(outputs_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "show_graph(tf.get_default_graph())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setting the sequence lengths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_steps = 2\n",
    "n_inputs = 3\n",
    "n_neurons = 5\n",
    "\n",
    "reset_graph()\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "seq_length = tf.placeholder(tf.int32, [None])\n",
    "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32,\n",
    "                                    sequence_length=seq_length)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_batch = np.array([\n",
    "        # step 0    step 1\n",
    "        [[0, 1, 2], [9, 8, 7]], # instance 1\n",
    "        [[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)\n",
    "        [[6, 7, 8], [6, 5, 4]], # instance 3\n",
    "        [[9, 0, 1], [3, 2, 1]], # instance 4\n",
    "    ])\n",
    "seq_length_batch = np.array([2, 1, 2, 2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    outputs_val, states_val = sess.run(\n",
    "        [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(outputs_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "print(states_val)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training a sequence classifier"
   ]
  },
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function. The main differences relevant to this chapter are:\n",
|
||
"* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n",
|
||
"* the default `activation` is now `None` rather than `tf.nn.relu`."
|
||
]
|
||
},
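  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For illustration, here is a minimal sketch of the renaming (this cell is an addition, and `X_demo` is just a stand-in placeholder; apart from their variable scopes, the two layers below are equivalent):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()  # throwaway graph; the next cell calls reset_graph() again\n",
    "\n",
    "X_demo = tf.placeholder(tf.float32, shape=[None, 8])\n",
    "hidden_old = tf.contrib.layers.fully_connected(X_demo, 5, scope=\"fc\",\n",
    "                                               activation_fn=tf.nn.relu)\n",
    "hidden_new = tf.layers.dense(X_demo, 5, name=\"dense\", activation=tf.nn.relu)"
   ]
  },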
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_steps = 28\n",
    "n_inputs = 28\n",
    "n_neurons = 150\n",
    "n_outputs = 10\n",
    "\n",
    "learning_rate = 0.001\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.int32, [None])\n",
    "\n",
    "basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
    "outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
    "\n",
    "logits = tf.layers.dense(states, n_outputs)\n",
    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,\n",
    "                                                          logits=logits)\n",
    "loss = tf.reduce_mean(xentropy)\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "correct = tf.nn.in_top_k(logits, y, 1)\n",
    "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
    "\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Warning**: `tf.examples.tutorials.mnist` is deprecated. We will use `tf.keras.datasets.mnist` instead."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()\n",
    "X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0\n",
    "X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0\n",
    "y_train = y_train.astype(np.int32)\n",
    "y_test = y_test.astype(np.int32)\n",
    "X_valid, X_train = X_train[:5000], X_train[5000:]\n",
    "y_valid, y_train = y_train[:5000], y_train[5000:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "def shuffle_batch(X, y, batch_size):\n",
    "    rnd_idx = np.random.permutation(len(X))\n",
    "    n_batches = len(X) // batch_size\n",
    "    for batch_idx in np.array_split(rnd_idx, n_batches):\n",
    "        X_batch, y_batch = X[batch_idx], y[batch_idx]\n",
    "        yield X_batch, y_batch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_test = X_test.reshape((-1, n_steps, n_inputs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_epochs = 100\n",
    "batch_size = 150\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for epoch in range(n_epochs):\n",
    "        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):\n",
    "            X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
    "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
    "        print(epoch, \"Last batch accuracy:\", acc_batch, \"Test accuracy:\", acc_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Multi-layer RNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_steps = 28\n",
    "n_inputs = 28\n",
    "n_outputs = 10\n",
    "\n",
    "learning_rate = 0.001\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.int32, [None])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_neurons = 100\n",
    "n_layers = 3\n",
    "\n",
    "layers = [tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons,\n",
    "                                      activation=tf.nn.relu)\n",
    "          for layer in range(n_layers)]\n",
    "multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell(layers)\n",
    "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "states_concat = tf.concat(axis=1, values=states)\n",
    "logits = tf.layers.dense(states_concat, n_outputs)\n",
    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
    "loss = tf.reduce_mean(xentropy)\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "correct = tf.nn.in_top_k(logits, y, 1)\n",
    "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
    "\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_epochs = 10\n",
    "batch_size = 150\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for epoch in range(n_epochs):\n",
    "        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):\n",
    "            X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
    "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
    "        print(epoch, \"Last batch accuracy:\", acc_batch, \"Test accuracy:\", acc_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Time series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "t_min, t_max = 0, 30\n",
    "resolution = 0.1\n",
    "\n",
    "def time_series(t):\n",
    "    return t * np.sin(t) / 3 + 2 * np.sin(t*5)\n",
    "\n",
    "def next_batch(batch_size, n_steps):\n",
    "    t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)\n",
    "    Ts = t0 + np.arange(0., n_steps + 1) * resolution\n",
    "    ys = time_series(Ts)\n",
    "    return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "t = np.linspace(t_min, t_max, int((t_max - t_min) / resolution))\n",
    "\n",
    "n_steps = 20\n",
    "t_instance = np.linspace(12.2, 12.2 + resolution * (n_steps + 1), n_steps + 1)\n",
    "\n",
    "plt.figure(figsize=(11,4))\n",
    "plt.subplot(121)\n",
    "plt.title(\"A time series (generated)\", fontsize=14)\n",
    "plt.plot(t, time_series(t), label=r\"$t \\cdot \\sin(t) / 3 + 2 \\cdot \\sin(5t)$\")\n",
    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"b-\", linewidth=3, label=\"A training instance\")\n",
    "plt.legend(loc=\"lower left\", fontsize=14)\n",
    "plt.axis([0, 30, -17, 13])\n",
    "plt.xlabel(\"Time\")\n",
    "plt.ylabel(\"Value\")\n",
    "\n",
    "plt.subplot(122)\n",
    "plt.title(\"A training instance\", fontsize=14)\n",
    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
    "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
    "plt.legend(loc=\"upper left\")\n",
    "plt.xlabel(\"Time\")\n",
    "\n",
    "\n",
    "save_fig(\"time_series_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_batch, y_batch = next_batch(1, n_steps)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.c_[X_batch[0], y_batch[0]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using an `OutputProjectionWrapper`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's create the RNN. It will contain 100 recurrent neurons and we will unroll it over 20 time steps since each training instance will be 20 inputs long. Each input will contain only one feature (the value at that time). The targets are also sequences of 20 inputs, each containing a single value:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_steps = 20\n",
    "n_inputs = 1\n",
    "n_neurons = 100\n",
    "n_outputs = 1\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n",
    "\n",
    "cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
    "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "At each time step we now have an output vector of size 100. But what we actually want is a single output value at each time step. The simplest solution is to wrap the cell in an `OutputProjectionWrapper`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_steps = 20\n",
    "n_inputs = 1\n",
    "n_neurons = 100\n",
    "n_outputs = 1\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "cell = tf.contrib.rnn.OutputProjectionWrapper(\n",
    "    tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu),\n",
    "    output_size=n_outputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "learning_rate = 0.001\n",
    "\n",
    "loss = tf.reduce_mean(tf.square(outputs - y)) # MSE\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "saver = tf.train.Saver()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_iterations = 1500\n",
    "batch_size = 50\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for iteration in range(n_iterations):\n",
    "        X_batch, y_batch = next_batch(batch_size, n_steps)\n",
    "        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        if iteration % 100 == 0:\n",
    "            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "            print(iteration, \"\\tMSE:\", mse)\n",
    "\n",
    "    saver.save(sess, \"./my_time_series_model\") # not shown in the book"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "with tf.Session() as sess:                        # not shown in the book\n",
    "    saver.restore(sess, \"./my_time_series_model\") # not shown\n",
    "\n",
    "    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
    "    y_pred = sess.run(outputs, feed_dict={X: X_new})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.title(\"Testing the model\", fontsize=14)\n",
    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
    "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
    "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
    "plt.legend(loc=\"upper left\")\n",
    "plt.xlabel(\"Time\")\n",
    "\n",
    "save_fig(\"time_series_pred_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Without using an `OutputProjectionWrapper`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_steps = 20\n",
    "n_inputs = 1\n",
    "n_neurons = 100\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
    "rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_outputs = 1\n",
    "learning_rate = 0.001"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
    "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
    "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "loss = tf.reduce_mean(tf.square(outputs - y))\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "\n",
    "init = tf.global_variables_initializer()\n",
    "saver = tf.train.Saver()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_iterations = 1500\n",
    "batch_size = 50\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for iteration in range(n_iterations):\n",
    "        X_batch, y_batch = next_batch(batch_size, n_steps)\n",
    "        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        if iteration % 100 == 0:\n",
    "            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "            print(iteration, \"\\tMSE:\", mse)\n",
    "\n",
    "    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
    "    y_pred = sess.run(outputs, feed_dict={X: X_new})\n",
    "\n",
    "    saver.save(sess, \"./my_time_series_model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.title(\"Testing the model\", fontsize=14)\n",
    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
    "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
    "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
    "plt.legend(loc=\"upper left\")\n",
    "plt.xlabel(\"Time\")\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generating a creative new sequence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "with tf.Session() as sess:                        # not shown in the book\n",
    "    saver.restore(sess, \"./my_time_series_model\") # not shown\n",
    "\n",
    "    sequence = [0.] * n_steps\n",
    "    for iteration in range(300):\n",
    "        X_batch = np.array(sequence[-n_steps:]).reshape(1, n_steps, 1)\n",
    "        y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
    "        sequence.append(y_pred[0, -1, 0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(8,4))\n",
    "plt.plot(np.arange(len(sequence)), sequence, \"b-\")\n",
    "plt.plot(t[:n_steps], sequence[:n_steps], \"b-\", linewidth=3)\n",
    "plt.xlabel(\"Time\")\n",
    "plt.ylabel(\"Value\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "with tf.Session() as sess:\n",
    "    saver.restore(sess, \"./my_time_series_model\")\n",
    "\n",
    "    sequence1 = [0. for i in range(n_steps)]\n",
    "    for iteration in range(len(t) - n_steps):\n",
    "        X_batch = np.array(sequence1[-n_steps:]).reshape(1, n_steps, 1)\n",
    "        y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
    "        sequence1.append(y_pred[0, -1, 0])\n",
    "\n",
    "    # seed sequence2 with actual values from one third of the way into the series\n",
    "    sequence2 = [time_series(i * resolution + t_min + (t_max - t_min) / 3)\n",
    "                 for i in range(n_steps)]\n",
    "    for iteration in range(len(t) - n_steps):\n",
    "        X_batch = np.array(sequence2[-n_steps:]).reshape(1, n_steps, 1)\n",
    "        y_pred = sess.run(outputs, feed_dict={X: X_batch})\n",
    "        sequence2.append(y_pred[0, -1, 0])\n",
    "\n",
    "plt.figure(figsize=(11,4))\n",
    "plt.subplot(121)\n",
    "plt.plot(t, sequence1, \"b-\")\n",
    "plt.plot(t[:n_steps], sequence1[:n_steps], \"b-\", linewidth=3)\n",
    "plt.xlabel(\"Time\")\n",
    "plt.ylabel(\"Value\")\n",
    "\n",
    "plt.subplot(122)\n",
    "plt.plot(t, sequence2, \"b-\")\n",
    "plt.plot(t[:n_steps], sequence2[:n_steps], \"b-\", linewidth=3)\n",
    "plt.xlabel(\"Time\")\n",
    "save_fig(\"creative_sequence_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Deep RNN"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MultiRNNCell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_inputs = 2\n",
    "n_steps = 5\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_neurons = 100\n",
    "n_layers = 3\n",
    "\n",
    "layers = [tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
    "          for layer in range(n_layers)]\n",
    "multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell(layers)\n",
    "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_batch = np.random.rand(2, n_steps, n_inputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "outputs_val.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Distributing a Deep RNN Across Multiple GPUs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Do **NOT** do this:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "with tf.device(\"/gpu:0\"):  # BAD! This is ignored.\n",
    "    layer1 = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
    "\n",
    "with tf.device(\"/gpu:1\"):  # BAD! Ignored again.\n",
    "    layer2 = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Instead, you need a `DeviceCellWrapper`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "class DeviceCellWrapper(tf.nn.rnn_cell.RNNCell):\n",
    "    def __init__(self, device, cell):\n",
    "        self._cell = cell\n",
    "        self._device = device\n",
    "\n",
    "    @property\n",
    "    def state_size(self):\n",
    "        return self._cell.state_size\n",
    "\n",
    "    @property\n",
    "    def output_size(self):\n",
    "        return self._cell.output_size\n",
    "\n",
    "    def __call__(self, inputs, state, scope=None):\n",
    "        with tf.device(self._device):\n",
    "            return self._cell(inputs, state, scope)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_inputs = 5\n",
    "n_steps = 20\n",
    "n_neurons = 100\n",
    "\n",
    "X = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "devices = [\"/cpu:0\", \"/cpu:0\", \"/cpu:0\"] # replace with [\"/gpu:0\", \"/gpu:1\", \"/gpu:2\"] if you have 3 GPUs\n",
    "cells = [DeviceCellWrapper(dev, tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons))\n",
    "         for dev in devices]\n",
    "multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell(cells)\n",
    "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Alternatively, since TensorFlow 1.1, you can use the `tf.contrib.rnn.DeviceWrapper` class (alias `tf.nn.rnn_cell.DeviceWrapper` since TF 1.2)."
   ]
  },
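  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For example (a minimal sketch added here for illustration; note that the built-in wrapper takes the cell first, then the device string; these cells are not used in the rest of this notebook):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sketch: same effect as our custom DeviceCellWrapper above.\n",
    "cells_alt = [tf.nn.rnn_cell.DeviceWrapper(\n",
    "                 tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons), dev)\n",
    "             for dev in devices]"
   ]
  },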
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    print(sess.run(outputs, feed_dict={X: np.random.rand(2, n_steps, n_inputs)}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dropout"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "n_inputs = 1\n",
    "n_neurons = 100\n",
    "n_layers = 3\n",
    "n_steps = 20\n",
    "n_outputs = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: the `input_keep_prob` parameter can be a placeholder, making it possible to set it to any value you want during training, and to 1.0 during testing (effectively turning dropout off). This is a much more elegant solution than what was recommended in earlier versions of the book (i.e., writing your own wrapper class or having a separate model for training and testing). Thanks to Shen Cheng for bringing this to my attention."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "keep_prob = tf.placeholder_with_default(1.0, shape=())\n",
    "cells = [tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)\n",
    "         for layer in range(n_layers)]\n",
    "cells_drop = [tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=keep_prob)\n",
    "              for cell in cells]\n",
    "multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell(cells_drop)\n",
    "rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "learning_rate = 0.01\n",
    "\n",
    "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
    "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
    "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
    "\n",
    "loss = tf.reduce_mean(tf.square(outputs - y))\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "\n",
    "init = tf.global_variables_initializer()\n",
    "saver = tf.train.Saver()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_iterations = 1500\n",
    "batch_size = 50\n",
    "train_keep_prob = 0.5\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for iteration in range(n_iterations):\n",
    "        X_batch, y_batch = next_batch(batch_size, n_steps)\n",
    "        _, mse = sess.run([training_op, loss],\n",
    "                          feed_dict={X: X_batch, y: y_batch,\n",
    "                                     keep_prob: train_keep_prob})\n",
    "        if iteration % 100 == 0:                   # not shown in the book\n",
    "            print(iteration, \"Training MSE:\", mse) # not shown\n",
    "\n",
    "    saver.save(sess, \"./my_dropout_time_series_model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "with tf.Session() as sess:\n",
    "    saver.restore(sess, \"./my_dropout_time_series_model\")\n",
    "\n",
    "    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n",
    "    y_pred = sess.run(outputs, feed_dict={X: X_new})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.title(\"Testing the model\", fontsize=14)\n",
    "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n",
    "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n",
    "plt.plot(t_instance[1:], y_pred[0,:,0], \"r.\", markersize=10, label=\"prediction\")\n",
    "plt.legend(loc=\"upper left\")\n",
    "plt.xlabel(\"Time\")\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Oops, it seems that Dropout does not help at all in this particular case. :/"
   ]
  },
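  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you want to experiment further, note that `DropoutWrapper` can also drop the cell's outputs and states. A minimal sketch, added here for illustration and reusing the same `keep_prob` placeholder as above (this cell is not used in the rest of the notebook):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sketch: dropout applied to inputs, outputs and states of a cell.\n",
    "cell_drop_alt = tf.nn.rnn_cell.DropoutWrapper(\n",
    "    tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons),\n",
    "    input_keep_prob=keep_prob,\n",
    "    output_keep_prob=keep_prob,\n",
    "    state_keep_prob=keep_prob)"
   ]
  },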
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LSTM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=n_neurons)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_steps = 28\n",
    "n_inputs = 28\n",
    "n_neurons = 150\n",
    "n_outputs = 10\n",
    "n_layers = 3\n",
    "\n",
    "learning_rate = 0.001\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n",
    "y = tf.placeholder(tf.int32, [None])\n",
    "\n",
    "lstm_cells = [tf.nn.rnn_cell.BasicLSTMCell(num_units=n_neurons)\n",
    "              for layer in range(n_layers)]\n",
    "multi_cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)\n",
    "outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n",
    "top_layer_h_state = states[-1][1]\n",
    "logits = tf.layers.dense(top_layer_h_state, n_outputs, name=\"softmax\")\n",
    "xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
    "loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "correct = tf.nn.in_top_k(logits, y, 1)\n",
    "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
    "\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "states"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "top_layer_h_state"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "n_epochs = 10\n",
    "batch_size = 150\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for epoch in range(n_epochs):\n",
    "        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):\n",
    "            X_batch = X_batch.reshape((-1, n_steps, n_inputs))\n",
    "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
    "        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})\n",
    "        print(epoch, \"Last batch accuracy:\", acc_batch, \"Test accuracy:\", acc_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=n_neurons, use_peepholes=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "gru_cell = tf.nn.rnn_cell.GRUCell(num_units=n_neurons)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Embeddings"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This section is based on TensorFlow's [Word2Vec tutorial](https://www.tensorflow.org/versions/r0.11/tutorials/word2vec/index.html)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fetch the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "from six.moves import urllib\n",
    "\n",
    "import errno\n",
    "import os\n",
    "import zipfile\n",
    "\n",
    "WORDS_PATH = \"datasets/words\"\n",
    "WORDS_URL = 'http://mattmahoney.net/dc/text8.zip'\n",
    "\n",
    "def mkdir_p(path):\n",
    "    \"\"\"Create directories, ok if they already exist.\n",
    "\n",
    "    This is for python 2 support. In python >=3.2, simply use:\n",
    "    >>> os.makedirs(path, exist_ok=True)\n",
    "    \"\"\"\n",
    "    try:\n",
    "        os.makedirs(path)\n",
    "    except OSError as exc:\n",
    "        if exc.errno == errno.EEXIST and os.path.isdir(path):\n",
    "            pass\n",
    "        else:\n",
    "            raise\n",
    "\n",
    "def fetch_words_data(words_url=WORDS_URL, words_path=WORDS_PATH):\n",
    "    os.makedirs(words_path, exist_ok=True)\n",
    "    zip_path = os.path.join(words_path, \"words.zip\")\n",
    "    if not os.path.exists(zip_path):\n",
    "        urllib.request.urlretrieve(words_url, zip_path)\n",
    "    with zipfile.ZipFile(zip_path) as f:\n",
    "        data = f.read(f.namelist()[0])\n",
    "    return data.decode(\"ascii\").split()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "words = fetch_words_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "words[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the dictionary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "\n",
    "vocabulary_size = 50000\n",
    "\n",
    "vocabulary = [(\"UNK\", None)] + Counter(words).most_common(vocabulary_size - 1)\n",
    "vocabulary = np.array([word for word, _ in vocabulary])\n",
    "dictionary = {word: code for code, word in enumerate(vocabulary)}\n",
    "data = np.array([dictionary.get(word, 0) for word in words])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "\" \".join(words[:9]), data[:9]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "\" \".join([vocabulary[word_index] for word_index in [5241, 3081, 12, 6, 195, 2, 3134, 46, 59]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "words[24], data[24]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate batches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import deque\n",
    "\n",
    "def generate_batch(batch_size, num_skips, skip_window):\n",
    "    global data_index\n",
    "    assert batch_size % num_skips == 0\n",
    "    assert num_skips <= 2 * skip_window\n",
    "    batch = np.ndarray(shape=[batch_size], dtype=np.int32)\n",
    "    labels = np.ndarray(shape=[batch_size, 1], dtype=np.int32)\n",
    "    span = 2 * skip_window + 1 # [ skip_window target skip_window ]\n",
    "    buffer = deque(maxlen=span)\n",
    "    for _ in range(span):\n",
    "        buffer.append(data[data_index])\n",
    "        data_index = (data_index + 1) % len(data)\n",
    "    for i in range(batch_size // num_skips):\n",
    "        target = skip_window  # target label at the center of the buffer\n",
    "        targets_to_avoid = [ skip_window ]\n",
    "        for j in range(num_skips):\n",
    "            while target in targets_to_avoid:\n",
    "                target = np.random.randint(0, span)\n",
    "            targets_to_avoid.append(target)\n",
    "            batch[i * num_skips + j] = buffer[skip_window]\n",
    "            labels[i * num_skips + j, 0] = buffer[target]\n",
    "        buffer.append(data[data_index])\n",
    "        data_index = (data_index + 1) % len(data)\n",
    "    return batch, labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_index = 0\n",
    "batch, labels = generate_batch(8, 2, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch, [vocabulary[word] for word in batch]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels, [vocabulary[word] for word in labels[:, 0]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 128\n",
    "embedding_size = 128  # Dimension of the embedding vector.\n",
    "skip_window = 1       # How many words to consider left and right.\n",
    "num_skips = 2         # How many times to reuse an input to generate a label.\n",
    "\n",
    "# We pick a random validation set to sample nearest neighbors. Here we limit the\n",
    "# validation samples to the words that have a low numeric ID, which by\n",
    "# construction are also the most frequent.\n",
    "valid_size = 16       # Random set of words to evaluate similarity on.\n",
    "valid_window = 100    # Only pick dev samples in the head of the distribution.\n",
    "valid_examples = np.random.choice(valid_window, valid_size, replace=False)\n",
    "num_sampled = 64      # Number of negative examples to sample.\n",
    "\n",
    "learning_rate = 0.01"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "# Input data.\n",
    "train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])\n",
    "valid_dataset = tf.constant(valid_examples, dtype=tf.int32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [],
   "source": [
    "vocabulary_size = 50000\n",
    "embedding_size = 150\n",
    "\n",
    "# Look up embeddings for inputs.\n",
    "init_embeds = tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)\n",
    "embeddings = tf.Variable(init_embeds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_inputs = tf.placeholder(tf.int32, shape=[None])\n",
    "embed = tf.nn.embedding_lookup(embeddings, train_inputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Construct the variables for the NCE loss\n",
    "nce_weights = tf.Variable(\n",
    "    tf.truncated_normal([vocabulary_size, embedding_size],\n",
    "                        stddev=1.0 / np.sqrt(embedding_size)))\n",
    "nce_biases = tf.Variable(tf.zeros([vocabulary_size]))\n",
    "\n",
    "# Compute the average NCE loss for the batch.\n",
    "# tf.nce_loss automatically draws a new sample of the negative labels each\n",
    "# time we evaluate the loss.\n",
    "loss = tf.reduce_mean(\n",
    "    tf.nn.nce_loss(nce_weights, nce_biases, train_labels, embed,\n",
    "                   num_sampled, vocabulary_size))\n",
    "\n",
    "# Construct the Adam optimizer\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "\n",
    "# Compute the cosine similarity between minibatch examples and all embeddings.\n",
    "norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), axis=1, keepdims=True))\n",
    "normalized_embeddings = embeddings / norm\n",
    "valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)\n",
    "similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True)\n",
    "\n",
    "# Add variable initializer.\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_steps = 10001\n",
    "\n",
    "with tf.Session() as session:\n",
    "    init.run()\n",
    "\n",
    "    average_loss = 0\n",
    "    for step in range(num_steps):\n",
    "        print(\"\\rIteration: {}\".format(step), end=\"\\t\")\n",
    "        batch_inputs, batch_labels = generate_batch(batch_size, num_skips, skip_window)\n",
    "        feed_dict = {train_inputs : batch_inputs, train_labels : batch_labels}\n",
    "\n",
    "        # We perform one update step by evaluating the training op (including it\n",
    "        # in the list of returned values for session.run())\n",
    "        _, loss_val = session.run([training_op, loss], feed_dict=feed_dict)\n",
    "        average_loss += loss_val\n",
    "\n",
    "        if step % 2000 == 0:\n",
    "            if step > 0:\n",
    "                average_loss /= 2000\n",
    "            # The average loss is an estimate of the loss over the last 2000 batches.\n",
    "            print(\"Average loss at step \", step, \": \", average_loss)\n",
    "            average_loss = 0\n",
    "\n",
    "        # Note that this is expensive (~20% slowdown if computed every 500 steps)\n",
    "        if step % 10000 == 0:\n",
    "            sim = similarity.eval()\n",
    "            for i in range(valid_size):\n",
    "                valid_word = vocabulary[valid_examples[i]]\n",
    "                top_k = 8 # number of nearest neighbors\n",
    "                nearest = (-sim[i, :]).argsort()[1:top_k+1]\n",
    "                log_str = \"Nearest to %s:\" % valid_word\n",
    "                for k in range(top_k):\n",
    "                    close_word = vocabulary[nearest[k]]\n",
    "                    log_str = \"%s %s,\" % (log_str, close_word)\n",
    "                print(log_str)\n",
    "\n",
    "    final_embeddings = normalized_embeddings.eval()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's save the final embeddings (of course you can use a TensorFlow `Saver` if you prefer):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.save(\"./my_final_embeddings.npy\", final_embeddings)"
   ]
  },
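  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sanity check (added cell), we can reload the file with NumPy and verify the round trip:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "final_embeddings_reloaded = np.load(\"./my_final_embeddings.npy\")\n",
    "np.allclose(final_embeddings_reloaded, final_embeddings)"
   ]
  },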
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plot the embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_with_labels(low_dim_embs, labels):\n",
    "    assert low_dim_embs.shape[0] >= len(labels), \"More labels than embeddings\"\n",
    "    plt.figure(figsize=(18, 18))  # in inches\n",
    "    for i, label in enumerate(labels):\n",
    "        x, y = low_dim_embs[i,:]\n",
    "        plt.scatter(x, y)\n",
    "        plt.annotate(label,\n",
    "                     xy=(x, y),\n",
    "                     xytext=(5, 2),\n",
    "                     textcoords='offset points',\n",
    "                     ha='right',\n",
    "                     va='bottom')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.manifold import TSNE\n",
    "\n",
    "tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)\n",
    "plot_only = 500\n",
    "low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only,:])\n",
    "labels = [vocabulary[i] for i in range(plot_only)]\n",
    "plot_with_labels(low_dim_embs, labels)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Machine Translation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `basic_rnn_seq2seq()` function creates a simple Encoder/Decoder model: it first runs an RNN to encode `encoder_inputs` into a state vector, then runs a decoder initialized with the last encoder state on `decoder_inputs`. Encoder and decoder use the same RNN cell type but they don't share parameters."
   ]
  },
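  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For reference, a call to `basic_rnn_seq2seq()` looks like the sketch below (an added cell, shown as comments because it expects already-embedded inputs, i.e. a list of 2D float tensors, unlike the `embedding_rnn_seq2seq()` variant used in the next cell, which takes token IDs and handles the embeddings itself):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch only (not executed against real tensors in this notebook):\n",
    "# outputs, states = tf.contrib.legacy_seq2seq.basic_rnn_seq2seq(\n",
    "#     encoder_inputs,  # list of n_steps tensors of shape [batch_size, input_size]\n",
    "#     decoder_inputs,  # list of n_steps tensors of shape [batch_size, input_size]\n",
    "#     cell)            # the RNN cell shared across time steps"
   ]
  },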
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "reset_graph()\n",
    "\n",
    "n_steps = 50\n",
    "n_neurons = 200\n",
    "n_layers = 3\n",
    "num_encoder_symbols = 20000\n",
    "num_decoder_symbols = 20000\n",
    "embedding_size = 150\n",
    "learning_rate = 0.01\n",
    "\n",
    "X = tf.placeholder(tf.int32, [None, n_steps]) # English sentences\n",
    "Y = tf.placeholder(tf.int32, [None, n_steps]) # French translations\n",
    "W = tf.placeholder(tf.float32, [None, n_steps - 1, 1])\n",
    "Y_input = Y[:, :-1]\n",
    "Y_target = Y[:, 1:]\n",
    "\n",
    "encoder_inputs = tf.unstack(tf.transpose(X)) # list of 1D tensors\n",
    "decoder_inputs = tf.unstack(tf.transpose(Y_input)) # list of 1D tensors\n",
    "\n",
    "lstm_cells = [tf.nn.rnn_cell.BasicLSTMCell(num_units=n_neurons)\n",
    "              for layer in range(n_layers)]\n",
    "cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)\n",
    "\n",
    "output_seqs, states = tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq(\n",
    "    encoder_inputs,\n",
    "    decoder_inputs,\n",
    "    cell,\n",
    "    num_encoder_symbols,\n",
    "    num_decoder_symbols,\n",
    "    embedding_size)\n",
    "\n",
    "logits = tf.transpose(tf.unstack(output_seqs), perm=[1, 0, 2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [],
   "source": [
    "logits_flat = tf.reshape(logits, [-1, num_decoder_symbols])\n",
    "Y_target_flat = tf.reshape(Y_target, [-1])\n",
    "W_flat = tf.reshape(W, [-1])\n",
    "xentropy = W_flat * tf.nn.sparse_softmax_cross_entropy_with_logits(labels=Y_target_flat, logits=logits_flat)\n",
    "loss = tf.reduce_mean(xentropy)\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "training_op = optimizer.minimize(loss)\n",
    "\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Exercise solutions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. to 6."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "See Appendix A."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Embedded Reber Grammars"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First we need to build a function that generates strings based on a grammar. The grammar will be represented as a list of possible transitions for each state. A transition specifies the string to output (or a grammar to generate it) and the next state."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "\n",
    "default_reber_grammar = [\n",
    "    [(\"B\", 1)],           # (state 0) =B=>(state 1)\n",
    "    [(\"T\", 2), (\"P\", 3)], # (state 1) =T=>(state 2) or =P=>(state 3)\n",
    "    [(\"S\", 2), (\"X\", 4)], # (state 2) =S=>(state 2) or =X=>(state 4)\n",
    "    [(\"T\", 3), (\"V\", 5)], # and so on...\n",
    "    [(\"X\", 3), (\"S\", 6)],\n",
    "    [(\"P\", 4), (\"V\", 6)],\n",
    "    [(\"E\", None)]]        # (state 6) =E=>(terminal state)\n",
    "\n",
    "embedded_reber_grammar = [\n",
    "    [(\"B\", 1)],\n",
    "    [(\"T\", 2), (\"P\", 3)],\n",
    "    [(default_reber_grammar, 4)],\n",
    "    [(default_reber_grammar, 5)],\n",
    "    [(\"T\", 6)],\n",
    "    [(\"P\", 6)],\n",
    "    [(\"E\", None)]]\n",
    "\n",
    "def generate_string(grammar):\n",
    "    state = 0\n",
    "    output = []\n",
    "    while state is not None:\n",
    "        index = np.random.randint(len(grammar[state]))\n",
    "        production, state = grammar[state][index]\n",
    "        if isinstance(production, list):\n",
    "            production = generate_string(grammar=production)\n",
    "        output.append(production)\n",
    "    return \"\".join(output)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's generate a few strings based on the default Reber grammar:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [],
   "source": [
    "for _ in range(25):\n",
    "    print(generate_string(default_reber_grammar), end=\" \")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Looks good. Now let's generate a few strings based on the embedded Reber grammar:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
    "for _ in range(25):\n",
    "    print(generate_string(embedded_reber_grammar), end=\" \")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Okay, now we need a function to generate strings that do not respect the grammar. We could generate a random string, but the task would be a bit too easy, so instead we will generate a string that respects the grammar, and we will corrupt it by changing just one character:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_corrupted_string(grammar, chars=\"BEPSTVX\"):\n",
    "    good_string = generate_string(grammar)\n",
    "    index = np.random.randint(len(good_string))\n",
    "    good_char = good_string[index]\n",
    "    bad_char = np.random.choice(sorted(set(chars) - set(good_char)))\n",
    "    return good_string[:index] + bad_char + good_string[index + 1:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's look at a few corrupted strings:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [],
   "source": [
    "for _ in range(25):\n",
    "    print(generate_corrupted_string(embedded_reber_grammar), end=\" \")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It's not possible to feed a string directly to an RNN: we first need to convert it to a sequence of vectors. Each vector will represent a single letter, using a one-hot encoding. For example, the letter \"B\" will be represented as the vector `[1, 0, 0, 0, 0, 0, 0]`, the letter E will be represented as `[0, 1, 0, 0, 0, 0, 0]` and so on. Let's write a function that converts a string to a sequence of such one-hot vectors. Note that if the string is shorter than `n_steps`, it will be padded with zero vectors (later, we will tell TensorFlow how long each string actually is using the `sequence_length` parameter)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "def string_to_one_hot_vectors(string, n_steps, chars=\"BEPSTVX\"):\n",
    "    char_to_index = {char: index for index, char in enumerate(chars)}\n",
    "    output = np.zeros((n_steps, len(chars)), dtype=np.int32)\n",
    "    for index, char in enumerate(string):\n",
    "        output[index, char_to_index[char]] = 1.\n",
    "    return output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [],
   "source": [
    "string_to_one_hot_vectors(\"BTBTXSETE\", 12)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can now generate the dataset, with 50% good strings, and 50% bad strings:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_dataset(size):\n",
    "    good_strings = [generate_string(embedded_reber_grammar)\n",
    "                    for _ in range(size // 2)]\n",
    "    bad_strings = [generate_corrupted_string(embedded_reber_grammar)\n",
    "                   for _ in range(size - size // 2)]\n",
    "    all_strings = good_strings + bad_strings\n",
    "    n_steps = max([len(string) for string in all_strings])\n",
    "    X = np.array([string_to_one_hot_vectors(string, n_steps)\n",
    "                  for string in all_strings])\n",
    "    seq_length = np.array([len(string) for string in all_strings])\n",
    "    y = np.array([[1] for _ in range(len(good_strings))] +\n",
    "                 [[0] for _ in range(len(bad_strings))])\n",
    "    rnd_idx = np.random.permutation(size)\n",
    "    return X[rnd_idx], seq_length[rnd_idx], y[rnd_idx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, l_train, y_train = generate_dataset(10000)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's take a look at the first training instances:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It's padded with a lot of zeros, since every string is padded up to the length of the longest string in the dataset. How long is this particular string?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
    "l_train[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What class is it?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Perfect! We are ready to create the RNN to identify good strings. We build a sequence classifier very similar to the one we built earlier to classify MNIST images, with two main differences:\n",
    "* First, the input strings have variable length, so we need to specify the `sequence_length` when calling the `dynamic_rnn()` function.\n",
    "* Second, this is a binary classifier, so we only need one output neuron that will output, for each input string, the estimated log probability that it is a good string. For multiclass classification, we used `sparse_softmax_cross_entropy_with_logits()` but for binary classification we use `sigmoid_cross_entropy_with_logits()`.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [],
   "source": [
    "reset_graph()\n",
    "\n",
    "possible_chars = \"BEPSTVX\"\n",
    "n_inputs = len(possible_chars)\n",
    "n_neurons = 30\n",
    "n_outputs = 1\n",
    "\n",
    "learning_rate = 0.02\n",
    "momentum = 0.95\n",
    "\n",
    "X = tf.placeholder(tf.float32, [None, None, n_inputs], name=\"X\")\n",
    "seq_length = tf.placeholder(tf.int32, [None], name=\"seq_length\")\n",
    "y = tf.placeholder(tf.float32, [None, 1], name=\"y\")\n",
    "\n",
    "gru_cell = tf.nn.rnn_cell.GRUCell(num_units=n_neurons)\n",
    "outputs, states = tf.nn.dynamic_rnn(gru_cell, X, dtype=tf.float32,\n",
    "                                    sequence_length=seq_length)\n",
    "\n",
    "logits = tf.layers.dense(states, n_outputs, name=\"logits\")\n",
    "y_pred = tf.cast(tf.greater(logits, 0.), tf.float32, name=\"y_pred\")\n",
    "y_proba = tf.nn.sigmoid(logits, name=\"y_proba\")\n",
    "\n",
    "xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)\n",
    "loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
    "optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,\n",
    "                                       momentum=momentum,\n",
    "                                       use_nesterov=True)\n",
    "training_op = optimizer.minimize(loss)\n",
    "\n",
    "correct = tf.equal(y_pred, y, name=\"correct\")\n",
    "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name=\"accuracy\")\n",
    "\n",
    "init = tf.global_variables_initializer()\n",
    "saver = tf.train.Saver()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now let's generate a validation set so we can track progress during training:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_val, l_val, y_val = generate_dataset(5000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_epochs = 50\n",
    "batch_size = 50\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    init.run()\n",
    "    for epoch in range(n_epochs):\n",
    "        X_batches = np.array_split(X_train, len(X_train) // batch_size)\n",
    "        l_batches = np.array_split(l_train, len(l_train) // batch_size)\n",
    "        y_batches = np.array_split(y_train, len(y_train) // batch_size)\n",
    "        for X_batch, l_batch, y_batch in zip(X_batches, l_batches, y_batches):\n",
    "            loss_val, _ = sess.run(\n",
    "                [loss, training_op],\n",
    "                feed_dict={X: X_batch, seq_length: l_batch, y: y_batch})\n",
    "        acc_train = accuracy.eval(feed_dict={X: X_batch, seq_length: l_batch, y: y_batch})\n",
    "        acc_val = accuracy.eval(feed_dict={X: X_val, seq_length: l_val, y: y_val})\n",
    "        print(\"{:4d}  Train loss: {:.4f}, accuracy: {:.2f}%  Validation accuracy: {:.2f}%\".format(\n",
    "            epoch, loss_val, 100 * acc_train, 100 * acc_val))\n",
    "        saver.save(sess, \"./my_reber_classifier\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now let's test our RNN on two tricky strings: the first one is bad while the second one is good. They only differ by the second to last character. If the RNN gets this right, it shows that it managed to notice the pattern that the second letter should always be equal to the second to last letter. That requires a fairly long short-term memory (which is the reason why we used a GRU cell)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_strings = [\n",
    "    \"BPBTSSSSSSSXXTTVPXVPXTTTTTVVETE\",\n",
    "    \"BPBTSSSSSSSXXTTVPXVPXTTTTTVVEPE\"]\n",
    "l_test = np.array([len(s) for s in test_strings])\n",
    "max_length = l_test.max()\n",
    "X_test = [string_to_one_hot_vectors(s, n_steps=max_length)\n",
    "          for s in test_strings]\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    saver.restore(sess, \"./my_reber_classifier\")\n",
    "    y_proba_val = y_proba.eval(feed_dict={X: X_test, seq_length: l_test})\n",
    "\n",
    "print()\n",
    "print(\"Estimated probability that these are Reber strings:\")\n",
    "for index, string in enumerate(test_strings):\n",
    "    print(\"{}: {:.2f}%\".format(string, 100 * y_proba_val[index][0]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Ta-da! It worked fine. The RNN found the correct answers with high confidence. :)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. and 9."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Coming soon..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  },
  "nav_menu": {},
  "toc": {
   "navigate_menu": true,
   "number_sections": true,
   "sideBar": true,
   "threshold": 6,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}