1300 lines
36 KiB
Plaintext
1300 lines
36 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Chapter 15 – Processing Sequences Using RNNs and CNNs**"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"_This notebook contains all the sample code in chapter 15._"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Setup"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"First, let's import a few common modules, ensure Matplotlib plots figures inline, and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0-preview."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Python ≥3.5 is required\n",
|
|||
|
"import sys\n",
|
|||
|
"assert sys.version_info >= (3, 5)\n",
|
|||
|
"\n",
|
|||
|
"# Scikit-Learn ≥0.20 is required\n",
|
|||
|
"import sklearn\n",
|
|||
|
"assert sklearn.__version__ >= \"0.20\"\n",
|
|||
|
"\n",
|
|||
|
"# TensorFlow ≥2.0-preview is required\n",
|
|||
|
"import tensorflow as tf\n",
|
|||
|
"from tensorflow import keras\n",
|
|||
|
"assert tf.__version__ >= \"2.0\"\n",
|
|||
|
"\n",
|
|||
|
"# Common imports\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import os\n",
|
|||
|
"\n",
|
|||
|
"# to make this notebook's output stable across runs\n",
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"# To plot pretty figures\n",
|
|||
|
"%matplotlib inline\n",
|
|||
|
"import matplotlib as mpl\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"mpl.rc('axes', labelsize=14)\n",
|
|||
|
"mpl.rc('xtick', labelsize=12)\n",
|
|||
|
"mpl.rc('ytick', labelsize=12)\n",
|
|||
|
"\n",
|
|||
|
"# Where to save the figures\n",
|
|||
|
"PROJECT_ROOT_DIR = \".\"\n",
|
|||
|
"CHAPTER_ID = \"rnn\"\n",
|
|||
|
"IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
|
|||
|
"os.makedirs(IMAGES_PATH, exist_ok=True)\n",
|
|||
|
"\n",
|
|||
|
"def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n",
|
|||
|
" path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n",
|
|||
|
" print(\"Saving figure\", fig_id)\n",
|
|||
|
" if tight_layout:\n",
|
|||
|
" plt.tight_layout()\n",
|
|||
|
" plt.savefig(path, format=fig_extension, dpi=resolution)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Basic RNNs"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Generate the Dataset"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def generate_time_series(batch_size, n_steps):\n",
|
|||
|
" freq1, freq2, offsets1, offsets2 = np.random.rand(4, batch_size, 1)\n",
|
|||
|
" time = np.linspace(0, 1, n_steps)\n",
|
|||
|
" series = 0.5 * np.sin((time - offsets1) * (freq1 * 10 + 10)) # wave 1\n",
|
|||
|
" series += 0.2 * np.sin((time - offsets2) * (freq2 * 20 + 20)) # + wave 2\n",
|
|||
|
" series += 0.1 * (np.random.rand(batch_size, n_steps) - 0.5) # + noise\n",
|
|||
|
" return series[..., np.newaxis].astype(np.float32)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"\n",
|
|||
|
"n_steps = 50\n",
|
|||
|
"series = generate_time_series(10000, n_steps + 1)\n",
|
|||
|
"X_train, y_train = series[:7000, :n_steps], series[:7000, -1]\n",
|
|||
|
"X_valid, y_valid = series[7000:9000, :n_steps], series[7000:9000, -1]\n",
|
|||
|
"X_test, y_test = series[9000:, :n_steps], series[9000:, -1]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 4,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"X_train.shape, y_train.shape"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 5,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def plot_series(series, y=None, y_pred=None, x_label=\"$t$\", y_label=\"$x(t)$\"):\n",
|
|||
|
" plt.plot(series, \".-\")\n",
|
|||
|
" if y is not None:\n",
|
|||
|
" plt.plot(n_steps, y, \"bx\", markersize=10)\n",
|
|||
|
" if y_pred is not None:\n",
|
|||
|
" plt.plot(n_steps, y_pred, \"ro\")\n",
|
|||
|
" plt.grid(True)\n",
|
|||
|
" if x_label:\n",
|
|||
|
" plt.xlabel(x_label, fontsize=16)\n",
|
|||
|
" if y_label:\n",
|
|||
|
" plt.ylabel(y_label, fontsize=16, rotation=0)\n",
|
|||
|
" plt.hlines(0, 0, 100, linewidth=1)\n",
|
|||
|
" plt.axis([0, n_steps + 1, -1, 1])\n",
|
|||
|
"\n",
|
|||
|
"fig, axes = plt.subplots(nrows=1, ncols=3, sharey=True, figsize=(12, 4))\n",
|
|||
|
"for col in range(3):\n",
|
|||
|
" plt.sca(axes[col])\n",
|
|||
|
" plot_series(X_valid[col, :, 0], y_valid[col, 0],\n",
|
|||
|
" y_label=(\"$x(t)$\" if col==0 else None))\n",
|
|||
|
"save_fig(\"time_series_plot\")\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Computing Some Baselines"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Naive predictions (just predict the last observed value):"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 6,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"y_pred = X_valid[:, -1]\n",
|
|||
|
"np.mean(keras.losses.mean_squared_error(y_valid, y_pred))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 7,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Linear predictions:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 8,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"model = keras.models.Sequential([\n",
|
|||
|
" keras.layers.Flatten(input_shape=[50, 1]),\n",
|
|||
|
" keras.layers.Dense(1)\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"model.compile(loss=\"mse\", optimizer=\"adam\")\n",
|
|||
|
"history = model.fit(X_train, y_train, epochs=20,\n",
|
|||
|
" validation_data=(X_valid, y_valid))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"model.evaluate(X_valid, y_valid)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def plot_learning_curves(loss, val_loss):\n",
|
|||
|
" plt.plot(np.arange(len(loss)) + 0.5, loss, \"b.-\", label=\"Training loss\")\n",
|
|||
|
" plt.plot(np.arange(len(val_loss)) + 1, val_loss, \"r.-\", label=\"Validation loss\")\n",
|
|||
|
" plt.gca().xaxis.set_major_locator(mpl.ticker.MaxNLocator(integer=True))\n",
|
|||
|
" plt.axis([1, 20, 0, 0.05])\n",
|
|||
|
" plt.legend(fontsize=14)\n",
|
|||
|
" plt.xlabel(\"Epochs\")\n",
|
|||
|
" plt.ylabel(\"Loss\")\n",
|
|||
|
" plt.grid(True)\n",
|
|||
|
"\n",
|
|||
|
"plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"y_pred = model.predict(X_valid)\n",
|
|||
|
"plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Using a Simple RNN"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 12,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"model = keras.models.Sequential([\n",
|
|||
|
" keras.layers.SimpleRNN(1, input_shape=[None, 1])\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"optimizer = keras.optimizers.Adam(lr=0.005)\n",
|
|||
|
"model.compile(loss=\"mse\", optimizer=optimizer)\n",
|
|||
|
"history = model.fit(X_train, y_train, epochs=20,\n",
|
|||
|
" validation_data=(X_valid, y_valid))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"model.evaluate(X_valid, y_valid)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 15,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"y_pred = model.predict(X_valid)\n",
|
|||
|
"plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Deep RNNs"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 16,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"model = keras.models.Sequential([\n",
|
|||
|
" keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),\n",
|
|||
|
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
|
|||
|
" keras.layers.SimpleRNN(1)\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"model.compile(loss=\"mse\", optimizer=\"adam\")\n",
|
|||
|
"history = model.fit(X_train, y_train, epochs=20,\n",
|
|||
|
" validation_data=(X_valid, y_valid))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 17,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"model.evaluate(X_valid, y_valid)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 18,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"y_pred = model.predict(X_valid)\n",
|
|||
|
"plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Make the second `SimpleRNN` layer return only the last output:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"model = keras.models.Sequential([\n",
|
|||
|
" keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),\n",
|
|||
|
" keras.layers.SimpleRNN(20),\n",
|
|||
|
" keras.layers.Dense(1)\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"model.compile(loss=\"mse\", optimizer=\"adam\")\n",
|
|||
|
"history = model.fit(X_train, y_train, epochs=20,\n",
|
|||
|
" validation_data=(X_valid, y_valid))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"model.evaluate(X_valid, y_valid)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 23,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"y_pred = model.predict(X_valid)\n",
|
|||
|
"plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Forecasting Several Steps Ahead"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 24,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(43) # not 42, as it would give the first series in the train set\n",
|
|||
|
"\n",
|
|||
|
"series = generate_time_series(1, n_steps + 10)\n",
|
|||
|
"X_new, Y_new = series[:, :n_steps], series[:, n_steps:]\n",
|
|||
|
"X = X_new\n",
|
|||
|
"for step_ahead in range(10):\n",
|
|||
|
" y_pred_one = model.predict(X[:, step_ahead:])[:, np.newaxis, :]\n",
|
|||
|
" X = np.concatenate([X, y_pred_one], axis=1)\n",
|
|||
|
"\n",
|
|||
|
"Y_pred = X[:, n_steps:]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"Y_pred.shape"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def plot_multiple_forecasts(X, Y, Y_pred):\n",
|
|||
|
" n_steps = X.shape[1]\n",
|
|||
|
" ahead = Y.shape[1]\n",
|
|||
|
" plot_series(X[0, :, 0])\n",
|
|||
|
" plt.plot(np.arange(n_steps, n_steps + ahead), Y_pred[0, :, 0], \"ro-\")\n",
|
|||
|
" plt.plot(np.arange(n_steps, n_steps + ahead), Y[0, :, 0], \"bx-\", markersize=10)\n",
|
|||
|
" plt.axis([0, n_steps + ahead, -1, 1])\n",
|
|||
|
"\n",
|
|||
|
"plot_multiple_forecasts(X_new, Y_new, Y_pred)\n",
|
|||
|
"save_fig(\"forecast_ahead_plot\")\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Now let's create an RNN that predicts all 10 next values at once:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"\n",
|
|||
|
"n_steps = 50\n",
|
|||
|
"series = generate_time_series(10000, n_steps + 10)\n",
|
|||
|
"X_train, Y_train = series[:7000, :n_steps], series[:7000, -10:]\n",
|
|||
|
"X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, -10:]\n",
|
|||
|
"X_test, Y_test = series[9000:, :n_steps], series[9000:, -10:]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 28,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"model = keras.models.Sequential([\n",
|
|||
|
" keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),\n",
|
|||
|
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
|
|||
|
" keras.layers.TimeDistributed(keras.layers.Dense(1)),\n",
|
|||
|
" keras.layers.Lambda(lambda Y_pred: Y_pred[:, -10:])\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"model.compile(loss=\"mse\", optimizer=\"adam\")\n",
|
|||
|
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
|||
|
" validation_data=(X_valid, Y_valid))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 29,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(43)\n",
|
|||
|
"\n",
|
|||
|
"series = generate_time_series(1, 50 + 10)\n",
|
|||
|
"X_new, Y_new = series[:, :50, :], series[:, -10:, :]\n",
|
|||
|
"Y_pred = model.predict(X_new)[:, -10:, :]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 30,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"plot_multiple_forecasts(X_new, Y_new, Y_pred)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Now let's create an RNN that predicts the input sequence, shifted 10 steps into the future. That is, instead of just forecasting time steps 50 to 59 based on time steps 0 to 49, it will forecast time steps 10 to 59 based on time steps 0 to 49 (the time steps 10 to 49 are in the input, but the model is causal so at any time step it cannot see the future inputs):"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 31,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"\n",
|
|||
|
"n_steps = 50\n",
|
|||
|
"series = generate_time_series(10000, n_steps + 10)\n",
|
|||
|
"X_train, Y_train = series[:7000, :n_steps], series[:7000, 10:]\n",
|
|||
|
"X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, 10:]\n",
|
|||
|
"X_test, Y_test = series[9000:, :n_steps], series[9000:, 10:]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 32,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"X_train.shape, Y_train.shape"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 33,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"model = keras.models.Sequential([\n",
|
|||
|
" keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),\n",
|
|||
|
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
|
|||
|
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"def last_10_time_steps_mse(Y_true, Y_pred):\n",
|
|||
|
" return keras.metrics.mean_squared_error(Y_true[:, -10:], Y_pred[:, -10:])\n",
|
|||
|
"\n",
|
|||
|
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
|
|||
|
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
|||
|
" validation_data=(X_valid, Y_valid))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 34,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(43)\n",
|
|||
|
"\n",
|
|||
|
"series = generate_time_series(1, 50 + 10)\n",
|
|||
|
"X_new, Y_new = series[:, :50, :], series[:, 50:, :]\n",
|
|||
|
"Y_pred = model.predict(X_new)[:, -10:, :]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 35,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"plot_multiple_forecasts(X_new, Y_new, Y_pred)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Deep RNN with Batch Norm"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 36,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"model = keras.models.Sequential([\n",
|
|||
|
" keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),\n",
|
|||
|
" keras.layers.BatchNormalization(),\n",
|
|||
|
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
|
|||
|
" keras.layers.BatchNormalization(),\n",
|
|||
|
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
|
|||
|
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
|||
|
" validation_data=(X_valid, Y_valid))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Deep RNNs with Layer Norm"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 37,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from tensorflow.keras.layers.experimental import LayerNormalization"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 38,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"class LNSimpleRNNCell(keras.layers.Layer):\n",
|
|||
|
" def __init__(self, units, activation=\"tanh\", **kwargs):\n",
|
|||
|
" super().__init__(**kwargs)\n",
|
|||
|
" self.state_size = units\n",
|
|||
|
" self.output_size = units\n",
|
|||
|
" self.simple_rnn_cell = keras.layers.SimpleRNNCell(units,\n",
|
|||
|
" activation=None)\n",
|
|||
|
" self.layer_norm = LayerNormalization()\n",
|
|||
|
" self.activation = keras.activations.get(activation)\n",
|
|||
|
" def get_initial_state(self, inputs=None, batch_size=None, dtype=None):\n",
|
|||
|
" if inputs is not None:\n",
|
|||
|
" batch_size = tf.shape(inputs)[0]\n",
|
|||
|
" dtype = inputs.dtype\n",
|
|||
|
" return [tf.zeros([batch_size, self.state_size], dtype=dtype)]\n",
|
|||
|
" def call(self, inputs, states):\n",
|
|||
|
" outputs, new_states = self.simple_rnn_cell(inputs, states)\n",
|
|||
|
" norm_outputs = self.activation(self.layer_norm(outputs))\n",
|
|||
|
" return norm_outputs, [norm_outputs]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 39,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"model = keras.models.Sequential([\n",
|
|||
|
" keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True,\n",
|
|||
|
" input_shape=[None, 1]),\n",
|
|||
|
" keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True),\n",
|
|||
|
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
|
|||
|
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
|||
|
" validation_data=(X_valid, Y_valid))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Creating a Custom RNN Class"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 40,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"class MyRNN(keras.layers.Layer):\n",
|
|||
|
" def __init__(self, cell, return_sequences=False, **kwargs):\n",
|
|||
|
" super().__init__(**kwargs)\n",
|
|||
|
" self.cell = cell\n",
|
|||
|
" self.return_sequences = return_sequences\n",
|
|||
|
" self.get_initial_state = getattr(\n",
|
|||
|
" self.cell, \"get_initial_state\", self.fallback_initial_state)\n",
|
|||
|
" def fallback_initial_state(self, inputs):\n",
|
|||
|
" return [tf.zeros([self.cell.state_size], dtype=inputs.dtype)]\n",
|
|||
|
" @tf.function\n",
|
|||
|
" def call(self, inputs):\n",
|
|||
|
" states = self.get_initial_state(inputs)\n",
|
|||
|
" n_steps = tf.shape(inputs)[1]\n",
|
|||
|
" if self.return_sequences:\n",
|
|||
|
" sequences = tf.TensorArray(inputs.dtype, size=n_steps)\n",
|
|||
|
" outputs = tf.zeros(shape=[n_steps, self.cell.output_size], dtype=inputs.dtype)\n",
|
|||
|
" for step in tf.range(n_steps):\n",
|
|||
|
" outputs, states = self.cell(inputs[:, step], states)\n",
|
|||
|
" if self.return_sequences:\n",
|
|||
|
" sequences = sequences.write(step, outputs)\n",
|
|||
|
" if self.return_sequences:\n",
|
|||
|
" return sequences.stack()\n",
|
|||
|
" else:\n",
|
|||
|
" return outputs"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 41,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"model = keras.models.Sequential([\n",
|
|||
|
" MyRNN(LNSimpleRNNCell(20), return_sequences=True,\n",
|
|||
|
" input_shape=[None, 1]),\n",
|
|||
|
" MyRNN(LNSimpleRNNCell(20), return_sequences=True),\n",
|
|||
|
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
|
|||
|
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
|||
|
" validation_data=(X_valid, Y_valid))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# LSTMs"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 42,
|
|||
|
"metadata": {
|
|||
|
"scrolled": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"model = keras.models.Sequential([\n",
|
|||
|
" keras.layers.LSTM(20, return_sequences=True, input_shape=[None, 1]),\n",
|
|||
|
" keras.layers.LSTM(20, return_sequences=True),\n",
|
|||
|
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
|
|||
|
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
|||
|
" validation_data=(X_valid, Y_valid))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 43,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"model.evaluate(X_valid, Y_valid)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 44,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 45,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(43)\n",
|
|||
|
"\n",
|
|||
|
"series = generate_time_series(1, 50 + 10)\n",
|
|||
|
"X_new, Y_new = series[:, :50, :], series[:, 50:, :]\n",
|
|||
|
"Y_pred = model.predict(X_new)[:, -10:, :]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 46,
|
|||
|
"metadata": {
|
|||
|
"scrolled": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"plot_multiple_forecasts(X_new, Y_new, Y_pred)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# GRUs"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 47,
|
|||
|
"metadata": {
|
|||
|
"scrolled": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"model = keras.models.Sequential([\n",
|
|||
|
" keras.layers.GRU(20, return_sequences=True, input_shape=[None, 1]),\n",
|
|||
|
" keras.layers.GRU(20, return_sequences=True),\n",
|
|||
|
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
|
|||
|
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
|||
|
" validation_data=(X_valid, Y_valid))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 48,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"model.evaluate(X_valid, Y_valid)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 49,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 50,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(43)\n",
|
|||
|
"\n",
|
|||
|
"series = generate_time_series(1, 50 + 10)\n",
|
|||
|
"X_new, Y_new = series[:, :50, :], series[:, 50:, :]\n",
|
|||
|
"Y_pred = model.predict(X_new)[:, -10:, :]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 51,
|
|||
|
"metadata": {
|
|||
|
"scrolled": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"plot_multiple_forecasts(X_new, Y_new, Y_pred)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Using One-Dimensional Convolutional Layers to Process Sequences"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"```\n",
|
|||
|
"1D conv layer with kernel size 4, stride 2, VALID padding:\n",
|
|||
|
"\n",
|
|||
|
" |-----2----| |-----5---... |----23-----|\n",
|
|||
|
" |-----1----| |-----4-----| ... |-----22----|\n",
|
|||
|
" |-----0----| |-----3----| |---...-21---|\n",
|
|||
|
"X: 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 43 44 45 46 47 48 49\n",
|
|||
|
"Y: 10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n",
|
|||
|
"\n",
|
|||
|
"Output:\n",
|
|||
|
"\n",
|
|||
|
"X: 0 1 2 3 4 5 ... 19 20 21 22 23\n",
|
|||
|
"Y: 13 15 17 19 21 23 ... 51 53 55 57 59\n",
|
|||
|
"```"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 52,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"def last_5_time_steps_mse(Y_true, Y_pred):\n",
|
|||
|
" return keras.metrics.mean_squared_error(Y_true[:, -5:], Y_pred[:, -5:])\n",
|
|||
|
"\n",
|
|||
|
"model = keras.models.Sequential([\n",
|
|||
|
" keras.layers.Conv1D(filters=20, kernel_size=4, strides=2, padding=\"VALID\",\n",
|
|||
|
" input_shape=[None, 1]),\n",
|
|||
|
" keras.layers.GRU(20, return_sequences=True),\n",
|
|||
|
" keras.layers.GRU(20, return_sequences=True),\n",
|
|||
|
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_5_time_steps_mse])\n",
|
|||
|
"history = model.fit(X_train, Y_train[:, 3::2], epochs=20,\n",
|
|||
|
" validation_data=(X_valid, Y_valid[:, 3::2]))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## WaveNet"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"```\n",
|
|||
|
"C2 /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\.../\\ /\\ /\\ /\\ /\\ /\\ \n",
|
|||
|
" / \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\\n",
|
|||
|
" / \\ / \\ / \\ / \\\n",
|
|||
|
"C1 /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /.../\\ /\\ /\\ /\\ /\\ /\\ /\\\n",
|
|||
|
"X: 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 43 44 45 46 47 48 49\n",
|
|||
|
"Y: 10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n",
|
|||
|
"\n",
|
|||
|
"Output:\n",
|
|||
|
"\n",
|
|||
|
"X: 0 1 2 3 4 5 ... 19 20 21 22 23\n",
|
|||
|
"Y: 13 15 17 19 21 23 ... 51 53 55 57 59\n",
|
|||
|
"```"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 53,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"model = keras.models.Sequential()\n",
|
|||
|
"model.add(keras.layers.InputLayer(input_shape=[None, 1]))\n",
|
|||
|
"for rate in (1, 2, 4, 8) * 2:\n",
|
|||
|
"    # padding=\"causal\" left-pads the input with (kernel_size - 1) * dilation_rate\n",
|
|||
|
"    # zeros (i.e. `rate` zeros here), so the sequence length is preserved without\n",
|
|||
|
"    # a Lambda that closes over the loop variable: such a closure is late-bound\n",
|
|||
|
"    # and would see the final value of `rate` whenever it is invoked again.\n",
|
|||
|
"    model.add(keras.layers.Conv1D(filters=20, kernel_size=2, padding=\"causal\",\n",
|
|||
|
"                                  activation=\"relu\", dilation_rate=rate))\n",
|
|||
|
"model.add(keras.layers.Conv1D(filters=1, kernel_size=1))\n",
|
|||
|
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
|
|||
|
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
|||
|
" validation_data=(X_valid, Y_valid))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Here is the original WaveNet as defined in the paper: it uses Gated Activation Units instead of ReLU, adds parametrized skip connections, and pads with zeros on the left to avoid getting shorter and shorter sequences:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 54,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from tensorflow import keras\n",
|
|||
|
"\n",
|
|||
|
"class GatedActivationUnit(keras.layers.Layer):\n",
|
|||
|
"    \"\"\"Gated activation: activation(first half of channels) * sigmoid(second half).\n",
|
|||
|
"\n",
|
|||
|
"    The last axis of the input is split in two equal halves: the first half goes\n",
|
|||
|
"    through `activation` (tanh by default), the second half through a sigmoid\n",
|
|||
|
"    gate, and the two are multiplied element-wise.\n",
|
|||
|
"    \"\"\"\n",
|
|||
|
"    def __init__(self, activation=\"tanh\", **kwargs):\n",
|
|||
|
"        super().__init__(**kwargs)\n",
|
|||
|
"        self.activation = keras.activations.get(activation)\n",
|
|||
|
"\n",
|
|||
|
"    def call(self, inputs):\n",
|
|||
|
"        n_filters = inputs.shape[-1] // 2\n",
|
|||
|
"        linear_output = self.activation(inputs[..., :n_filters])\n",
|
|||
|
"        gate = keras.activations.sigmoid(inputs[..., n_filters:])\n",
|
|||
|
"        # linear_output is already activated; applying the activation a second\n",
|
|||
|
"        # time would compute activation(activation(x)) * gate instead.\n",
|
|||
|
"        return linear_output * gate"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 55,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def wavenet_residual_block(inputs, n_filters, dilation_rate):\n",
|
|||
|
"    \"\"\"Build one WaveNet residual block and return `(residual_output, skip_output)`.\"\"\"\n",
|
|||
|
"    # Left-pad by dilation_rate steps so the kernel-size-2 dilated convolution\n",
|
|||
|
"    # below outputs as many time steps as it receives.\n",
|
|||
|
"    padded = keras.backend.temporal_padding(inputs, (dilation_rate, 0))\n",
|
|||
|
"    dilated_conv = keras.layers.Conv1D(2 * n_filters, kernel_size=2,\n",
|
|||
|
"                                       dilation_rate=dilation_rate)\n",
|
|||
|
"    gated = GatedActivationUnit()(dilated_conv(padded))\n",
|
|||
|
"    skip = keras.layers.Conv1D(n_filters, kernel_size=1)(gated)\n",
|
|||
|
"    residual = keras.layers.Add()([skip, inputs])\n",
|
|||
|
"    return residual, skip"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 56,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"tf.random.set_seed(42)\n",
|
|||
|
"\n",
|
|||
|
"n_layers_per_block = 10\n",
|
|||
|
"n_blocks = 3\n",
|
|||
|
"n_filters = 128\n",
|
|||
|
"n_outputs = 256\n",
|
|||
|
"\n",
|
|||
|
"inputs = keras.layers.Input(shape=[None, 1])\n",
|
|||
|
"z = keras.backend.temporal_padding(inputs, (1, 0))\n",
|
|||
|
"z = keras.layers.Conv1D(n_filters, kernel_size=2)(z)\n",
|
|||
|
"skip_to_last = []\n",
|
|||
|
"for dilation_rate in [2**i for i in range(n_layers_per_block)] * n_blocks:\n",
|
|||
|
" z, skip = wavenet_residual_block(z, n_filters, dilation_rate)\n",
|
|||
|
" skip_to_last.append(skip)\n",
|
|||
|
"z = keras.activations.relu(keras.layers.Add()(skip_to_last))\n",
|
|||
|
"z = keras.layers.Conv1D(n_filters, kernel_size=1, activation=\"relu\")(z)\n",
|
|||
|
"Y_proba = keras.layers.Conv1D(n_outputs, kernel_size=1, activation=\"softmax\")(z)\n",
|
|||
|
"\n",
|
|||
|
"model = keras.models.Model(inputs=[inputs], outputs=[Y_proba])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 57,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\")\n",
|
|||
|
"history = model.fit(X_train, Y_train, epochs=2, validation_data=(X_valid, Y_valid))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"In this chapter we explored the fundamentals of RNNs and used them to process sequences (namely, time series). In the process we also looked at other ways to process sequences, including CNNs. In the next chapter we will use RNNs for Natural Language Processing, and we will learn more about RNNs (bidirectional RNNs, stateful vs stateless RNNs, Encoder–Decoders, and Attention-augmented Encoder–Decoders). We will also look at the Transformer, an Attention-only architecture."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"collapsed": true
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"# Exercise solutions"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## 1. to 6."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"See Appendix A."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## 7. Embedded Reber Grammars"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"First we need to build a function that generates strings based on a grammar. The grammar will be represented as a list of possible transitions for each state. A transition specifies the string to output (or a grammar to generate it) and the next state."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 58,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"\n",
|
|||
|
"default_reber_grammar = [\n",
|
|||
|
" [(\"B\", 1)], # (state 0) =B=>(state 1)\n",
|
|||
|
" [(\"T\", 2), (\"P\", 3)], # (state 1) =T=>(state 2) or =P=>(state 3)\n",
|
|||
|
" [(\"S\", 2), (\"X\", 4)], # (state 2) =S=>(state 2) or =X=>(state 4)\n",
|
|||
|
" [(\"T\", 3), (\"V\", 5)], # and so on...\n",
|
|||
|
" [(\"X\", 3), (\"S\", 6)],\n",
|
|||
|
" [(\"P\", 4), (\"V\", 6)],\n",
|
|||
|
" [(\"E\", None)]] # (state 6) =E=>(terminal state)\n",
|
|||
|
"\n",
|
|||
|
"embedded_reber_grammar = [\n",
|
|||
|
" [(\"B\", 1)],\n",
|
|||
|
" [(\"T\", 2), (\"P\", 3)],\n",
|
|||
|
" [(default_reber_grammar, 4)],\n",
|
|||
|
" [(default_reber_grammar, 5)],\n",
|
|||
|
" [(\"T\", 6)],\n",
|
|||
|
" [(\"P\", 6)],\n",
|
|||
|
" [(\"E\", None)]]\n",
|
|||
|
"\n",
|
|||
|
"def generate_string(grammar):\n",
|
|||
|
"    \"\"\"Generate a random string from `grammar`, starting at state 0.\n",
|
|||
|
"\n",
|
|||
|
"    `grammar[state]` is a list of `(production, next_state)` transitions, one of\n",
|
|||
|
"    which is picked uniformly at random. A production that is itself a list is\n",
|
|||
|
"    treated as a nested grammar and expanded recursively. Generation stops when\n",
|
|||
|
"    the chosen transition's next state is None.\n",
|
|||
|
"    \"\"\"\n",
|
|||
|
"    state = 0\n",
|
|||
|
"    output = []\n",
|
|||
|
"    while state is not None:\n",
|
|||
|
"        index = np.random.randint(len(grammar[state]))\n",
|
|||
|
"        production, state = grammar[state][index]\n",
|
|||
|
"        if isinstance(production, list):\n",
|
|||
|
"            production = generate_string(grammar=production)\n",
|
|||
|
"        output.append(production)\n",
|
|||
|
"    return \"\".join(output)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Let's generate a few strings based on the default Reber grammar:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 59,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"for _ in range(25):\n",
|
|||
|
" print(generate_string(default_reber_grammar), end=\" \")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Looks good. Now let's generate a few strings based on the embedded Reber grammar:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 60,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"for _ in range(25):\n",
|
|||
|
" print(generate_string(embedded_reber_grammar), end=\" \")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Okay, now we need a function to generate strings that do not respect the grammar. We could generate a random string, but the task would be a bit too easy, so instead we will generate a string that respects the grammar, and we will corrupt it by changing just one character:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 61,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def generate_corrupted_string(grammar, chars=\"BEPSTVX\"):\n",
|
|||
|
"    \"\"\"Generate a string from `grammar`, then replace one randomly chosen\n",
|
|||
|
"    character with a different character drawn from `chars`.\"\"\"\n",
|
|||
|
"    valid = generate_string(grammar)\n",
|
|||
|
"    position = np.random.randint(len(valid))\n",
|
|||
|
"    # Sorting makes the candidate order (and hence the seeded draw) deterministic.\n",
|
|||
|
"    candidates = sorted(set(chars) - set(valid[position]))\n",
|
|||
|
"    replacement = np.random.choice(candidates)\n",
|
|||
|
"    prefix, suffix = valid[:position], valid[position + 1:]\n",
|
|||
|
"    return prefix + replacement + suffix"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Let's look at a few corrupted strings:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 62,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"for _ in range(25):\n",
|
|||
|
" print(generate_corrupted_string(embedded_reber_grammar), end=\" \")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"To be continued..."
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.6.8"
|
|||
|
},
|
|||
|
"nav_menu": {},
|
|||
|
"toc": {
|
|||
|
"navigate_menu": true,
|
|||
|
"number_sections": true,
|
|||
|
"sideBar": true,
|
|||
|
"threshold": 6,
|
|||
|
"toc_cell": false,
|
|||
|
"toc_section_display": "block",
|
|||
|
"toc_window_display": false
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 1
|
|||
|
}
|