diff --git a/work_in_progress/14_recurrent_neural_networks.ipynb b/15_recurrent_neural_networks.ipynb
similarity index 61%
rename from work_in_progress/14_recurrent_neural_networks.ipynb
rename to 15_recurrent_neural_networks.ipynb
index d84171e..635be5a 100644
--- a/work_in_progress/14_recurrent_neural_networks.ipynb
+++ b/15_recurrent_neural_networks.ipynb
@@ -4,14 +4,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "**Chapter 14 – Recurrent Neural Networks**"
+    "**Chapter 15 – Recurrent Neural Networks**"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "_This notebook contains all the sample code and solutions to the exercises in chapter 14._"
+    "_This notebook contains all the sample code in chapter 15._"
    ]
   },
   {
@@ -25,62 +25,56 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:"
+    "First, let's import a few common modules, ensure Matplotlib plots figures inline, and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated, so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0-preview."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# To support both python 2 and python 3\n",
-    "from __future__ import division, print_function, unicode_literals\n",
+    "# Python ≥3.5 is required\n",
+    "import sys\n",
+    "assert sys.version_info >= (3, 5)\n",
+    "\n",
+    "# Scikit-Learn ≥0.20 is required\n",
+    "import sklearn\n",
+    "assert sklearn.__version__ >= \"0.20\"\n",
+    "\n",
+    "# TensorFlow ≥2.0-preview is required\n",
+    "import tensorflow as tf\n",
+    "from tensorflow import keras\n",
+    "assert tf.__version__ >= \"2.0\"\n",
     "\n",
     "# Common imports\n",
     "import numpy as np\n",
     "import os\n",
     "\n",
     "# to make this notebook's output stable across runs\n",
-    "def reset_graph(seed=42):\n",
-    "    tf.reset_default_graph()\n",
-    "    tf.set_random_seed(seed)\n",
-    "    np.random.seed(seed)\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
     "\n",
     "# To plot pretty figures\n",
     "%matplotlib inline\n",
-    "import matplotlib\n",
+    "import matplotlib as mpl\n",
     "import matplotlib.pyplot as plt\n",
-    "plt.rcParams['axes.labelsize'] = 14\n",
-    "plt.rcParams['xtick.labelsize'] = 12\n",
-    "plt.rcParams['ytick.labelsize'] = 12\n",
+    "mpl.rc('axes', labelsize=14)\n",
+    "mpl.rc('xtick', labelsize=12)\n",
+    "mpl.rc('ytick', labelsize=12)\n",
     "\n",
     "# Where to save the figures\n",
     "PROJECT_ROOT_DIR = \".\"\n",
     "CHAPTER_ID = \"rnn\"\n",
+    "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
+    "os.makedirs(IMAGES_PATH, exist_ok=True)\n",
     "\n",
-    "def save_fig(fig_id, tight_layout=True):\n",
-    "    path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n",
+    "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n",
+    "    path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n",
     "    print(\"Saving figure\", fig_id)\n",
     "    if tight_layout:\n",
     "        plt.tight_layout()\n",
-    "    plt.savefig(path, format='png', dpi=300)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Then of course we will need TensorFlow:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import tensorflow as tf"
+    "    plt.savefig(path, format=fig_extension, dpi=resolution)"
    ]
   },
   {
@@ -94,9 +88,1561 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Manual RNN"
+    "### Generate the Dataset"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 493,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def generate_time_series(batch_size, n_steps):\n",
+    "    t = np.linspace(0, 1, n_steps)  # time axis shared by every series\n",
+    "    f1, f2, shift1, shift2 = np.random.rand(4, batch_size, 1)  # per-series draws\n",
+    "    wave1 = 0.5 * np.sin((t - shift1) * (f1 * 10 + 10))\n",
+    "    wave2 = 0.2 * np.sin((t - shift2) * (f2 * 20 + 20))\n",
+    "    noise = 0.1 * (np.random.rand(batch_size, n_steps) - 0.5)\n",
+    "    return (wave1 + wave2 + noise)[..., np.newaxis].astype(np.float32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 494,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "\n",
+    "n_steps = 50\n",
+    "series = generate_time_series(10000, n_steps + 1)\n",
+    "X_train, y_train = series[:7000, :n_steps], series[:7000, -1]\n",
+    "X_valid, y_valid = series[7000:9000, :n_steps], series[7000:9000, -1]\n",
+    "X_test, y_test = series[9000:, :n_steps], series[9000:, -1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 495,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train.shape, y_train.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 496,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def plot_series(series, y=None, y_pred=None, x_label=\"$t$\", y_label=\"$x(t)$\"):\n",
+    "    plt.plot(series, \".-\")\n",
+    "    if y is not None:       # true value, drawn one step after the last input\n",
+    "        plt.plot(len(series), y, \"bx\", markersize=10)\n",
+    "    if y_pred is not None:  # predicted value for that same step\n",
+    "        plt.plot(len(series), y_pred, \"ro\")\n",
+    "    plt.grid(True)\n",
+    "    if x_label:\n",
+    "        plt.xlabel(x_label, fontsize=16)\n",
+    "    if y_label:\n",
+    "        plt.ylabel(y_label, fontsize=16, rotation=0)\n",
+    "    plt.hlines(0, 0, 100, linewidth=1)  # zero line, long enough for callers that widen the x-axis\n",
+    "    plt.axis([0, len(series) + 1, -1, 1])  # sized from the data rather than a global n_steps\n",
+    "\n",
+    "fig, axes = plt.subplots(nrows=1, ncols=3, sharey=True, figsize=(12, 4))\n",
+    "for col in range(3):\n",
+    "    plt.sca(axes[col])\n",
+    "    plot_series(X_valid[col, :, 0], y_valid[col, 0],\n",
+    "                y_label=(\"$x(t)$\" if col==0 else None))\n",
+    "save_fig(\"time_series_plot\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Computing Some Baselines"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Naive predictions (just predict the last observed value):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 497,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_pred = X_valid[:, -1]\n",
+    "np.mean(keras.losses.mean_squared_error(y_valid, y_pred))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 498,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Linear predictions:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 499,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.Flatten(),\n",
+    "    keras.layers.Dense(1)\n",
+    "])\n",
+    "\n",
+    "model.compile(loss=\"mse\", optimizer=\"adam\")\n",
+    "history = model.fit(X_train, y_train, epochs=20,\n",
+    "                    validation_data=(X_valid, y_valid))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 500,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.evaluate(X_valid, y_valid)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 501,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def plot_learning_curves(loss, val_loss):  # per-epoch loss lists, e.g. from history.history\n",
+    "    plt.plot(np.arange(len(loss)) + 0.5, loss, \"b.-\", label=\"Training loss\")\n",
+    "    plt.plot(np.arange(len(val_loss)) + 1, val_loss, \"r.-\", label=\"Validation loss\")\n",
+    "    plt.gca().xaxis.set_major_locator(mpl.ticker.MaxNLocator(integer=True))\n",
+    "    plt.axis([1, len(loss), 0, 0.05])  # x-range follows the number of epochs actually run\n",
+    "    plt.legend(fontsize=14)\n",
+    "    plt.xlabel(\"Epochs\")\n",
+    "    plt.ylabel(\"Loss\")\n",
+    "    plt.grid(True)\n",
+    "\n",
+    "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 502,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_pred = model.predict(X_valid)\n",
+    "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Using a Simple RNN"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 503,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([keras.layers.SimpleRNN(1)])\n",
+    "\n",
+    "model.compile(loss=\"mse\", optimizer=\"adam\")\n",
+    "history = model.fit(X_train, y_train, epochs=20,\n",
+    "                    validation_data=(X_valid, y_valid))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 504,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.evaluate(X_valid, y_valid)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 505,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 506,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_pred = model.predict(X_valid)\n",
+    "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Deep RNNs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 513,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(1)\n",
+    "])\n",
+    "\n",
+    "model.compile(loss=\"mse\", optimizer=\"adam\")\n",
+    "history = model.fit(X_train, y_train, epochs=20,\n",
+    "                    validation_data=(X_valid, y_valid))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 514,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.evaluate(X_valid, y_valid)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 515,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 516,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_pred = model.predict(X_valid)\n",
+    "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])  # red dot = model prediction, not the target\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 517,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(20),\n",
+    "    keras.layers.Dense(1)\n",
+    "])\n",
+    "\n",
+    "model.compile(loss=\"mse\", optimizer=\"adam\")\n",
+    "history = model.fit(X_train, y_train, epochs=20,\n",
+    "                    validation_data=(X_valid, y_valid))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 518,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.evaluate(X_valid, y_valid)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 519,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 520,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_pred = model.predict(X_valid)\n",
+    "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])  # red dot = model prediction, not the target\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Forecasting Several Steps Ahead"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 390,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_new.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 394,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Y_pred.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 401,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(43) # not 42, as it would give the first series in the train set\n",
+    "\n",
+    "series = generate_time_series(1, n_steps + 10)\n",
+    "X_new, Y_new = series[:, :n_steps], series[:, n_steps:]\n",
+    "X = X_new\n",
+    "for step_ahead in range(10):\n",
+    "    y_pred_one = model.predict(X[:, step_ahead:])[:, np.newaxis, :]\n",
+    "    X = np.concatenate([X, y_pred_one], axis=1)\n",
+    "\n",
+    "Y_pred = X[:, n_steps:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 402,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Y_pred.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 403,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_series(X_new[0, :50, 0])\n",
+    "plt.plot(np.arange(50, 60), Y_pred[0, :, 0], \"ro-\")\n",
+    "plt.plot(np.arange(50, 60), Y_new[0, :, 0], \"bx-\", markersize=10)\n",
+    "plt.axis([0, 60, -1, 1])\n",
+    "save_fig(\"forecast_ahead_plot\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now let's create an RNN that predicts all 10 next values at once:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 521,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "\n",
+    "n_steps = 50\n",
+    "series = generate_time_series(10000, n_steps + 10)\n",
+    "X_train, Y_train = series[:7000, :n_steps], series[:7000, -10:]\n",
+    "X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, -10:]\n",
+    "X_test, Y_test = series[9000:, :n_steps], series[9000:, -10:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 522,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
+    "    keras.layers.TimeDistributed(keras.layers.Dense(1)),\n",
+    "    keras.layers.Lambda(lambda Y_pred: Y_pred[:, -10:])\n",
+    "])\n",
+    "\n",
+    "model.compile(loss=\"mse\", optimizer=\"adam\")\n",
+    "history = model.fit(X_train, Y_train, epochs=20,\n",
+    "                    validation_data=(X_valid, Y_valid))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 523,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(43)\n",
+    "\n",
+    "series = generate_time_series(1, 50 + 10)\n",
+    "X_new, Y_new = series[:, :50, :], series[:, -10:, :]\n",
+    "Y_pred = model.predict(X_new)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 524,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_series(X_new[0, :50, 0])\n",
+    "plt.plot(np.arange(50, 60), Y_pred[0, :, 0], \"ro-\")\n",
+    "plt.plot(np.arange(50, 60), Y_new[0, :, 0], \"bx-\", markersize=10)\n",
+    "plt.axis([0, 60, -1, 1])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now let's create an RNN that predicts the input sequence, shifted 10 steps into the future:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 531,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "\n",
+    "n_steps = 50\n",
+    "series = generate_time_series(10000, n_steps + 10)\n",
+    "X_train, Y_train = series[:7000, :n_steps], series[:7000, 10:]\n",
+    "X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, 10:]\n",
+    "X_test, Y_test = series[9000:, :n_steps], series[9000:, 10:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 532,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train.shape, Y_train.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 527,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
+    "    keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
+    "])\n",
+    "\n",
+    "def last_10_time_steps_mse(Y_true, Y_pred):\n",
+    "    return keras.metrics.mean_squared_error(Y_true[:, -10:], Y_pred[:, -10:])\n",
+    "\n",
+    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
+    "history = model.fit(X_train, Y_train, epochs=20,\n",
+    "                    validation_data=(X_valid, Y_valid))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 409,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(43)\n",
+    "\n",
+    "series = generate_time_series(1, 50 + 10)\n",
+    "X_new, Y_new = series[:, :50, :], series[:, 50:, :]\n",
+    "Y_pred = model.predict(X_new)[:, -10:, :]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 410,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_series(X_new[0, :50, 0])\n",
+    "plt.plot(np.arange(50, 60), Y_pred[0, :, 0], \"ro-\")\n",
+    "plt.plot(np.arange(50, 60), Y_new[0, :, 0], \"bx-\", markersize=10)\n",
+    "plt.axis([0, 60, -1, 1])\n",
+    "save_fig(\"forecast_ahead_multi_plot\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Deep RNN with Batch Norm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 534,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.BatchNormalization(),\n",
+    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
+    "    keras.layers.BatchNormalization(),\n",
+    "    keras.layers.SimpleRNN(20, return_sequences=True),\n",
+    "    keras.layers.BatchNormalization(),\n",
+    "    keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
+    "])\n",
+    "\n",
+    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
+    "history = model.fit(X_train, Y_train, epochs=20,\n",
+    "                    validation_data=(X_valid, Y_valid))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 365,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.summary()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Deep RNNs with Layer Norm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 619,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.layers.GRUCell.get_initial_state?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 622,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "LayerNormalization = keras.layers.experimental.LayerNormalization\n",
+    "\n",
+    "class LNSimpleRNNCell(keras.layers.Layer):\n",
+    "    def __init__(self, units, activation=\"tanh\", **kwargs):\n",
+    "        super().__init__(**kwargs)\n",
+    "        self.state_size = self.output_size = units\n",
+    "        self.simple_rnn_cell = keras.layers.SimpleRNNCell(units, activation=None)  # linear step only\n",
+    "        self.layer_norm = LayerNormalization()\n",
+    "        self.activation = keras.activations.get(activation)\n",
+    "    def get_initial_state(self, inputs=None, batch_size=None, dtype=None):\n",
+    "        if inputs is not None:  # take batch size and dtype from the inputs when they are given\n",
+    "            batch_size, dtype = tf.shape(inputs)[0], inputs.dtype\n",
+    "        return tf.zeros([batch_size, self.state_size], dtype=dtype)\n",
+    "    def call(self, inputs, states):\n",
+    "        outputs, _ = self.simple_rnn_cell(inputs, states)  # the inner cell's own state is not used\n",
+    "        norm_outputs = self.activation(self.layer_norm(outputs))  # normalize, then activate\n",
+    "        return norm_outputs, [norm_outputs]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 623,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True),\n",
+    "    keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True),\n",
+    "    keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
+    "])\n",
+    "\n",
+    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
+    "history = model.fit(X_train, Y_train, epochs=20,\n",
+    "                    validation_data=(X_valid, Y_valid))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# LSTMs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 626,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.LSTM(20, return_sequences=True),\n",
+    "    keras.layers.LSTM(20, return_sequences=True),\n",
+    "    keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
+    "])\n",
+    "\n",
+    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
+    "history = model.fit(X_train, Y_train, epochs=20,\n",
+    "                    validation_data=(X_valid, Y_valid))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 368,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.evaluate(X_valid, Y_valid)  # sequence targets (Y_valid), not the earlier single-step y_valid"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 369,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 370,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "y_pred = model.predict(X_valid)\n",
+    "plot_series(X_valid[0, :, 0], Y_valid[0, -10, 0], y_pred[0, -10, 0])  # targets are shifted 10 steps, so output -10 is the first step after the inputs\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# GRUs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 648,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.GRU(20, return_sequences=True),\n",
+    "    keras.layers.GRU(20, return_sequences=True),\n",
+    "    keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
+    "])\n",
+    "\n",
+    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
+    "history = model.fit(X_train, Y_train, epochs=20,\n",
+    "                    validation_data=(X_valid, Y_valid))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 372,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.evaluate(X_valid, Y_valid)  # sequence targets (Y_valid), not the earlier single-step y_valid"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 373,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 374,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "y_pred = model.predict(X_valid)\n",
+    "plot_series(X_valid[0, :, 0], Y_valid[0, -10, 0], y_pred[0, -10, 0])  # targets are shifted 10 steps, so output -10 is the first step after the inputs\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Using One-Dimensional Convolutional Layers to Process Sequences"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```\n",
+    "1D conv layer with kernel size 4, stride 2, VALID padding:\n",
+    "\n",
+    "              |-----2----|      |-----5---...         |----23-----|\n",
+    "        |-----1----|      |-----4-----|   ...   |-----22----|\n",
+    "  |-----0----|      |-----3----|      |---...-21---|\n",
+    "X: 0  1  2  3  4  5  6  7  8  9  10 11 12 ... 43 44 45 46 47 48 49\n",
+    "Y: 10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n",
+    "\n",
+    "Output:\n",
+    "\n",
+    "X: 0  1  2  3  4  5  ... 19 20 21 22 23\n",
+    "Y: 13 15 17 19 21 23 ... 51 53 55 57 59\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 638,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "def last_5_time_steps_mse(Y_true, Y_pred):\n",
+    "    return keras.metrics.mean_squared_error(Y_true[:, -5:], Y_pred[:, -5:])\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.Conv1D(filters=20, kernel_size=4, strides=2, padding=\"VALID\"),\n",
+    "    keras.layers.GRU(20, return_sequences=True),\n",
+    "    keras.layers.GRU(20, return_sequences=True),\n",
+    "    keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
+    "])\n",
+    "\n",
+    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_5_time_steps_mse])\n",
+    "history = model.fit(X_train, Y_train[:, 3::2], epochs=20,\n",
+    "                    validation_data=(X_valid, Y_valid[:, 3::2]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## WaveNet"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```\n",
+    "C2     /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\.../\\ /\\ /\\ /\\ /\\ /\\ \n",
+    "      /  \\  /  \\  /  \\  /  \\  /  \\  /  \\       /  \\  /  \\  /  \\\n",
+    "     /    \\      /    \\      /    \\                 /    \\\n",
+    "C1  /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\  /\\ /.../\\ /\\ /\\ /\\ /\\ /\\ /\\\n",
+    "X: 0  1  2  3  4  5  6  7  8  9  10 11 12 ... 43 44 45 46 47 48 49\n",
+    "Y: 10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n",
+    "\n",
+    "Output:\n",
+    "\n",
+    "X: 0  1  2  3  4  5  ... 15 16 17 18 19\n",
+    "Y: 40 41 42 43 44 45 ... 55 56 57 58 59\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 671,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = keras.models.Sequential()\n",
+    "for rate in (1, 2, 4, 8) * 2:\n",
+    "    filters, activation = (20, \"relu\") if len(model.layers) < 7 else (1, None)  # last layer: linear, 1 value per step like Y\n",
+    "    model.add(keras.layers.Conv1D(filters=filters, kernel_size=2, padding=\"VALID\",\n",
+    "                                  activation=activation, dilation_rate=rate))\n",
+    "\n",
+    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
+    "history = model.fit(X_train, Y_train[:, 30:], epochs=20,  # the 8 VALID convs drop 2*(1+2+4+8)=30 steps\n",
+    "                    validation_data=(X_valid, Y_valid[:, 30:]))\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here is the original WaveNet defined in the paper: it uses Gated Activation Units instead of ReLU and parametrized skip connections, plus it pads with zeros on the left to avoid getting shorter and shorter sequences:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 734,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from tensorflow import keras\n",
+    "\n",
+    "class GatedActivationUnit(keras.layers.Layer):\n",
+    "    def __init__(self, activation=\"tanh\", **kwargs):\n",
+    "        super().__init__(**kwargs)\n",
+    "        self.activation = keras.activations.get(activation)\n",
+    "    def call(self, inputs):\n",
+    "        n_filters = inputs.shape[-1] // 2  # first half of the channels: filter, second half: gate\n",
+    "        linear_output = self.activation(inputs[..., :n_filters])\n",
+    "        gate = keras.activations.sigmoid(inputs[..., n_filters:])\n",
+    "        return linear_output * gate  # activation applied once: act(filter) * sigmoid(gate)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 735,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def wavenet_residual_block(inputs, n_filters, dilation_rate):\n",
+    "    z = keras.backend.temporal_padding(inputs, (dilation_rate, 0))\n",
+    "    z = keras.layers.Conv1D(2 * n_filters, kernel_size=2,\n",
+    "                            dilation_rate=dilation_rate)(z)\n",
+    "    z = GatedActivationUnit()(z)\n",
+    "    z = keras.layers.Conv1D(n_filters, kernel_size=1)(z)\n",
+    "    return keras.layers.Add()([z, inputs]), z"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 736,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "inputs = keras.layers.Input(shape=[10000, 1])\n",
+    "skip_to_last = []\n",
+    "n_filters = 128\n",
+    "z = keras.backend.temporal_padding(inputs, (1, 0))\n",
+    "z = keras.layers.Conv1D(n_filters, kernel_size=2)(z)  # 1 step of left padding + kernel of 2 keeps the length\n",
+    "for dilation_rate in [2**i for i in range(10)] * 3:\n",
+    "    z, skip = wavenet_residual_block(z, n_filters, dilation_rate)\n",
+    "    skip_to_last.append(skip)\n",
+    "z = keras.activations.relu(keras.layers.Add()(skip_to_last))  # sum of all skip connections\n",
+    "z = keras.layers.Conv1D(n_filters, kernel_size=1, activation=\"relu\")(z)\n",
+    "Y_proba = keras.layers.Conv1D(256, kernel_size=1, activation=\"softmax\")(z)\n",
+    "\n",
+    "model = keras.models.Model(inputs=[inputs], outputs=[Y_proba])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 732,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "seq_length = 10000\n",
+    "n_layers_per_block = 10\n",
+    "n_blocks = 3\n",
+    "n_filters = 128\n",
+    "n_outputs = 256\n",
+    "\n",
+    "inputs = keras.layers.Input(shape=[seq_length, 1])\n",
+    "skip_to_last = []\n",
+    "z = inputs\n",
+    "for dilation_rate in [2**i for i in range(n_layers_per_block)] * n_blocks:\n",
+    "    z, skip = wavenet_residual_block(z, n_filters, dilation_rate)\n",
+    "    skip_to_last.append(skip)\n",
+    "z = keras.activations.relu(keras.layers.Add()(skip_to_last))\n",
+    "z = keras.layers.Conv1D(n_filters, kernel_size=1, activation=\"relu\")(z)\n",
+    "Y_proba = keras.layers.Conv1D(n_outputs, kernel_size=1, activation=\"softmax\")(z)\n",
+    "\n",
+    "model = keras.models.Model(inputs=[inputs], outputs=[Y_proba])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Low-Level RNN API"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 611,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MyRNN(keras.layers.Layer):\n",
+    "    def __init__(self, cell, return_sequences=False, **kwargs):\n",
+    "        super().__init__(**kwargs)\n",
+    "        self.cell = cell\n",
+    "        self.return_sequences = return_sequences\n",
+    "    @tf.function\n",
+    "    def call(self, inputs):  # inputs are indexed as [batch, step, ...]\n",
+    "        # The zero state is built per call because the batch size is unknown in __init__;\n",
+    "        # assumes the cell keeps a single state tensor of width cell.state_size -- TODO confirm.\n",
+    "        batch_size, n_steps = tf.shape(inputs)[0], tf.shape(inputs)[1]\n",
+    "        states = [tf.zeros([batch_size, self.cell.state_size], dtype=inputs.dtype)]\n",
+    "        # Defined before the loop so `outputs` exists even when there are no time steps.\n",
+    "        outputs = tf.zeros([batch_size, self.cell.output_size], dtype=inputs.dtype)\n",
+    "        sequences = tf.TensorArray(\n",
+    "            inputs.dtype, size=(n_steps if self.return_sequences else 0))\n",
+    "        for step in tf.range(n_steps):\n",
+    "            outputs, states = self.cell(inputs[:, step], states)\n",
+    "            if self.return_sequences:\n",
+    "                sequences = sequences.write(step, outputs)  # write() returns the updated array\n",
+    "        if self.return_sequences:\n",
+    "            return tf.transpose(sequences.stack(), [1, 0, 2])  # time-major -> batch-major\n",
+    "        return outputs  # outputs only, so the layer can be stacked in a Sequential model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 612,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    MyRNN(LNSimpleRNNCell(20), return_sequences=True),\n",
+    "    MyRNN(LNSimpleRNNCell(20), return_sequences=True),\n",
+    "    keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
+    "])\n",
+    "\n",
+    "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
+    "history = model.fit(X_train, Y_train, epochs=20,\n",
+    "                    validation_data=(X_valid, Y_valid))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_size = 32\n",
+    "\n",
+    "X_batch = X_train[:batch_size]\n",
+    "y_batch = y_train[:batch_size]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n_neurons = 10\n",
+    "cell = keras.layers.SimpleRNNCell(n_neurons)\n",
+    "\n",
+    "states = [tf.zeros((batch_size, n_neurons))]\n",
+    "for step in range(n_steps):\n",
+    "    output, states = cell(X_batch[:, step], states)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Splitting a sequence into batches of shuffled windows"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For example, let's split the sequence 0 to 14 into windows of length 5, each shifted by 2 (e.g., `[0, 1, 2, 3, 4]`, `[2, 3, 4, 5, 6]`, etc.), then shuffle them, and split them into inputs (the first 4 steps) and targets (the last 4 steps) (e.g., `[2, 3, 4, 5, 6]` would be split into `[[2, 3, 4, 5], [3, 4, 5, 6]]`), then create batches of 3 such input/target pairs:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 467,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "n_steps = 5\n",
+    "dataset = tf.data.Dataset.from_tensor_slices(tf.range(15))\n",
+    "dataset = dataset.window(n_steps, shift=2, drop_remainder=True)\n",
+    "dataset = dataset.flat_map(lambda window: window.batch(n_steps))\n",
+    "dataset = dataset.shuffle(10).map(lambda window: (window[:-1], window[1:]))\n",
+    "dataset = dataset.batch(3).prefetch(1)\n",
+    "for index, (X_batch, y_batch) in enumerate(dataset):\n",
+    "    print(\"_\" * 20, \"Batch\", index, \"\\nX_batch\")\n",
+    "    print(X_batch.numpy())\n",
+    "    print(\"=\" * 5, \"\\nY_batch\")\n",
+    "    print(y_batch.numpy())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Char-RNN"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 452,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "shakespeare_url = \"https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt\"\n",
+    "filepath = keras.utils.get_file(\"shakespeare.txt\", shakespeare_url)\n",
+    "with open(filepath) as f:\n",
+    "    shakespeare_text = f.read()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 453,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(shakespeare_text[:148])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 454,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\".join(sorted(set(shakespeare_text.lower())))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 468,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenizer = keras.preprocessing.text.Tokenizer(char_level=True)\n",
+    "tokenizer.fit_on_texts(shakespeare_text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 469,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenizer.texts_to_sequences([\"First\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 470,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenizer.sequences_to_texts([[20, 6, 9, 8, 3]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 471,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keras' Tokenizer assigns IDs 1..N (0 is reserved), so N + 1 classes are needed for\n",
+    "# tf.one_hot() and the softmax output layer to cover the largest character ID\n",
+    "max_id = len(tokenizer.word_index) + 1 # number of distinct characters + 1 (reserved ID 0)\n",
+    "dataset_size = tokenizer.document_count # total number of characters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 472,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "[encoded] = np.array(tokenizer.texts_to_sequences([shakespeare_text]))\n",
+    "train_size = dataset_size * 90 // 100\n",
+    "dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 473,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n_steps = 100 + 1 # 100 input characters, 1 target\n",
+    "dataset = dataset.repeat().window(n_steps, shift=1, drop_remainder=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 474,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = dataset.flat_map(lambda window: window.batch(n_steps))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 475,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = dataset.shuffle(10000).map(lambda window: (window[:-1], window[1:]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 476,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = dataset.map(\n",
+    "    lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 477,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_size = 32\n",
+    "dataset = dataset.batch(batch_size).prefetch(1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 478,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for X_batch, Y_batch in dataset.take(1):\n",
+    "    print(X_batch.shape, Y_batch.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 482,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.GRU(128, return_sequences=True),\n",
+    "    keras.layers.GRU(128, return_sequences=True),\n",
+    "    keras.layers.GRU(max_id, return_sequences=True, activation=\"softmax\"),\n",
+    "])\n",
+    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\")\n",
+    "history = model.fit(dataset, steps_per_epoch=train_size // batch_size,\n",
+    "                    epochs=20)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 490,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "m = keras.models.Sequential([\n",
+    "    keras.layers.LSTM(128, return_sequences=True),\n",
+    "    keras.layers.LSTM(3, return_sequences=True, activation=\"softmax\"),\n",
+    "    #keras.layers.TimeDistributed(keras.layers.Dense(3, activation=\"softmax\")),\n",
+    "])\n",
+    "m.predict(np.random.rand(1, 10, 20)).sum(axis=-1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 222,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def preprocess(texts):\n",
+    "    \"\"\"Encode `texts` as character IDs with `tokenizer`, then one-hot encode them (depth `max_id`).\"\"\"\n",
+    "    char_ids = tokenizer.texts_to_sequences(texts)\n",
+    "    return tf.one_hot(np.array(char_ids), max_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 224,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_new = preprocess([\"How are yo\"])\n",
+    "# the model above returns sequences, so there is one predicted character ID per time step\n",
+    "y_pred = model.predict_classes(X_new)\n",
+    "# y_pred is already a batch of ID sequences: it must not be wrapped in another list\n",
+    "tokenizer.sequences_to_texts(y_pred)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 146,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.layers[-1].weights[1].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 232,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def next_char(texts, temperature=1):\n",
+    "    \"\"\"Sample the next character for each text in `texts`.\n",
+    "\n",
+    "    The predicted probabilities are turned into logits and divided by\n",
+    "    `temperature` before sampling one character ID per text.\n",
+    "    Returns a list with one sampled character (a string) per input text.\n",
+    "    \"\"\"\n",
+    "    X_new = preprocess(texts)\n",
+    "    # The model outputs probabilities for every time step: keep only the last\n",
+    "    # step, since tf.random.categorical() expects 2D [batch, classes] logits.\n",
+    "    y_proba = model.predict(X_new)[:, -1, :]\n",
+    "    logits = tf.math.log(y_proba) / temperature\n",
+    "    char_id = tf.random.categorical(logits, 1)\n",
+    "    return tokenizer.sequences_to_texts(char_id.numpy())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 234,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def complete_text(text, n_chars=50, temperature=1):\n",
+    "    \"\"\"Extend `text` by calling next_char() `n_chars` times, appending each result.\"\"\"\n",
+    "    completed = text\n",
+    "    for _step in range(n_chars):\n",
+    "        sampled = next_char([completed], temperature)\n",
+    "        completed = completed + sampled[0]\n",
+    "    return completed"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 238,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(complete_text(\"W\", temperature=0.001))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 365,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(complete_text(\"W\", temperature=0.5))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 240,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(complete_text(\"W\", temperature=1000))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Handling Sequences of Different Sizes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's shorten each time series by chopping off a random number of time steps (from the start, so we don't need to change the targets):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def shorten_series(X):\n",
+    "    \"\"\"Keep only a random-length tail of each series in X.\n",
+    "\n",
+    "    Each series keeps its last `length` steps, where `length` is drawn\n",
+    "    uniformly between 10 and the full series length (inclusive).\n",
+    "    Returns a tf.RaggedTensor with one row per series.\n",
+    "    \"\"\"\n",
+    "    # Use the series' own length rather than the global n_steps, which other\n",
+    "    # sections of this notebook reassign (to 5 and to 101): a row length larger\n",
+    "    # than the series would not match the number of values actually kept.\n",
+    "    max_length = len(X[0])\n",
+    "    row_lengths = np.random.randint(min(10, max_length), max_length + 1, size=len(X))\n",
+    "    X_values = np.concatenate([row[-length:] for row, length in zip(X, row_lengths)])\n",
+    "    return tf.RaggedTensor.from_row_lengths(X_values, row_lengths)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "\n",
+    "X_train_ragged = shorten_series(X_train)\n",
+    "X_valid_ragged = shorten_series(X_valid)\n",
+    "X_test_ragged = shorten_series(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train_ragged.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The lengths of the first 10 series:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "[len(series) for series in X_train_ragged[:10]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 148,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mask_value = 1000.\n",
+    "X_train_padded = X_train_ragged.to_tensor(default_value=mask_value)\n",
+    "X_valid_padded = X_valid_ragged.to_tensor(default_value=mask_value)\n",
+    "X_test_padded = X_test_ragged.to_tensor(default_value=mask_value)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 149,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "masking_layer = keras.layers.Masking(mask_value)\n",
+    "series = np.array([[[1.], [2.], [mask_value], [mask_value]],\n",
+    "                   [[3.], [4.], [5.], [mask_value]]])\n",
+    "masking_layer(series)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 134,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "masking_layer.compute_mask(series)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 165,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.Masking(mask_value, input_shape=[50, 1]),\n",
+    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(1, return_sequences=True),\n",
+    "])\n",
+    "model(X_train_padded[:1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 170,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.Masking(mask_value, input_shape=[50, 1]),\n",
+    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(100),\n",
+    "])\n",
+    "model(X_train_padded[:1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 114,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    # pass mask_value explicitly: the padded series use it, and it is not Masking's default (0.)\n",
+    "    keras.layers.Masking(mask_value, input_shape=[50, 1]),\n",
+    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(1, activation=None)\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimizer = keras.optimizers.SGD(lr=1e-4, momentum=0.95, nesterov=True)\n",
+    "model.compile(loss=\"mse\", optimizer=optimizer)\n",
+    "history = model.fit(X_train_padded, tf.constant(y_train), epochs=20,\n",
+    "                    validation_data=(X_valid_padded, tf.constant(y_valid)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Sketch RNN"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow_datasets as tfds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "datasets = tfds.load(\"quickdraw_sketch_rnn\", as_supervised=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_set = datasets[\"train\"]\n",
+    "valid_set = datasets[\"validation\"]\n",
+    "test_set = datasets[\"test\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for X_batch, y_batch in train_set.take(2):\n",
+    "    print(X_batch.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "code",
    "execution_count": 3,
@@ -496,7 +2042,9 @@
   {
    "cell_type": "code",
    "execution_count": 33,
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
     "print(states_val)"
@@ -2482,7 +4030,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.6"
+   "version": "3.6.8"
   },
   "nav_menu": {},
   "toc": {