From 6d7dcd7c80b6ec1728988642bbbb9ec025d30178 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Fri, 5 Apr 2019 17:04:38 +0800 Subject: [PATCH] Work in progress updating ch15 --- ...pynb => 15_recurrent_neural_networks.ipynb | 1620 ++++++++++++++++- 1 file changed, 1584 insertions(+), 36 deletions(-) rename work_in_progress/14_recurrent_neural_networks.ipynb => 15_recurrent_neural_networks.ipynb (61%) diff --git a/work_in_progress/14_recurrent_neural_networks.ipynb b/15_recurrent_neural_networks.ipynb similarity index 61% rename from work_in_progress/14_recurrent_neural_networks.ipynb rename to 15_recurrent_neural_networks.ipynb index d84171e..635be5a 100644 --- a/work_in_progress/14_recurrent_neural_networks.ipynb +++ b/15_recurrent_neural_networks.ipynb @@ -4,14 +4,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Chapter 14 – Recurrent Neural Networks**" + "**Chapter 15 – Recurrent Neural Networks**" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "_This notebook contains all the sample code and solutions to the exercises in chapter 14._" + "_This notebook contains all the sample code in chapter 15._" ] }, { @@ -25,62 +25,56 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" + "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0-preview." 
] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "# To support both python 2 and python 3\n", - "from __future__ import division, print_function, unicode_literals\n", + "# Python ≥3.5 is required\n", + "import sys\n", + "assert sys.version_info >= (3, 5)\n", + "\n", + "# Scikit-Learn ≥0.20 is required\n", + "import sklearn\n", + "assert sklearn.__version__ >= \"0.20\"\n", + "\n", + "# TensorFlow ≥2.0-preview is required\n", + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "assert tf.__version__ >= \"2.0\"\n", "\n", "# Common imports\n", "import numpy as np\n", "import os\n", "\n", "# to make this notebook's output stable across runs\n", - "def reset_graph(seed=42):\n", - " tf.reset_default_graph()\n", - " tf.set_random_seed(seed)\n", - " np.random.seed(seed)\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", "\n", "# To plot pretty figures\n", "%matplotlib inline\n", - "import matplotlib\n", + "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", - "plt.rcParams['axes.labelsize'] = 14\n", - "plt.rcParams['xtick.labelsize'] = 12\n", - "plt.rcParams['ytick.labelsize'] = 12\n", + "mpl.rc('axes', labelsize=14)\n", + "mpl.rc('xtick', labelsize=12)\n", + "mpl.rc('ytick', labelsize=12)\n", "\n", "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", "CHAPTER_ID = \"rnn\"\n", + "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n", + "os.makedirs(IMAGES_PATH, exist_ok=True)\n", "\n", - "def save_fig(fig_id, tight_layout=True):\n", - " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n", + " path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n", " print(\"Saving figure\", fig_id)\n", " if tight_layout:\n", " plt.tight_layout()\n", - " plt.savefig(path, format='png', dpi=300)" - ] - }, - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "Then of course we will need TensorFlow:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import tensorflow as tf" + " plt.savefig(path, format=fig_extension, dpi=resolution)" ] }, { @@ -94,9 +88,1561 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Manual RNN" + "### Generate the Dataset" ] }, + { + "cell_type": "code", + "execution_count": 493, + "metadata": {}, + "outputs": [], + "source": [ + "def generate_time_series(batch_size, n_steps):\n", + " freq1, freq2, offsets1, offsets2 = np.random.rand(4, batch_size, 1)\n", + " time = np.linspace(0, 1, n_steps)\n", + " series = 0.5 * np.sin((time - offsets1) * (freq1 * 10 + 10)) # wave 1\n", + " series += 0.2 * np.sin((time - offsets2) * (freq2 * 20 + 20)) # + wave 2\n", + " series += 0.1 * (np.random.rand(batch_size, n_steps) - 0.5) # + noise\n", + " return series[..., np.newaxis].astype(np.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": 494, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "\n", + "n_steps = 50\n", + "series = generate_time_series(10000, n_steps + 1)\n", + "X_train, y_train = series[:7000, :n_steps], series[:7000, -1]\n", + "X_valid, y_valid = series[7000:9000, :n_steps], series[7000:9000, -1]\n", + "X_test, y_test = series[9000:, :n_steps], series[9000:, -1]" + ] + }, + { + "cell_type": "code", + "execution_count": 495, + "metadata": {}, + "outputs": [], + "source": [ + "X_train.shape, y_train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 496, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_series(series, y=None, y_pred=None, x_label=\"$t$\", y_label=\"$x(t)$\"):\n", + " plt.plot(series, \".-\")\n", + " if y is not None:\n", + " plt.plot(n_steps, y, \"bx\", markersize=10)\n", + " if y_pred is not None:\n", + " plt.plot(n_steps, y_pred, \"ro\")\n", + " plt.grid(True)\n", + " if x_label:\n", + " 
plt.xlabel(x_label, fontsize=16)\n", + " if y_label:\n", + " plt.ylabel(y_label, fontsize=16, rotation=0)\n", + " plt.hlines(0, 0, 100, linewidth=1)\n", + " plt.axis([0, n_steps + 1, -1, 1])\n", + "\n", + "fig, axes = plt.subplots(nrows=1, ncols=3, sharey=True, figsize=(12, 4))\n", + "for col in range(3):\n", + " plt.sca(axes[col])\n", + " plot_series(X_valid[col, :, 0], y_valid[col, 0],\n", + " y_label=(\"$x(t)$\" if col==0 else None))\n", + "save_fig(\"time_series_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Computing Some Baselines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Naive predictions (just predict the last observed value):" + ] + }, + { + "cell_type": "code", + "execution_count": 497, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = X_valid[:, -1]\n", + "np.mean(keras.losses.mean_squared_error(y_valid, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 498, + "metadata": {}, + "outputs": [], + "source": [ + "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Linear predictions:" + ] + }, + { + "cell_type": "code", + "execution_count": 499, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", + "\n", + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(),\n", + " keras.layers.Dense(1)\n", + "])\n", + "\n", + "model.compile(loss=\"mse\", optimizer=\"adam\")\n", + "history = model.fit(X_train, y_train, epochs=20,\n", + " validation_data=(X_valid, y_valid))" + ] + }, + { + "cell_type": "code", + "execution_count": 500, + "metadata": {}, + "outputs": [], + "source": [ + "model.evaluate(X_valid, y_valid)" + ] + }, + { + "cell_type": "code", + "execution_count": 501, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_learning_curves(loss, val_loss):\n", + " 
plt.plot(np.arange(len(loss)) + 0.5, loss, \"b.-\", label=\"Training loss\")\n", + " plt.plot(np.arange(len(val_loss)) + 1, val_loss, \"r.-\", label=\"Validation loss\")\n", + " plt.gca().xaxis.set_major_locator(mpl.ticker.MaxNLocator(integer=True))\n", + " plt.axis([1, 20, 0, 0.05])\n", + " plt.legend(fontsize=14)\n", + " plt.xlabel(\"Epochs\")\n", + " plt.ylabel(\"Loss\")\n", + " plt.grid(True)\n", + "\n", + "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 502, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_valid)\n", + "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using a Simple RNN" + ] + }, + { + "cell_type": "code", + "execution_count": 503, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", + "\n", + "model = keras.models.Sequential([keras.layers.SimpleRNN(1)])\n", + "\n", + "model.compile(loss=\"mse\", optimizer=\"adam\")\n", + "history = model.fit(X_train, y_train, epochs=20,\n", + " validation_data=(X_valid, y_valid))" + ] + }, + { + "cell_type": "code", + "execution_count": 504, + "metadata": {}, + "outputs": [], + "source": [ + "model.evaluate(X_valid, y_valid)" + ] + }, + { + "cell_type": "code", + "execution_count": 505, + "metadata": {}, + "outputs": [], + "source": [ + "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 506, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_valid)\n", + "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Deep RNNs" + ] + }, + { + "cell_type": "code", + "execution_count": 513, +
"metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", + "\n", + "model = keras.models.Sequential([\n", + " keras.layers.SimpleRNN(20, return_sequences=True),\n", + " keras.layers.SimpleRNN(20, return_sequences=True),\n", + " keras.layers.SimpleRNN(1)\n", + "])\n", + "\n", + "model.compile(loss=\"mse\", optimizer=\"adam\")\n", + "history = model.fit(X_train, y_train, epochs=20,\n", + " validation_data=(X_valid, y_valid))" + ] + }, + { + "cell_type": "code", + "execution_count": 514, + "metadata": {}, + "outputs": [], + "source": [ + "model.evaluate(X_valid, y_valid)" + ] + }, + { + "cell_type": "code", + "execution_count": 515, + "metadata": {}, + "outputs": [], + "source": [ + "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 516, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_valid)\n", + "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 517, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", + "\n", + "model = keras.models.Sequential([\n", + " keras.layers.SimpleRNN(20, return_sequences=True),\n", + " keras.layers.SimpleRNN(20),\n", + " keras.layers.Dense(1)\n", + "])\n", + "\n", + "model.compile(loss=\"mse\", optimizer=\"adam\")\n", + "history = model.fit(X_train, y_train, epochs=20,\n", + " validation_data=(X_valid, y_valid))" + ] + }, + { + "cell_type": "code", + "execution_count": 518, + "metadata": {}, + "outputs": [], + "source": [ + "model.evaluate(X_valid, y_valid)" + ] + }, + { + "cell_type": "code", + "execution_count": 519, + "metadata": {}, + "outputs": [], + "source": [ + "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 520,
+ "metadata": {}, + "outputs": [], + "source": [ + "y_pred = model.predict(X_valid)\n", + "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_pred[0, 0])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Forecasting Several Steps Ahead" + ] + }, + { + "cell_type": "code", + "execution_count": 390, + "metadata": {}, + "outputs": [], + "source": [ + "X_new.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 394, + "metadata": {}, + "outputs": [], + "source": [ + "Y_pred.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 401, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(43) # not 42, as it would give the first series in the train set\n", + "\n", + "series = generate_time_series(1, n_steps + 10)\n", + "X_new, Y_new = series[:, :n_steps], series[:, n_steps:]\n", + "X = X_new\n", + "for step_ahead in range(10):\n", + " y_pred_one = model.predict(X[:, step_ahead:])[:, np.newaxis, :]\n", + " X = np.concatenate([X, y_pred_one], axis=1)\n", + "\n", + "Y_pred = X[:, n_steps:]" + ] + }, + { + "cell_type": "code", + "execution_count": 402, + "metadata": {}, + "outputs": [], + "source": [ + "Y_pred.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 403, + "metadata": {}, + "outputs": [], + "source": [ + "plot_series(X_new[0, :50, 0])\n", + "plt.plot(np.arange(50, 60), Y_pred[0, :, 0], \"ro-\")\n", + "plt.plot(np.arange(50, 60), Y_new[0, :, 0], \"bx-\", markersize=10)\n", + "plt.axis([0, 60, -1, 1])\n", + "save_fig(\"forecast_ahead_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's create an RNN that predicts all 10 next values at once:" + ] + }, + { + "cell_type": "code", + "execution_count": 521, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "\n", + "n_steps = 50\n", + "series = generate_time_series(10000, n_steps + 10)\n", + "X_train, Y_train = series[:7000, :n_steps], series[:7000,
-10:]\n", + "X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, -10:]\n", + "X_test, Y_test = series[9000:, :n_steps], series[9000:, -10:]" + ] + }, + { + "cell_type": "code", + "execution_count": 522, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", + "\n", + "model = keras.models.Sequential([\n", + " keras.layers.SimpleRNN(20, return_sequences=True),\n", + " keras.layers.SimpleRNN(20, return_sequences=True),\n", + " keras.layers.TimeDistributed(keras.layers.Dense(1)),\n", + " keras.layers.Lambda(lambda Y_pred: Y_pred[:, -10:])\n", + "])\n", + "\n", + "model.compile(loss=\"mse\", optimizer=\"adam\")\n", + "history = model.fit(X_train, Y_train, epochs=20,\n", + " validation_data=(X_valid, Y_valid))" + ] + }, + { + "cell_type": "code", + "execution_count": 523, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(43)\n", + "\n", + "series = generate_time_series(1, 50 + 10)\n", + "X_new, Y_new = series[:, :50, :], series[:, -10:, :]\n", + "Y_pred = model.predict(X_new)" + ] + }, + { + "cell_type": "code", + "execution_count": 524, + "metadata": {}, + "outputs": [], + "source": [ + "plot_series(X_new[0, :50, 0])\n", + "plt.plot(np.arange(50, 60), Y_pred[0, :, 0], \"ro-\")\n", + "plt.plot(np.arange(50, 60), Y_new[0, :, 0], \"bx-\", markersize=10)\n", + "plt.axis([0, 60, -1, 1])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's create an RNN that predicts the input sequence, shifted 10 steps into the future:" + ] + }, + { + "cell_type": "code", + "execution_count": 531, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "\n", + "n_steps = 50\n", + "series = generate_time_series(10000, n_steps + 10)\n", + "X_train, Y_train = series[:7000, :n_steps], series[:7000, 10:]\n", + "X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, 10:]\n", + "X_test, Y_test = series[9000:, :n_steps], series[9000:, 
10:]" + ] + }, + { + "cell_type": "code", + "execution_count": 532, + "metadata": {}, + "outputs": [], + "source": [ + "X_train.shape, Y_train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 527, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", + "\n", + "model = keras.models.Sequential([\n", + " keras.layers.SimpleRNN(20, return_sequences=True),\n", + " keras.layers.SimpleRNN(20, return_sequences=True),\n", + " keras.layers.TimeDistributed(keras.layers.Dense(1))\n", + "])\n", + "\n", + "def last_10_time_steps_mse(Y_true, Y_pred):\n", + " return keras.metrics.mean_squared_error(Y_true[:, -10:], Y_pred[:, -10:])\n", + "\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n", + "history = model.fit(X_train, Y_train, epochs=20,\n", + " validation_data=(X_valid, Y_valid))" + ] + }, + { + "cell_type": "code", + "execution_count": 409, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(43)\n", + "\n", + "series = generate_time_series(1, 50 + 10)\n", + "X_new, Y_new = series[:, :50, :], series[:, 50:, :]\n", + "Y_pred = model.predict(X_new)[:, -10:, :]" + ] + }, + { + "cell_type": "code", + "execution_count": 410, + "metadata": {}, + "outputs": [], + "source": [ + "plot_series(X_new[0, :50, 0])\n", + "plt.plot(np.arange(50, 60), Y_pred[0, :, 0], \"ro-\")\n", + "plt.plot(np.arange(50, 60), Y_new[0, :, 0], \"bx-\", markersize=10)\n", + "plt.axis([0, 60, -1, 1])\n", + "save_fig(\"forecast_ahead_multi_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Deep RNN with Batch Norm" + ] + }, + { + "cell_type": "code", + "execution_count": 534, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", + "\n", + "\n", + "model = keras.models.Sequential([\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.SimpleRNN(20, return_sequences=True),\n", + " 
keras.layers.BatchNormalization(),\n", + " keras.layers.SimpleRNN(20, return_sequences=True),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.TimeDistributed(keras.layers.Dense(1))\n", + "])\n", + "\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n", + "history = model.fit(X_train, Y_train, epochs=20,\n", + " validation_data=(X_valid, Y_valid))" + ] + }, + { + "cell_type": "code", + "execution_count": 365, + "metadata": {}, + "outputs": [], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Deep RNNs with Layer Norm" + ] + }, + { + "cell_type": "code", + "execution_count": 619, + "metadata": {}, + "outputs": [], + "source": [ + "keras.layers.GRUCell.get_initial_state?" + ] + }, + { + "cell_type": "code", + "execution_count": 622, + "metadata": {}, + "outputs": [], + "source": [ + "LayerNormalization = keras.layers.experimental.LayerNormalization\n", + "\n", + "class LNSimpleRNNCell(keras.layers.Layer):\n", + " def __init__(self, units, activation=\"tanh\", **kwargs):\n", + " super().__init__(**kwargs)\n", + " self.state_size = units\n", + " self.output_size = units\n", + " self.simple_rnn_cell = keras.layers.SimpleRNNCell(units,\n", + " activation=None)\n", + " self.layer_norm = LayerNormalization()\n", + " self.activation = keras.activations.get(activation)\n", + " def get_initial_state(self, inputs=None, batch_size=None, dtype=None):\n", + " return tf.zeros([batch_size, self.state_size], dtype=dtype)\n", + " def call(self, inputs, states):\n", + " outputs, new_states = self.simple_rnn_cell(inputs, states)\n", + " norm_outputs = self.activation(self.layer_norm(outputs))\n", + " return norm_outputs, [norm_outputs]" + ] + }, + { + "cell_type": "code", + "execution_count": 623, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", + "\n", + "model = keras.models.Sequential([\n", + " 
keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True),\n", + " keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True),\n", + " keras.layers.TimeDistributed(keras.layers.Dense(1))\n", + "])\n", + "\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n", + "history = model.fit(X_train, Y_train, epochs=20,\n", + " validation_data=(X_valid, Y_valid))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LSTMs" + ] + }, + { + "cell_type": "code", + "execution_count": 626, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", + "\n", + "model = keras.models.Sequential([\n", + " keras.layers.LSTM(20, return_sequences=True),\n", + " keras.layers.LSTM(20, return_sequences=True),\n", + " keras.layers.TimeDistributed(keras.layers.Dense(1))\n", + "])\n", + "\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n", + "history = model.fit(X_train, Y_train, epochs=20,\n", + " validation_data=(X_valid, Y_valid))" + ] + }, + { + "cell_type": "code", + "execution_count": 368, + "metadata": {}, + "outputs": [], + "source": [ + "model.evaluate(X_valid, y_valid)" + ] + }, + { + "cell_type": "code", + "execution_count": 369, + "metadata": {}, + "outputs": [], + "source": [ + "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 370, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "y_pred = model.predict(X_valid)\n", + "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_valid[0, 0])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GRUs" + ] + }, + { + "cell_type": "code", + "execution_count": 648, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", + "\n", + "model = 
keras.models.Sequential([\n", + " keras.layers.GRU(20, return_sequences=True),\n", + " keras.layers.GRU(20, return_sequences=True),\n", + " keras.layers.TimeDistributed(keras.layers.Dense(1))\n", + "])\n", + "\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n", + "history = model.fit(X_train, Y_train, epochs=20,\n", + " validation_data=(X_valid, Y_valid))" + ] + }, + { + "cell_type": "code", + "execution_count": 372, + "metadata": {}, + "outputs": [], + "source": [ + "model.evaluate(X_valid, y_valid)" + ] + }, + { + "cell_type": "code", + "execution_count": 373, + "metadata": {}, + "outputs": [], + "source": [ + "plot_learning_curves(history.history[\"loss\"], history.history[\"val_loss\"])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 374, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "y_pred = model.predict(X_valid)\n", + "plot_series(X_valid[0, :, 0], y_valid[0, 0], y_valid[0, 0])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using One-Dimensional Convolutional Layers to Process Sequences" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```\n", + "1D conv layer with kernel size 4, stride 2, VALID padding:\n", + "\n", + " |-----2----| |-----5---... |----23-----|\n", + " |-----1----| |-----4-----| ... |-----22----|\n", + " |-----0----| |-----3----| |---...-21---|\n", + "X: 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 43 44 45 46 47 48 49\n", + "Y: 10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n", + "\n", + "Output:\n", + "\n", + "X: 0 1 2 3 4 5 ... 19 20 21 22 23\n", + "Y: 13 15 17 19 21 23 ... 
51 53 55 57 59\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 638, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", + "\n", + "def last_5_time_steps_mse(Y_true, Y_pred):\n", + " return keras.metrics.mean_squared_error(Y_true[:, -5:], Y_pred[:, -5:])\n", + "\n", + "model = keras.models.Sequential([\n", + " keras.layers.Conv1D(filters=20, kernel_size=4, strides=2, padding=\"VALID\"),\n", + " keras.layers.GRU(20, return_sequences=True),\n", + " keras.layers.GRU(20, return_sequences=True),\n", + " keras.layers.TimeDistributed(keras.layers.Dense(1))\n", + "])\n", + "\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_5_time_steps_mse])\n", + "history = model.fit(X_train, Y_train[:, 3::2], epochs=20,\n", + " validation_data=(X_valid, Y_valid[:, 3::2]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## WaveNet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```\n", + "C2 /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\.../\\ /\\ /\\ /\\ /\\ /\\ \n", + " / \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\\n", + " / \\ / \\ / \\ / \\\n", + "C1 /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /.../\\ /\\ /\\ /\\ /\\ /\\ /\\\n", + "X: 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 43 44 45 46 47 48 49\n", + "Y: 10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n", + "\n", + "Output:\n", + "\n", + "X: 0 1 2 3 4 5 ... 19 20 21 22 23\n", + "Y: 13 15 17 19 21 23 ... 
51 53 55 57 59\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 671, + "metadata": {}, + "outputs": [], + "source": [ + "model = keras.models.Sequential()\n", + "for rate in (1, 2, 4, 8) * 2:\n", + " activation = \"relu\" if len(model.layers) < 7 else None\n", + " model.add(keras.layers.Conv1D(filters=20, kernel_size=2, padding=\"VALID\",\n", + " activation=activation, dilation_rate=rate))\n", + "\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n", + "history = model.fit(X_train, Y_train[:, 30:], epochs=20,\n", + " validation_data=(X_valid, Y_valid[:, 30:]))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here is the original WaveNet defined in the paper: it uses Gated Activation Units instead of ReLU and parametrized skip connections, plus it pads with zeros on the left to avoid getting shorter and shorter sequences:" + ] + }, + { + "cell_type": "code", + "execution_count": 734, + "metadata": {}, + "outputs": [], + "source": [ + "from tensorflow import keras\n", + "\n", + "class GatedActivationUnit(keras.layers.Layer):\n", + " def __init__(self, activation=\"tanh\", **kwargs):\n", + " super().__init__(**kwargs)\n", + " self.activation = keras.activations.get(activation)\n", + " def call(self, inputs):\n", + " n_filters = inputs.shape[-1] // 2\n", + " linear_output = self.activation(inputs[..., :n_filters])\n", + " gate = keras.activations.sigmoid(inputs[..., n_filters:])\n", + " return linear_output * gate" + ] + }, + { + "cell_type": "code", + "execution_count": 735, + "metadata": {}, + "outputs": [], + "source": [ + "def wavenet_residual_block(inputs, n_filters, dilation_rate):\n", + " z = keras.backend.temporal_padding(inputs, (dilation_rate, 0))\n", + " z = keras.layers.Conv1D(2 * n_filters, kernel_size=2,\n", + " dilation_rate=dilation_rate)(z)\n", + " z = GatedActivationUnit()(z)\n", + " z = keras.layers.Conv1D(n_filters, kernel_size=1)(z)\n", + "
return keras.layers.Add()([z, inputs]), z" + ] + }, + { + "cell_type": "code", + "execution_count": 736, + "metadata": {}, + "outputs": [], + "source": [ + "inputs = keras.layers.Input(shape=[10000, 1])\n", + "skip_to_last = []\n", + "n_filters = 128\n", + "z = keras.backend.temporal_padding(inputs, (1, 0))\n", + "z = keras.layers.Conv1D(n_filters, kernel_size=2)(z)\n", + "for dilation_rate in [2**i for i in range(10)] * 3:\n", + " z, skip = wavenet_residual_block(z, 128, dilation_rate)\n", + " skip_to_last.append(skip)\n", + "z = keras.activations.relu(keras.layers.Add()(skip_to_last))\n", + "z = keras.layers.Conv1D(128, kernel_size=1, activation=\"relu\")(z)\n", + "Y_proba = keras.layers.Conv1D(256, kernel_size=1, activation=\"softmax\")(z)\n", + "\n", + "model = keras.models.Model(inputs=[inputs], outputs=[Y_proba])" + ] + }, + { + "cell_type": "code", + "execution_count": 732, + "metadata": {}, + "outputs": [], + "source": [ + "seq_length = 10000\n", + "n_layers_per_block = 10\n", + "n_blocks = 3\n", + "n_filters = 128\n", + "n_outputs = 256\n", + "\n", + "inputs = keras.layers.Input(shape=[seq_length, 1])\n", + "skip_to_last = []\n", + "z = inputs\n", + "for dilation_rate in [2**i for i in range(n_layers_per_block)] * n_blocks:\n", + " z, skip = wavenet_residual_block(z, n_filters, dilation_rate)\n", + " skip_to_last.append(skip)\n", + "z = keras.activations.relu(keras.layers.Add()(skip_to_last))\n", + "z = keras.layers.Conv1D(n_filters, kernel_size=1, activation=\"relu\")(z)\n", + "Y_proba = keras.layers.Conv1D(n_outputs, kernel_size=1, activation=\"softmax\")(z)\n", + "\n", + "model = keras.models.Model(inputs=[inputs], outputs=[Y_proba])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Low-Level RNN API" + ] + }, + { + "cell_type": "code", + "execution_count": 611, + "metadata": {}, + "outputs": [], + "source": [ + "class MyRNN(keras.layers.Layer):\n", + " def __init__(self, cell, return_sequences=False,
**kwargs):\n", + " super().__init__(**kwargs)\n", + " self.cell = cell\n", + " self.return_sequences = return_sequences\n", + " try:\n", + " self.initial_state = self.cell.get_initial_state()\n", + " except AttributeError:\n", + " self.initial_state = [tf.zeros(shape=[size], dtype=inputs.dtype)\n", + " for size in self.cell.states_size]\n", + " def call(self, inputs):\n", + " n_steps = tf.shape(inputs)[1]\n", + " if self.return_sequences:\n", + " sequences = tf.TensorArray(inputs.dtype, size=n_steps)\n", + " for step in tf.range(n_steps):\n", + " outputs, states = self.cell(inputs[:, step], states)\n", + " if self.return_sequences:\n", + " sequences.write(step, outputs)\n", + " if self.return_sequences:\n", + " return sequences.stack(), states\n", + " else:\n", + " return outputs, states" + ] + }, + { + "cell_type": "code", + "execution_count": 612, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", + "\n", + "model = keras.models.Sequential([\n", + " MyRNN(LNSimpleRNNCell(20), return_sequences=True),\n", + " MyRNN(LNSimpleRNNCell(20), return_sequences=True),\n", + " keras.layers.TimeDistributed(keras.layers.Dense(1))\n", + "])\n", + "\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n", + "history = model.fit(X_train, Y_train, epochs=20,\n", + " validation_data=(X_valid, Y_valid))" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "batch_size = 32\n", + "\n", + "X_batch = X_train[:batch_size]\n", + "y_batch = y_train[:batch_size]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "n_neurons = 10\n", + "cell = keras.layers.SimpleRNNCell(n_neurons)\n", + "\n", + "states = [tf.zeros((batch_size, n_neurons))]\n", + "for step in range(n_steps):\n", + " output, states = cell(X_batch[:, step], states)" + ] + }, + { + "cell_type": "markdown", + "metadata": 
{},
+   "source": [
+    "### Splitting a sequence into batches of shuffled windows"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For example, let's split the sequence 0 to 14 into windows of length 5, each shifted by 2 (e.g., `[0, 1, 2, 3, 4]`, `[2, 3, 4, 5, 6]`, etc.), then shuffle them, and split them into inputs (the first 4 steps) and targets (the last 4 steps) (e.g., `[2, 3, 4, 5, 6]` would be split into `[[2, 3, 4, 5], [3, 4, 5, 6]]`), then create batches of 3 such input/target pairs:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 467,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "n_steps = 5\n",
+    "dataset = tf.data.Dataset.from_tensor_slices(tf.range(15))\n",
+    "dataset = dataset.window(n_steps, shift=2, drop_remainder=True)\n",
+    "# each window is itself a dataset: convert it to a single tensor\n",
+    "dataset = dataset.flat_map(lambda window: window.batch(n_steps))\n",
+    "dataset = dataset.shuffle(10).map(lambda window: (window[:-1], window[1:]))\n",
+    "dataset = dataset.batch(3).prefetch(1)\n",
+    "for index, (X_batch, y_batch) in enumerate(dataset):\n",
+    "    print(\"_\" * 20, \"Batch\", index, \"\\nX_batch\")\n",
+    "    print(X_batch.numpy())\n",
+    "    print(\"=\" * 5, \"\\nY_batch\")\n",
+    "    print(y_batch.numpy())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Char-RNN"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 452,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "shakespeare_url = \"https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt\"\n",
+    "filepath = keras.utils.get_file(\"shakespeare.txt\", shakespeare_url)\n",
+    "# explicit encoding: do not depend on the platform's default\n",
+    "with open(filepath, encoding=\"utf-8\") as f:\n",
+    "    shakespeare_text = f.read()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 453,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(shakespeare_text[:148])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 454,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\".join(sorted(set(shakespeare_text.lower())))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 468,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenizer = keras.preprocessing.text.Tokenizer(char_level=True)\n",
+    "tokenizer.fit_on_texts(shakespeare_text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 469,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenizer.texts_to_sequences([\"First\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 470,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenizer.sequences_to_texts([[20, 6, 9, 8, 3]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 471,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The tokenizer's character IDs start at 1 (0 is reserved), so the one-hot\n",
+    "# depth and the number of output classes must be one more than the largest\n",
+    "# ID, or else the last character would fall out of range.\n",
+    "max_id = len(tokenizer.word_index) + 1 # number of distinct characters + 1\n",
+    "dataset_size = tokenizer.document_count # total number of characters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 472,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "[encoded] = np.array(tokenizer.texts_to_sequences([shakespeare_text]))\n",
+    "train_size = dataset_size * 90 // 100\n",
+    "dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 473,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n_steps = 100 + 1 # 100 input characters, 1 target\n",
+    "dataset = dataset.repeat().window(n_steps, shift=1, drop_remainder=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 474,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = dataset.flat_map(lambda window: window.batch(n_steps))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 475,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = dataset.shuffle(10000).map(lambda window: (window[:-1], window[1:]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 476,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = dataset.map(\n",
+    "    lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 477,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_size = 32\n",
+    "dataset = dataset.batch(batch_size).prefetch(1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 478,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for X_batch, Y_batch in dataset.take(1):\n",
+    "    print(X_batch.shape, Y_batch.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 482,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.GRU(128, return_sequences=True),\n",
+    "    keras.layers.GRU(128, return_sequences=True),\n",
+    "    keras.layers.GRU(max_id, return_sequences=True, activation=\"softmax\"),\n",
+    "])\n",
+    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\")\n",
+    "history = model.fit(dataset, steps_per_epoch=train_size // batch_size,\n",
+    "                    epochs=20)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 490,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "m = keras.models.Sequential([\n",
+    "    keras.layers.LSTM(128, return_sequences=True),\n",
+    "    keras.layers.LSTM(3, return_sequences=True, activation=\"softmax\"),\n",
+    "    #keras.layers.TimeDistributed(keras.layers.Dense(3, activation=\"softmax\")),\n",
+    "])\n",
+    "m.predict(np.random.rand(1, 10, 20)).sum(axis=-1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 222,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def preprocess(texts):\n",
+    "    # encode the texts as character IDs, then one-hot encode these IDs\n",
+    "    X = np.array(tokenizer.texts_to_sequences(texts))\n",
+    "    return tf.one_hot(X, max_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 224,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_new = preprocess([\"How are yo\"])\n",
+    "y_pred = model.predict_classes(X_new)\n",
+    "# y_pred is already 2D (one row of character IDs per text): do not wrap it\n",
+    "tokenizer.sequences_to_texts(y_pred)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 146,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.layers[-1].weights[1].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 232,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def next_char(texts, temperature=1):\n",
+    "    # Samples the next character of each text. The higher the temperature,\n",
+    "    # the more random the choice. Returns a list of one string per text.\n",
+    "    X_new = preprocess(texts)\n",
+    "    # keep only the probabilities predicted at the last time step:\n",
+    "    # tf.random.categorical() expects 2D logits [batch size, classes]\n",
+    "    y_proba = model.predict(X_new)[:, -1, :]\n",
+    "    logits = tf.math.log(y_proba) / temperature\n",
+    "    char_id = tf.random.categorical(logits, 1)\n",
+    "    return tokenizer.sequences_to_texts(char_id.numpy())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 234,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def complete_text(text, n_chars=50, temperature=1):\n",
+    "    # repeatedly append the sampled next character to the text\n",
+    "    for _ in range(n_chars):\n",
+    "        text += next_char([text], temperature)[0]\n",
+    "    return text"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 238,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(complete_text(\"W\", temperature=0.001))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 365,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(complete_text(\"W\", temperature=0.5))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 240,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(complete_text(\"W\", temperature=1000))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Handling Sequences of Different Sizes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's shorten each time series by chopping off a random number of time steps (from the start, so we don't need to change the targets):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def shorten_series(X):\n",
+    "    # use the series' own length rather than the global n_steps, which\n",
+    "    # other sections of this notebook reassign to different values\n",
+    "    max_length = X.shape[1]\n",
+    "    row_lengths = np.random.randint(10, max_length + 1, size=len(X))\n",
+    "    # keep only the last `length` time steps of each series\n",
+    "    X_values = np.concatenate([row[-length:] for row, length in zip(X, row_lengths)])\n",
+    "    return tf.RaggedTensor.from_row_lengths(X_values, row_lengths)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "\n",
+    "X_train_ragged =
shorten_series(X_train)\n",
+    "X_valid_ragged = shorten_series(X_valid)\n",
+    "X_test_ragged = shorten_series(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train_ragged.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The lengths of the first 10 series:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "[len(series) for series in X_train_ragged[:10]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 148,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# pad the shorter series with a value that the Masking layers will skip\n",
+    "mask_value = 1000.\n",
+    "X_train_padded = X_train_ragged.to_tensor(default_value=mask_value)\n",
+    "X_valid_padded = X_valid_ragged.to_tensor(default_value=mask_value)\n",
+    "X_test_padded = X_test_ragged.to_tensor(default_value=mask_value)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 149,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "masking_layer = keras.layers.Masking(mask_value)\n",
+    "series = np.array([[[1.], [2.], [mask_value], [mask_value]],\n",
+    "                   [[3.], [4.], [5.], [mask_value]]])\n",
+    "masking_layer(series)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 134,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "masking_layer.compute_mask(series)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 165,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.Masking(mask_value, input_shape=[50, 1]),\n",
+    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(1, return_sequences=True),\n",
+    "])\n",
+    "model(X_train_padded[:1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 170,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.Masking(mask_value, input_shape=[50, 1]),\n",
+    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(100),\n",
+    "])\n",
+    "model(X_train_padded[:1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 114,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "model = keras.models.Sequential([\n",
+    "    # the mask value must match the padding value: Masking() defaults to 0.\n",
+    "    keras.layers.Masking(mask_value, input_shape=[50, 1]),\n",
+    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(10, return_sequences=True),\n",
+    "    keras.layers.SimpleRNN(1, activation=None)\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimizer = keras.optimizers.SGD(learning_rate=1e-4, momentum=0.95, nesterov=True)\n",
+    "model.compile(loss=\"mse\", optimizer=optimizer)\n",
+    "history = model.fit(X_train_padded, tf.constant(y_train), epochs=20,\n",
+    "                    validation_data=(X_valid_padded, tf.constant(y_valid)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Sketch RNN"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow_datasets as tfds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "datasets = tfds.load(\"quickdraw_sketch_rnn\", as_supervised=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_set = datasets[\"train\"]\n",
+    "valid_set = datasets[\"validation\"]\n",
+    "test_set = datasets[\"test\"]"
+   ]
+  },
+  {
+   "cell_type":
"code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "for X_batch, y_batch in train_set.take(2):\n", + " print(X_batch.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 3, @@ -496,7 +2042,9 @@ { "cell_type": "code", "execution_count": 33, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "print(states_val)" @@ -2482,7 +4030,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.8" }, "nav_menu": {}, "toc": {