diff --git a/15_processing_sequences_using_rnns_and_cnns.ipynb b/15_processing_sequences_using_rnns_and_cnns.ipynb index 028e750..ba116b1 100644 --- a/15_processing_sequences_using_rnns_and_cnns.ipynb +++ b/15_processing_sequences_using_rnns_and_cnns.ipynb @@ -500,9 +500,9 @@ "\n", "n_steps = 50\n", "series = generate_time_series(10000, n_steps + 10)\n", - "X_train, Y_train = series[:7000, :n_steps], series[:7000, -10:]\n", - "X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, -10:]\n", - "X_test, Y_test = series[9000:, :n_steps], series[9000:, -10:]" + "X_train, Y_train = series[:7000, :n_steps], series[:7000, -10:, 0]\n", + "X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, -10:, 0]\n", + "X_test, Y_test = series[9000:, :n_steps], series[9000:, -10:, 0]" ] }, { @@ -516,9 +516,8 @@ "\n", "model = keras.models.Sequential([\n", " keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),\n", - " keras.layers.SimpleRNN(20, return_sequences=True),\n", - " keras.layers.TimeDistributed(keras.layers.Dense(1)),\n", - " keras.layers.Lambda(lambda Y_pred: Y_pred[:, -10:])\n", + " keras.layers.SimpleRNN(20),\n", + " keras.layers.Dense(10)\n", "])\n", "\n", "model.compile(loss=\"mse\", optimizer=\"adam\")\n", @@ -536,7 +535,7 @@ "\n", "series = generate_time_series(1, 50 + 10)\n", "X_new, Y_new = series[:, :50, :], series[:, -10:, :]\n", - "Y_pred = model.predict(X_new)[:, -10:, :]" + "Y_pred = model.predict(X_new)[..., np.newaxis]" ] }, { @@ -553,7 +552,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now let's create an RNN that predicts the input sequence, shifted 10 steps into the future. That is, instead of just forecasting time steps 50 to 59 based on time steps 0 to 49, it will forecast time steps 10 to 59 based on time steps 0 to 49 (the time steps 10 to 49 are in the input, but the model is causal so at any time step it cannot see the future inputs):" + "Now let's create an RNN that predicts the next 10 steps at each time step. That is, instead of just forecasting time steps 50 to 59 based on time steps 0 to 49, it will forecast time steps 1 to 10 at time step 0, then time steps 2 to 11 at time step 1, and so on, and finally it will forecast time steps 50 to 59 at the last time step. Notice that the model is causal: when it makes predictions at any time step, it can only see past time steps." ] }, { @@ -566,9 +565,15 @@ "\n", "n_steps = 50\n", "series = generate_time_series(10000, n_steps + 10)\n", - "X_train, Y_train = series[:7000, :n_steps], series[:7000, 10:]\n", - "X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, 10:]\n", - "X_test, Y_test = series[9000:, :n_steps], series[9000:, 10:]" + "X_train = series[:7000, :n_steps]\n", + "X_valid = series[7000:9000, :n_steps]\n", + "X_test = series[9000:, :n_steps]\n", + "Y = np.empty((10000, n_steps, 10))\n", + "for step_ahead in range(1, 10 + 1):\n", + " Y[..., step_ahead - 1] = series[..., step_ahead:step_ahead + n_steps, 0]\n", + "Y_train = Y[:7000]\n", + "Y_valid = Y[7000:9000]\n", + "Y_test = Y[9000:]" ] }, { @@ -592,13 +597,13 @@ "model = keras.models.Sequential([\n", " keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),\n", " keras.layers.SimpleRNN(20, return_sequences=True),\n", - " keras.layers.TimeDistributed(keras.layers.Dense(1))\n", + " keras.layers.TimeDistributed(keras.layers.Dense(10))\n", "])\n", "\n", - "def last_10_time_steps_mse(Y_true, Y_pred):\n", - " return keras.metrics.mean_squared_error(Y_true[:, -10:], Y_pred[:, -10:])\n", + "def last_time_step_mse(Y_true, Y_pred):\n", + " return keras.metrics.mean_squared_error(Y_true[:, -1], Y_pred[:, -1])\n", "\n", - "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n", "history = model.fit(X_train, Y_train, epochs=20,\n", " validation_data=(X_valid, Y_valid))" ] @@ -613,7 +618,7 @@ "\n", "series = generate_time_series(1, 50 + 10)\n", "X_new, Y_new = series[:, :50, :], series[:, 50:, :]\n", - "Y_pred = model.predict(X_new)[:, -10:, :]" + "Y_pred = model.predict(X_new)[:, -1][..., np.newaxis]" ] }, { @@ -642,16 +647,15 @@ "np.random.seed(42)\n", "tf.random.set_seed(42)\n", "\n", - "\n", "model = keras.models.Sequential([\n", " keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),\n", " keras.layers.BatchNormalization(),\n", " keras.layers.SimpleRNN(20, return_sequences=True),\n", " keras.layers.BatchNormalization(),\n", - " keras.layers.TimeDistributed(keras.layers.Dense(1))\n", + " keras.layers.TimeDistributed(keras.layers.Dense(10))\n", "])\n", "\n", - "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n", "history = model.fit(X_train, Y_train, epochs=20,\n", " validation_data=(X_valid, Y_valid))" ] @@ -711,10 +715,10 @@ " keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True,\n", " input_shape=[None, 1]),\n", " keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True),\n", - " keras.layers.TimeDistributed(keras.layers.Dense(1))\n", + " keras.layers.TimeDistributed(keras.layers.Dense(10))\n", "])\n", "\n", - "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n", "history = model.fit(X_train, Y_train, epochs=20,\n", " validation_data=(X_valid, Y_valid))" ] @@ -771,10 +775,10 @@ " MyRNN(LNSimpleRNNCell(20), return_sequences=True,\n", " input_shape=[None, 1]),\n", " MyRNN(LNSimpleRNNCell(20), return_sequences=True),\n", - " keras.layers.TimeDistributed(keras.layers.Dense(1))\n", + " keras.layers.TimeDistributed(keras.layers.Dense(10))\n", "])\n", "\n", - "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n", "history = model.fit(X_train, Y_train, epochs=20,\n", " validation_data=(X_valid, Y_valid))" ] @@ -800,10 +804,10 @@ "model = keras.models.Sequential([\n", " keras.layers.LSTM(20, return_sequences=True, input_shape=[None, 1]),\n", " keras.layers.LSTM(20, return_sequences=True),\n", - " keras.layers.TimeDistributed(keras.layers.Dense(1))\n", + " keras.layers.TimeDistributed(keras.layers.Dense(10))\n", "])\n", "\n", - "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n", "history = model.fit(X_train, Y_train, epochs=20,\n", " validation_data=(X_valid, Y_valid))" ] @@ -837,7 +841,7 @@ "\n", "series = generate_time_series(1, 50 + 10)\n", "X_new, Y_new = series[:, :50, :], series[:, 50:, :]\n", - "Y_pred = model.predict(X_new)[:, -10:, :]" + "Y_pred = model.predict(X_new)[:, -1][..., np.newaxis]" ] }, { @@ -873,10 +877,10 @@ "model = keras.models.Sequential([\n", " keras.layers.GRU(20, return_sequences=True, input_shape=[None, 1]),\n", " keras.layers.GRU(20, return_sequences=True),\n", - " keras.layers.TimeDistributed(keras.layers.Dense(1))\n", + " keras.layers.TimeDistributed(keras.layers.Dense(10))\n", "])\n", "\n", - "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n", "history = model.fit(X_train, Y_train, epochs=20,\n", " validation_data=(X_valid, Y_valid))" ] @@ -910,7 +914,7 @@ "\n", "series = generate_time_series(1, 50 + 10)\n", "X_new, Y_new = series[:, :50, :], series[:, 50:, :]\n", - "Y_pred = model.predict(X_new)[:, -10:, :]" + "Y_pred = model.predict(X_new)[:, -1][..., np.newaxis]" ] }, { @@ -939,16 +943,17 @@ "```\n", "1D conv layer with kernel size 4, stride 2, VALID padding:\n", "\n", - " |-----2----| |-----5---... |----23-----|\n", - " |-----1----| |-----4-----| ... |-----22----|\n", - " |-----0----| |-----3----| |---...-21---|\n", - "X: 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 43 44 45 46 47 48 49\n", - "Y: 10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n", + " |-----2-----| |-----5---...------| |-----23----|\n", + " |-----1-----| |-----4-----| ... |-----22----|\n", + " |-----0----| |-----3-----| |---...|-----21----|\n", + "X: 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 42 43 44 45 46 47 48 49\n", + "Y: 1 2 3 4 5 6 7 8 9 10 11 12 13 ... 43 44 45 46 47 48 49 50\n", + " /10 11 12 13 14 15 16 17 18 19 20 21 22 ... 52 53 54 55 56 57 58 59\n", "\n", "Output:\n", "\n", - "X: 0 1 2 3 4 5 ... 19 20 21 22 23\n", - "Y: 13 15 17 19 21 23 ... 51 53 55 57 59\n", + "X: 0/3 2/5 4/7 6/9 8/11 10/13 .../43 42/45 44/47 46/49\n", + "Y: 4/13 6/15 8/17 10/19 12/21 14/23 .../53 46/55 48/57 50/59\n", "```" ] }, @@ -961,18 +966,15 @@ "np.random.seed(42)\n", "tf.random.set_seed(42)\n", "\n", - "def last_5_time_steps_mse(Y_true, Y_pred):\n", - " return keras.metrics.mean_squared_error(Y_true[:, -5:], Y_pred[:, -5:])\n", - "\n", "model = keras.models.Sequential([\n", - " keras.layers.Conv1D(filters=20, kernel_size=4, strides=2, padding=\"VALID\",\n", + " keras.layers.Conv1D(filters=20, kernel_size=4, strides=2, padding=\"valid\",\n", " input_shape=[None, 1]),\n", " keras.layers.GRU(20, return_sequences=True),\n", " keras.layers.GRU(20, return_sequences=True),\n", - " keras.layers.TimeDistributed(keras.layers.Dense(1))\n", + " keras.layers.TimeDistributed(keras.layers.Dense(10))\n", "])\n", "\n", - "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_5_time_steps_mse])\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n", "history = model.fit(X_train, Y_train[:, 3::2], epochs=20,\n", " validation_data=(X_valid, Y_valid[:, 3::2]))" ] @@ -989,17 +991,13 @@ "metadata": {}, "source": [ "```\n", - "C2 /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\.../\\ /\\ /\\ /\\ /\\ /\\ \n", - " / \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\\n", + "C2 /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\.../\\ /\\ /\\ /\\ /\\ /\\\n", + " \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\\n", " / \\ / \\ / \\ / \\\n", "C1 /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /.../\\ /\\ /\\ /\\ /\\ /\\ /\\\n", "X: 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 43 44 45 46 47 48 49\n", - "Y: 10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n", - "\n", - "Output:\n", - "\n", - "X: 0 1 2 3 4 5 ... 19 20 21 22 23\n", - "Y: 13 15 17 19 21 23 ... 51 53 55 57 59\n", + "Y: 1 2 3 4 5 6 7 8 9 10 11 12 13 ... 44 45 46 47 48 49 50\n", + " /10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n", "```" ] }, @@ -1015,12 +1013,10 @@ "model = keras.models.Sequential()\n", "model.add(keras.layers.InputLayer(input_shape=[None, 1]))\n", "for rate in (1, 2, 4, 8) * 2:\n", - " model.add(keras.layers.Lambda(\n", - " lambda inputs: keras.backend.temporal_padding(inputs, (rate, 0))))\n", - " model.add(keras.layers.Conv1D(filters=20, kernel_size=2, padding=\"VALID\",\n", + " model.add(keras.layers.Conv1D(filters=20, kernel_size=2, padding=\"causal\",\n", " activation=\"relu\", dilation_rate=rate))\n", - "model.add(keras.layers.Conv1D(filters=1, kernel_size=1))\n", - "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n", + "model.add(keras.layers.Conv1D(filters=10, kernel_size=1))\n", + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n", "history = model.fit(X_train, Y_train, epochs=20,\n", " validation_data=(X_valid, Y_valid))" ] @@ -1058,9 +1054,8 @@ "outputs": [], "source": [ "def wavenet_residual_block(inputs, n_filters, dilation_rate):\n", - " z = keras.backend.temporal_padding(inputs, (dilation_rate, 0))\n", - " z = keras.layers.Conv1D(2 * n_filters, kernel_size=2,\n", - " dilation_rate=dilation_rate)(z)\n", + " z = keras.layers.Conv1D(2 * n_filters, kernel_size=2, padding=\"causal\",\n", + " dilation_rate=dilation_rate)(inputs)\n", " z = GatedActivationUnit()(z)\n", " z = keras.layers.Conv1D(n_filters, kernel_size=1)(z)\n", " return keras.layers.Add()([z, inputs]), z" @@ -1072,17 +1067,17 @@ "metadata": {}, "outputs": [], "source": [ + "keras.backend.clear_session()\n", "np.random.seed(42)\n", "tf.random.set_seed(42)\n", "\n", - "n_layers_per_block = 10\n", - "n_blocks = 3\n", - "n_filters = 128\n", - "n_outputs = 256\n", + "n_layers_per_block = 3 # 10 in the paper\n", + "n_blocks = 1 # 3 in the paper\n", + "n_filters = 32 # 128 in the paper\n", + "n_outputs = 10 # 256 in the paper\n", "\n", "inputs = keras.layers.Input(shape=[None, 1])\n", - "z = keras.backend.temporal_padding(inputs, (1, 0))\n", - "z = keras.layers.Conv1D(n_filters, kernel_size=2)(z)\n", + "z = keras.layers.Conv1D(n_filters, kernel_size=2, padding=\"causal\")(inputs)\n", "skip_to_last = []\n", "for dilation_rate in [2**i for i in range(n_layers_per_block)] * n_blocks:\n", " z, skip = wavenet_residual_block(z, n_filters, dilation_rate)\n", @@ -1100,8 +1095,9 @@ "metadata": {}, "outputs": [], "source": [ - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\")\n", - "history = model.fit(X_train, Y_train, epochs=2, validation_data=(X_valid, Y_valid))" + "model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n", + "history = model.fit(X_train, Y_train, epochs=2,\n", + " validation_data=(X_valid, Y_valid))" ] }, {