Use causal padding in WaveNet and fix forecast
parent
01784d2f98
commit
1973371b19
|
@ -500,9 +500,9 @@
|
|||
"\n",
|
||||
"n_steps = 50\n",
|
||||
"series = generate_time_series(10000, n_steps + 10)\n",
|
||||
"X_train, Y_train = series[:7000, :n_steps], series[:7000, -10:]\n",
|
||||
"X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, -10:]\n",
|
||||
"X_test, Y_test = series[9000:, :n_steps], series[9000:, -10:]"
|
||||
"X_train, Y_train = series[:7000, :n_steps], series[:7000, -10:, 0]\n",
|
||||
"X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, -10:, 0]\n",
|
||||
"X_test, Y_test = series[9000:, :n_steps], series[9000:, -10:, 0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -516,9 +516,8 @@
|
|||
"\n",
|
||||
"model = keras.models.Sequential([\n",
|
||||
" keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),\n",
|
||||
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(1)),\n",
|
||||
" keras.layers.Lambda(lambda Y_pred: Y_pred[:, -10:])\n",
|
||||
" keras.layers.SimpleRNN(20),\n",
|
||||
" keras.layers.Dense(10)\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\")\n",
|
||||
|
@ -536,7 +535,7 @@
|
|||
"\n",
|
||||
"series = generate_time_series(1, 50 + 10)\n",
|
||||
"X_new, Y_new = series[:, :50, :], series[:, -10:, :]\n",
|
||||
"Y_pred = model.predict(X_new)[:, -10:, :]"
|
||||
"Y_pred = model.predict(X_new)[..., np.newaxis]"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -553,7 +552,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's create an RNN that predicts the input sequence, shifted 10 steps into the future. That is, instead of just forecasting time steps 50 to 59 based on time steps 0 to 49, it will forecast time steps 10 to 59 based on time steps 0 to 49 (the time steps 10 to 49 are in the input, but the model is causal so at any time step it cannot see the future inputs):"
|
||||
"Now let's create an RNN that predicts the next 10 steps at each time step. That is, instead of just forecasting time steps 50 to 59 based on time steps 0 to 49, it will forecast time steps 1 to 10 at time step 0, then time steps 2 to 11 at time step 1, and so on, and finally it will forecast time steps 50 to 59 at the last time step. Notice that the model is causal: when it makes predictions at any time step, it can only see past time steps."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -566,9 +565,15 @@
|
|||
"\n",
|
||||
"n_steps = 50\n",
|
||||
"series = generate_time_series(10000, n_steps + 10)\n",
|
||||
"X_train, Y_train = series[:7000, :n_steps], series[:7000, 10:]\n",
|
||||
"X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, 10:]\n",
|
||||
"X_test, Y_test = series[9000:, :n_steps], series[9000:, 10:]"
|
||||
"X_train = series[:7000, :n_steps]\n",
|
||||
"X_valid = series[7000:9000, :n_steps]\n",
|
||||
"X_test = series[9000:, :n_steps]\n",
|
||||
"Y = np.empty((10000, n_steps, 10))\n",
|
||||
"for step_ahead in range(1, 10 + 1):\n",
|
||||
" Y[..., step_ahead - 1] = series[..., step_ahead:step_ahead + n_steps, 0]\n",
|
||||
"Y_train = Y[:7000]\n",
|
||||
"Y_valid = Y[7000:9000]\n",
|
||||
"Y_test = Y[9000:]"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -592,13 +597,13 @@
|
|||
"model = keras.models.Sequential([\n",
|
||||
" keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),\n",
|
||||
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(10))\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"def last_10_time_steps_mse(Y_true, Y_pred):\n",
|
||||
" return keras.metrics.mean_squared_error(Y_true[:, -10:], Y_pred[:, -10:])\n",
|
||||
"def last_time_step_mse(Y_true, Y_pred):\n",
|
||||
" return keras.metrics.mean_squared_error(Y_true[:, -1], Y_pred[:, -1])\n",
|
||||
"\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n",
|
||||
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
||||
" validation_data=(X_valid, Y_valid))"
|
||||
]
|
||||
|
@ -613,7 +618,7 @@
|
|||
"\n",
|
||||
"series = generate_time_series(1, 50 + 10)\n",
|
||||
"X_new, Y_new = series[:, :50, :], series[:, 50:, :]\n",
|
||||
"Y_pred = model.predict(X_new)[:, -10:, :]"
|
||||
"Y_pred = model.predict(X_new)[:, -1][..., np.newaxis]"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -642,16 +647,15 @@
|
|||
"np.random.seed(42)\n",
|
||||
"tf.random.set_seed(42)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"model = keras.models.Sequential([\n",
|
||||
" keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),\n",
|
||||
" keras.layers.BatchNormalization(),\n",
|
||||
" keras.layers.SimpleRNN(20, return_sequences=True),\n",
|
||||
" keras.layers.BatchNormalization(),\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(10))\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n",
|
||||
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
||||
" validation_data=(X_valid, Y_valid))"
|
||||
]
|
||||
|
@ -711,10 +715,10 @@
|
|||
" keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True,\n",
|
||||
" input_shape=[None, 1]),\n",
|
||||
" keras.layers.RNN(LNSimpleRNNCell(20), return_sequences=True),\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(10))\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n",
|
||||
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
||||
" validation_data=(X_valid, Y_valid))"
|
||||
]
|
||||
|
@ -771,10 +775,10 @@
|
|||
" MyRNN(LNSimpleRNNCell(20), return_sequences=True,\n",
|
||||
" input_shape=[None, 1]),\n",
|
||||
" MyRNN(LNSimpleRNNCell(20), return_sequences=True),\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(10))\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n",
|
||||
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
||||
" validation_data=(X_valid, Y_valid))"
|
||||
]
|
||||
|
@ -800,10 +804,10 @@
|
|||
"model = keras.models.Sequential([\n",
|
||||
" keras.layers.LSTM(20, return_sequences=True, input_shape=[None, 1]),\n",
|
||||
" keras.layers.LSTM(20, return_sequences=True),\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(10))\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n",
|
||||
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
||||
" validation_data=(X_valid, Y_valid))"
|
||||
]
|
||||
|
@ -837,7 +841,7 @@
|
|||
"\n",
|
||||
"series = generate_time_series(1, 50 + 10)\n",
|
||||
"X_new, Y_new = series[:, :50, :], series[:, 50:, :]\n",
|
||||
"Y_pred = model.predict(X_new)[:, -10:, :]"
|
||||
"Y_pred = model.predict(X_new)[:, -1][..., np.newaxis]"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -873,10 +877,10 @@
|
|||
"model = keras.models.Sequential([\n",
|
||||
" keras.layers.GRU(20, return_sequences=True, input_shape=[None, 1]),\n",
|
||||
" keras.layers.GRU(20, return_sequences=True),\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(10))\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n",
|
||||
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
||||
" validation_data=(X_valid, Y_valid))"
|
||||
]
|
||||
|
@ -910,7 +914,7 @@
|
|||
"\n",
|
||||
"series = generate_time_series(1, 50 + 10)\n",
|
||||
"X_new, Y_new = series[:, :50, :], series[:, 50:, :]\n",
|
||||
"Y_pred = model.predict(X_new)[:, -10:, :]"
|
||||
"Y_pred = model.predict(X_new)[:, -1][..., np.newaxis]"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -939,16 +943,17 @@
|
|||
"```\n",
|
||||
"1D conv layer with kernel size 4, stride 2, VALID padding:\n",
|
||||
"\n",
|
||||
" |-----2----| |-----5---... |----23-----|\n",
|
||||
" |-----1----| |-----4-----| ... |-----22----|\n",
|
||||
" |-----0----| |-----3----| |---...-21---|\n",
|
||||
"X: 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 43 44 45 46 47 48 49\n",
|
||||
"Y: 10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n",
|
||||
" |-----2-----| |-----5---...------| |-----23----|\n",
|
||||
" |-----1-----| |-----4-----| ... |-----22----|\n",
|
||||
" |-----0----| |-----3-----| |---...|-----21----|\n",
|
||||
"X: 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 42 43 44 45 46 47 48 49\n",
|
||||
"Y: 1 2 3 4 5 6 7 8 9 10 11 12 13 ... 43 44 45 46 47 48 49 50\n",
|
||||
" /10 11 12 13 14 15 16 17 18 19 20 21 22 ... 52 53 54 55 56 57 58 59\n",
|
||||
"\n",
|
||||
"Output:\n",
|
||||
"\n",
|
||||
"X: 0 1 2 3 4 5 ... 19 20 21 22 23\n",
|
||||
"Y: 13 15 17 19 21 23 ... 51 53 55 57 59\n",
|
||||
"X: 0/3 2/5 4/7 6/9 8/11 10/13 .../43 42/45 44/47 46/49\n",
|
||||
"Y: 4/13 6/15 8/17 10/19 12/21 14/23 .../53 46/55 48/57 50/59\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
|
@ -961,18 +966,15 @@
|
|||
"np.random.seed(42)\n",
|
||||
"tf.random.set_seed(42)\n",
|
||||
"\n",
|
||||
"def last_5_time_steps_mse(Y_true, Y_pred):\n",
|
||||
" return keras.metrics.mean_squared_error(Y_true[:, -5:], Y_pred[:, -5:])\n",
|
||||
"\n",
|
||||
"model = keras.models.Sequential([\n",
|
||||
" keras.layers.Conv1D(filters=20, kernel_size=4, strides=2, padding=\"VALID\",\n",
|
||||
" keras.layers.Conv1D(filters=20, kernel_size=4, strides=2, padding=\"valid\",\n",
|
||||
" input_shape=[None, 1]),\n",
|
||||
" keras.layers.GRU(20, return_sequences=True),\n",
|
||||
" keras.layers.GRU(20, return_sequences=True),\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(1))\n",
|
||||
" keras.layers.TimeDistributed(keras.layers.Dense(10))\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_5_time_steps_mse])\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n",
|
||||
"history = model.fit(X_train, Y_train[:, 3::2], epochs=20,\n",
|
||||
" validation_data=(X_valid, Y_valid[:, 3::2]))"
|
||||
]
|
||||
|
@ -989,17 +991,13 @@
|
|||
"metadata": {},
|
||||
"source": [
|
||||
"```\n",
|
||||
"C2 /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\.../\\ /\\ /\\ /\\ /\\ /\\ \n",
|
||||
" / \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\\n",
|
||||
"C2 /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\.../\\ /\\ /\\ /\\ /\\ /\\\n",
|
||||
" \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\ / \\\n",
|
||||
" / \\ / \\ / \\ / \\\n",
|
||||
"C1 /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /\\ /.../\\ /\\ /\\ /\\ /\\ /\\ /\\\n",
|
||||
"X: 0 1 2 3 4 5 6 7 8 9 10 11 12 ... 43 44 45 46 47 48 49\n",
|
||||
"Y: 10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n",
|
||||
"\n",
|
||||
"Output:\n",
|
||||
"\n",
|
||||
"X: 0 1 2 3 4 5 ... 19 20 21 22 23\n",
|
||||
"Y: 13 15 17 19 21 23 ... 51 53 55 57 59\n",
|
||||
"Y: 1 2 3 4 5 6 7 8 9 10 11 12 13 ... 44 45 46 47 48 49 50\n",
|
||||
" /10 11 12 13 14 15 16 17 18 19 20 21 22 ... 53 54 55 56 57 58 59\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
|
@ -1015,12 +1013,10 @@
|
|||
"model = keras.models.Sequential()\n",
|
||||
"model.add(keras.layers.InputLayer(input_shape=[None, 1]))\n",
|
||||
"for rate in (1, 2, 4, 8) * 2:\n",
|
||||
" model.add(keras.layers.Lambda(\n",
|
||||
" lambda inputs: keras.backend.temporal_padding(inputs, (rate, 0))))\n",
|
||||
" model.add(keras.layers.Conv1D(filters=20, kernel_size=2, padding=\"VALID\",\n",
|
||||
" model.add(keras.layers.Conv1D(filters=20, kernel_size=2, padding=\"causal\",\n",
|
||||
" activation=\"relu\", dilation_rate=rate))\n",
|
||||
"model.add(keras.layers.Conv1D(filters=1, kernel_size=1))\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_10_time_steps_mse])\n",
|
||||
"model.add(keras.layers.Conv1D(filters=10, kernel_size=1))\n",
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n",
|
||||
"history = model.fit(X_train, Y_train, epochs=20,\n",
|
||||
" validation_data=(X_valid, Y_valid))"
|
||||
]
|
||||
|
@ -1058,9 +1054,8 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"def wavenet_residual_block(inputs, n_filters, dilation_rate):\n",
|
||||
" z = keras.backend.temporal_padding(inputs, (dilation_rate, 0))\n",
|
||||
" z = keras.layers.Conv1D(2 * n_filters, kernel_size=2,\n",
|
||||
" dilation_rate=dilation_rate)(z)\n",
|
||||
" z = keras.layers.Conv1D(2 * n_filters, kernel_size=2, padding=\"causal\",\n",
|
||||
" dilation_rate=dilation_rate)(inputs)\n",
|
||||
" z = GatedActivationUnit()(z)\n",
|
||||
" z = keras.layers.Conv1D(n_filters, kernel_size=1)(z)\n",
|
||||
" return keras.layers.Add()([z, inputs]), z"
|
||||
|
@ -1072,17 +1067,17 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"keras.backend.clear_session()\n",
|
||||
"np.random.seed(42)\n",
|
||||
"tf.random.set_seed(42)\n",
|
||||
"\n",
|
||||
"n_layers_per_block = 10\n",
|
||||
"n_blocks = 3\n",
|
||||
"n_filters = 128\n",
|
||||
"n_outputs = 256\n",
|
||||
"n_layers_per_block = 3 # 10 in the paper\n",
|
||||
"n_blocks = 1 # 3 in the paper\n",
|
||||
"n_filters = 32 # 128 in the paper\n",
|
||||
"n_outputs = 10 # 256 in the paper\n",
|
||||
"\n",
|
||||
"inputs = keras.layers.Input(shape=[None, 1])\n",
|
||||
"z = keras.backend.temporal_padding(inputs, (1, 0))\n",
|
||||
"z = keras.layers.Conv1D(n_filters, kernel_size=2)(z)\n",
|
||||
"z = keras.layers.Conv1D(n_filters, kernel_size=2, padding=\"causal\")(inputs)\n",
|
||||
"skip_to_last = []\n",
|
||||
"for dilation_rate in [2**i for i in range(n_layers_per_block)] * n_blocks:\n",
|
||||
" z, skip = wavenet_residual_block(z, n_filters, dilation_rate)\n",
|
||||
|
@ -1100,8 +1095,9 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\")\n",
|
||||
"history = model.fit(X_train, Y_train, epochs=2, validation_data=(X_valid, Y_valid))"
|
||||
"model.compile(loss=\"mse\", optimizer=\"adam\", metrics=[last_time_step_mse])\n",
|
||||
"history = model.fit(X_train, Y_train, epochs=2,\n",
|
||||
" validation_data=(X_valid, Y_valid))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue