Add MC Dropout, finish chapter

main
Aurélien Geron 2019-02-28 19:48:06 +08:00
parent 6bc603b508
commit ed5c7d6db8
1 changed file with 241 additions and 36 deletions

View File

@ -1150,7 +1150,7 @@
" keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
" keras.layers.Dense(10, activation=\"softmax\")\n", " keras.layers.Dense(10, activation=\"softmax\")\n",
"])\n", "])\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])" "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])"
] ]
}, },
{ {
@ -1160,8 +1160,8 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"n_epochs = 25\n", "n_epochs = 25\n",
"history = model.fit(X_train, y_train, epochs=n_epochs,\n", "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
" validation_data=(X_valid, y_valid))" " validation_data=(X_valid_scaled, y_valid))"
] ]
}, },
{ {
@ -1236,7 +1236,7 @@
" keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
" keras.layers.Dense(10, activation=\"softmax\")\n", " keras.layers.Dense(10, activation=\"softmax\")\n",
"])\n", "])\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
"n_epochs = 25" "n_epochs = 25"
] ]
}, },
@ -1247,8 +1247,8 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)\n", "lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)\n",
"history = model.fit(X_train, y_train, epochs=n_epochs,\n", "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
" validation_data=(X_valid, y_valid),\n", " validation_data=(X_valid_scaled, y_valid),\n",
" callbacks=[lr_scheduler])" " callbacks=[lr_scheduler])"
] ]
}, },
@ -1320,14 +1320,14 @@
" keras.layers.Dense(10, activation=\"softmax\")\n", " keras.layers.Dense(10, activation=\"softmax\")\n",
"])\n", "])\n",
"lr0 = 0.01\n", "lr0 = 0.01\n",
"optimizer = keras.optimizers.SGD(lr=lr0)\n", "optimizer = keras.optimizers.Nadam(lr=lr0)\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
"n_epochs = 25\n", "n_epochs = 25\n",
"\n", "\n",
"s = 20 * len(X_train) // 32 # number of steps in 20 epochs (batch size = 32)\n", "s = 20 * len(X_train) // 32 # number of steps in 20 epochs (batch size = 32)\n",
"exp_decay = ExponentialDecay(s)\n", "exp_decay = ExponentialDecay(s)\n",
"history = model.fit(X_train, y_train, epochs=n_epochs,\n", "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
" validation_data=(X_valid, y_valid),\n", " validation_data=(X_valid_scaled, y_valid),\n",
" callbacks=[exp_decay])" " callbacks=[exp_decay])"
] ]
}, },
@ -1411,10 +1411,10 @@
" keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
" keras.layers.Dense(10, activation=\"softmax\")\n", " keras.layers.Dense(10, activation=\"softmax\")\n",
"])\n", "])\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
"n_epochs = 25\n", "n_epochs = 25\n",
"history = model.fit(X_train, y_train, epochs=n_epochs,\n", "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
" validation_data=(X_valid, y_valid),\n", " validation_data=(X_valid_scaled, y_valid),\n",
" callbacks=[lr_scheduler])" " callbacks=[lr_scheduler])"
] ]
}, },
@ -1464,11 +1464,11 @@
" keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
" keras.layers.Dense(10, activation=\"softmax\")\n", " keras.layers.Dense(10, activation=\"softmax\")\n",
"])\n", "])\n",
"optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9)\n", "optimizer = keras.optimizers.SGD(lr=0.02, momentum=0.9)\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
"n_epochs = 25\n", "n_epochs = 25\n",
"history = model.fit(X_train, y_train, epochs=n_epochs,\n", "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
" validation_data=(X_valid, y_valid),\n", " validation_data=(X_valid_scaled, y_valid),\n",
" callbacks=[lr_scheduler])" " callbacks=[lr_scheduler])"
] ]
}, },
@ -1518,8 +1518,8 @@
"optimizer = keras.optimizers.SGD(learning_rate)\n", "optimizer = keras.optimizers.SGD(learning_rate)\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
"n_epochs = 25\n", "n_epochs = 25\n",
"history = model.fit(X_train, y_train, epochs=n_epochs,\n", "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
" validation_data=(X_valid, y_valid))" " validation_data=(X_valid_scaled, y_valid))"
] ]
}, },
{ {
@ -1583,10 +1583,10 @@
" keras.layers.Dense(10, activation=\"softmax\",\n", " keras.layers.Dense(10, activation=\"softmax\",\n",
" kernel_regularizer=keras.regularizers.l2(0.01))\n", " kernel_regularizer=keras.regularizers.l2(0.01))\n",
"])\n", "])\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
"n_epochs = 2\n", "n_epochs = 2\n",
"history = model.fit(X_train, y_train, epochs=n_epochs,\n", "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
" validation_data=(X_valid, y_valid))" " validation_data=(X_valid_scaled, y_valid))"
] ]
}, },
{ {
@ -1608,10 +1608,10 @@
" RegularizedDense(100),\n", " RegularizedDense(100),\n",
" RegularizedDense(10, activation=\"softmax\")\n", " RegularizedDense(10, activation=\"softmax\")\n",
"])\n", "])\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
"n_epochs = 2\n", "n_epochs = 2\n",
"history = model.fit(X_train, y_train, epochs=n_epochs,\n", "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
" validation_data=(X_valid, y_valid))" " validation_data=(X_valid_scaled, y_valid))"
] ]
}, },
{ {
@ -1636,10 +1636,10 @@
" keras.layers.Dropout(rate=0.2),\n", " keras.layers.Dropout(rate=0.2),\n",
" keras.layers.Dense(10, activation=\"softmax\")\n", " keras.layers.Dense(10, activation=\"softmax\")\n",
"])\n", "])\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
"n_epochs = 2\n", "n_epochs = 2\n",
"history = model.fit(X_train, y_train, epochs=n_epochs,\n", "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
" validation_data=(X_valid, y_valid))" " validation_data=(X_valid_scaled, y_valid))"
] ]
}, },
{ {
@ -1654,6 +1654,16 @@
"execution_count": 99, "execution_count": 99,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [],
"source": [ "source": [
"model = keras.models.Sequential([\n", "model = keras.models.Sequential([\n",
" keras.layers.Flatten(input_shape=[28, 28]),\n", " keras.layers.Flatten(input_shape=[28, 28]),\n",
@ -1664,10 +1674,205 @@
" keras.layers.AlphaDropout(rate=0.2),\n", " keras.layers.AlphaDropout(rate=0.2),\n",
" keras.layers.Dense(10, activation=\"softmax\")\n", " keras.layers.Dense(10, activation=\"softmax\")\n",
"])\n", "])\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n", "optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n",
"n_epochs = 2\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
"history = model.fit(X_train, y_train, epochs=n_epochs,\n", "n_epochs = 20\n",
" validation_data=(X_valid, y_valid))" "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
" validation_data=(X_valid_scaled, y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [],
"source": [
"model.evaluate(X_test_scaled, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"model.evaluate(X_train_scaled, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"#with keras.backend.learning_phase_scope(1): # TODO: check https://github.com/tensorflow/tensorflow/issues/25754\n",
"# history = model.fit(X_train_scaled, y_train)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## MC Dropout"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"with keras.backend.learning_phase_scope(1): # TODO: check https://github.com/tensorflow/tensorflow/issues/25754\n",
" y_probas = np.stack([model.predict(X_test_scaled) for sample in range(100)])\n",
"y_proba = y_probas.mean(axis=0)\n",
"y_std = y_probas.std(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [],
"source": [
"np.round(model.predict(X_test_scaled[:1]), 2)"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [],
"source": [
"np.round(y_probas[:, :1], 2)"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"np.round(y_proba[:1], 2)"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
"y_std = y_probas.std(axis=0)\n",
"np.round(y_std[:1], 2)"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
"y_pred = np.argmax(y_proba, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
"accuracy = np.sum(y_pred == y_test) / len(y_test)\n",
"accuracy"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [],
"source": [
"class MCDropout(keras.layers.Dropout):\n",
" def call(self, inputs):\n",
" return super().call(inputs, training=True)\n",
"\n",
"class MCAlphaDropout(keras.layers.AlphaDropout):\n",
" def call(self, inputs):\n",
" return super().call(inputs, training=True)"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [],
"source": [
"mc_model = keras.models.Sequential([\n",
" MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer\n",
" for layer in model.layers\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
"mc_model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
"optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n",
"mc_model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [],
"source": [
"mc_model.set_weights(model.get_weights())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we can use the model with MC Dropout:"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [],
"source": [
"np.round(np.mean([mc_model.predict(X_test_scaled[:1]) for sample in range(100)], axis=0), 2)"
] ]
}, },
{ {
@ -1679,7 +1884,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 100, "execution_count": 119,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1689,7 +1894,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 101, "execution_count": 120,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1703,10 +1908,10 @@
" MaxNormDense(100),\n", " MaxNormDense(100),\n",
" keras.layers.Dense(10, activation=\"softmax\")\n", " keras.layers.Dense(10, activation=\"softmax\")\n",
"])\n", "])\n",
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
"n_epochs = 2\n", "n_epochs = 2\n",
"history = model.fit(X_train, y_train, epochs=n_epochs,\n", "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
" validation_data=(X_valid, y_valid))" " validation_data=(X_valid_scaled, y_valid))"
] ]
}, },
{ {
@ -1715,7 +1920,7 @@
"collapsed": true "collapsed": true
}, },
"source": [ "source": [
"# Exercise solutions" "# Exercises"
] ]
}, },
{ {