Add MC Dropout, finish chapter
parent
6bc603b508
commit
ed5c7d6db8
|
@ -1150,7 +1150,7 @@
|
|||
" keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
|
||||
" keras.layers.Dense(10, activation=\"softmax\")\n",
|
||||
"])\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])"
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1160,8 +1160,8 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"n_epochs = 25\n",
|
||||
"history = model.fit(X_train, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid, y_valid))"
|
||||
"history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid_scaled, y_valid))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1236,7 +1236,7 @@
|
|||
" keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
|
||||
" keras.layers.Dense(10, activation=\"softmax\")\n",
|
||||
"])\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
|
||||
"n_epochs = 25"
|
||||
]
|
||||
},
|
||||
|
@ -1247,8 +1247,8 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)\n",
|
||||
"history = model.fit(X_train, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid, y_valid),\n",
|
||||
"history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid_scaled, y_valid),\n",
|
||||
" callbacks=[lr_scheduler])"
|
||||
]
|
||||
},
|
||||
|
@ -1320,14 +1320,14 @@
|
|||
" keras.layers.Dense(10, activation=\"softmax\")\n",
|
||||
"])\n",
|
||||
"lr0 = 0.01\n",
|
||||
"optimizer = keras.optimizers.SGD(lr=lr0)\n",
|
||||
"optimizer = keras.optimizers.Nadam(lr=lr0)\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
|
||||
"n_epochs = 25\n",
|
||||
"\n",
|
||||
"s = 20 * len(X_train) // 32 # number of steps in 20 epochs (batch size = 32)\n",
|
||||
"exp_decay = ExponentialDecay(s)\n",
|
||||
"history = model.fit(X_train, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid, y_valid),\n",
|
||||
"history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid_scaled, y_valid),\n",
|
||||
" callbacks=[exp_decay])"
|
||||
]
|
||||
},
|
||||
|
@ -1411,10 +1411,10 @@
|
|||
" keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
|
||||
" keras.layers.Dense(10, activation=\"softmax\")\n",
|
||||
"])\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
|
||||
"n_epochs = 25\n",
|
||||
"history = model.fit(X_train, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid, y_valid),\n",
|
||||
"history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid_scaled, y_valid),\n",
|
||||
" callbacks=[lr_scheduler])"
|
||||
]
|
||||
},
|
||||
|
@ -1464,11 +1464,11 @@
|
|||
" keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
|
||||
" keras.layers.Dense(10, activation=\"softmax\")\n",
|
||||
"])\n",
|
||||
"optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9)\n",
|
||||
"optimizer = keras.optimizers.SGD(lr=0.02, momentum=0.9)\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
|
||||
"n_epochs = 25\n",
|
||||
"history = model.fit(X_train, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid, y_valid),\n",
|
||||
"history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid_scaled, y_valid),\n",
|
||||
" callbacks=[lr_scheduler])"
|
||||
]
|
||||
},
|
||||
|
@ -1518,8 +1518,8 @@
|
|||
"optimizer = keras.optimizers.SGD(learning_rate)\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
|
||||
"n_epochs = 25\n",
|
||||
"history = model.fit(X_train, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid, y_valid))"
|
||||
"history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid_scaled, y_valid))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1583,10 +1583,10 @@
|
|||
" keras.layers.Dense(10, activation=\"softmax\",\n",
|
||||
" kernel_regularizer=keras.regularizers.l2(0.01))\n",
|
||||
"])\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
|
||||
"n_epochs = 2\n",
|
||||
"history = model.fit(X_train, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid, y_valid))"
|
||||
"history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid_scaled, y_valid))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1608,10 +1608,10 @@
|
|||
" RegularizedDense(100),\n",
|
||||
" RegularizedDense(10, activation=\"softmax\")\n",
|
||||
"])\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
|
||||
"n_epochs = 2\n",
|
||||
"history = model.fit(X_train, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid, y_valid))"
|
||||
"history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid_scaled, y_valid))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1636,10 +1636,10 @@
|
|||
" keras.layers.Dropout(rate=0.2),\n",
|
||||
" keras.layers.Dense(10, activation=\"softmax\")\n",
|
||||
"])\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
|
||||
"n_epochs = 2\n",
|
||||
"history = model.fit(X_train, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid, y_valid))"
|
||||
"history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid_scaled, y_valid))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1654,6 +1654,16 @@
|
|||
"execution_count": 99,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tf.random.set_seed(42)\n",
|
||||
"np.random.seed(42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 100,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = keras.models.Sequential([\n",
|
||||
" keras.layers.Flatten(input_shape=[28, 28]),\n",
|
||||
|
@ -1664,10 +1674,205 @@
|
|||
" keras.layers.AlphaDropout(rate=0.2),\n",
|
||||
" keras.layers.Dense(10, activation=\"softmax\")\n",
|
||||
"])\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n",
|
||||
"n_epochs = 2\n",
|
||||
"history = model.fit(X_train, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid, y_valid))"
|
||||
"optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
|
||||
"n_epochs = 20\n",
|
||||
"history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid_scaled, y_valid))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 101,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model.evaluate(X_test_scaled, y_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 102,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model.evaluate(X_train_scaled, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 103,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#with keras.backend.learning_phase_scope(1): # TODO: check https://github.com/tensorflow/tensorflow/issues/25754\n",
|
||||
"# history = model.fit(X_train_scaled, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## MC Dropout"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 104,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tf.random.set_seed(42)\n",
|
||||
"np.random.seed(42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 105,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with keras.backend.learning_phase_scope(1): # TODO: check https://github.com/tensorflow/tensorflow/issues/25754\n",
|
||||
" y_probas = np.stack([model.predict(X_test_scaled) for sample in range(100)])\n",
|
||||
"y_proba = y_probas.mean(axis=0)\n",
|
||||
"y_std = y_probas.std(axis=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 106,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"np.round(model.predict(X_test_scaled[:1]), 2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 107,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"np.round(y_probas[:, :1], 2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 108,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"np.round(y_proba[:1], 2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 109,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_std = y_probas.std(axis=0)\n",
|
||||
"np.round(y_std[:1], 2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 110,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_pred = np.argmax(y_proba, axis=1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 111,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"accuracy = np.sum(y_pred == y_test) / len(y_test)\n",
|
||||
"accuracy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 112,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class MCDropout(keras.layers.Dropout):\n",
|
||||
" def call(self, inputs):\n",
|
||||
" return super().call(inputs, training=True)\n",
|
||||
"\n",
|
||||
"class MCAlphaDropout(keras.layers.AlphaDropout):\n",
|
||||
" def call(self, inputs):\n",
|
||||
" return super().call(inputs, training=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 113,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tf.random.set_seed(42)\n",
|
||||
"np.random.seed(42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 114,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mc_model = keras.models.Sequential([\n",
|
||||
" MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer\n",
|
||||
" for layer in model.layers\n",
|
||||
"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 115,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mc_model.summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 116,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n",
|
||||
"mc_model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 117,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mc_model.set_weights(model.get_weights())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we can use the model with MC Dropout:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 118,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"np.round(np.mean([mc_model.predict(X_test_scaled[:1]) for sample in range(100)], axis=0), 2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1679,7 +1884,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 100,
|
||||
"execution_count": 119,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1689,7 +1894,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 101,
|
||||
"execution_count": 120,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1703,10 +1908,10 @@
|
|||
" MaxNormDense(100),\n",
|
||||
" keras.layers.Dense(10, activation=\"softmax\")\n",
|
||||
"])\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n",
|
||||
"model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
|
||||
"n_epochs = 2\n",
|
||||
"history = model.fit(X_train, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid, y_valid))"
|
||||
"history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
|
||||
" validation_data=(X_valid_scaled, y_valid))"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1715,7 +1920,7 @@
|
|||
"collapsed": true
|
||||
},
|
||||
"source": [
|
||||
"# Exercise solutions"
|
||||
"# Exercises"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue