diff --git a/11_deep_learning.ipynb b/11_deep_learning.ipynb index d903bca..d470157 100644 --- a/11_deep_learning.ipynb +++ b/11_deep_learning.ipynb @@ -1150,7 +1150,7 @@ " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", " keras.layers.Dense(10, activation=\"softmax\")\n", "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])" + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])" ] }, { @@ -1160,8 +1160,8 @@ "outputs": [], "source": [ "n_epochs = 25\n", - "history = model.fit(X_train, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid, y_valid))" + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid))" ] }, { @@ -1236,7 +1236,7 @@ " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", " keras.layers.Dense(10, activation=\"softmax\")\n", "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n", + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", "n_epochs = 25" ] }, @@ -1247,8 +1247,8 @@ "outputs": [], "source": [ "lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)\n", - "history = model.fit(X_train, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid, y_valid),\n", + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid),\n", " callbacks=[lr_scheduler])" ] }, @@ -1320,14 +1320,14 @@ " keras.layers.Dense(10, activation=\"softmax\")\n", "])\n", "lr0 = 0.01\n", - "optimizer = keras.optimizers.SGD(lr=lr0)\n", + "optimizer = keras.optimizers.Nadam(lr=lr0)\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", "n_epochs = 25\n", "\n", "s = 20 * len(X_train) // 32 # number of steps in 20 epochs (batch size = 32)\n", "exp_decay = ExponentialDecay(s)\n", - "history = model.fit(X_train, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid, y_valid),\n", + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid),\n", " callbacks=[exp_decay])" ] }, @@ -1411,10 +1411,10 @@ " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", " keras.layers.Dense(10, activation=\"softmax\")\n", "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n", + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", "n_epochs = 25\n", - "history = model.fit(X_train, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid, y_valid),\n", + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid),\n", " callbacks=[lr_scheduler])" ] }, @@ -1464,11 +1464,11 @@ " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", " keras.layers.Dense(10, activation=\"softmax\")\n", "])\n", - "optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9)\n", + "optimizer = keras.optimizers.SGD(lr=0.02, momentum=0.9)\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", "n_epochs = 25\n", - "history = model.fit(X_train, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid, y_valid),\n", + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid),\n", " callbacks=[lr_scheduler])" ] }, @@ -1518,8 +1518,8 @@ "optimizer = keras.optimizers.SGD(learning_rate)\n", "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", "n_epochs = 25\n", - "history = model.fit(X_train, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid, y_valid))" + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid))" ] }, { @@ -1583,10 +1583,10 @@ " keras.layers.Dense(10, activation=\"softmax\",\n", " kernel_regularizer=keras.regularizers.l2(0.01))\n", "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n", + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", "n_epochs = 2\n", - "history = model.fit(X_train, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid, y_valid))" + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid))" ] }, { @@ -1608,10 +1608,10 @@ " RegularizedDense(100),\n", " RegularizedDense(10, activation=\"softmax\")\n", "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n", + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", "n_epochs = 2\n", - "history = model.fit(X_train, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid, y_valid))" + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid))" ] }, { @@ -1636,10 +1636,10 @@ " keras.layers.Dropout(rate=0.2),\n", " keras.layers.Dense(10, activation=\"softmax\")\n", "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n", + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", "n_epochs = 2\n", - "history = model.fit(X_train, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid, y_valid))" + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid))" ] }, { @@ -1654,6 +1654,16 @@ "execution_count": 99, "metadata": {}, "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [], "source": [ "model = keras.models.Sequential([\n", " keras.layers.Flatten(input_shape=[28, 28]),\n", @@ -1664,10 +1674,205 @@ " keras.layers.AlphaDropout(rate=0.2),\n", " keras.layers.Dense(10, activation=\"softmax\")\n", "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n", - "n_epochs = 2\n", - "history = model.fit(X_train, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid, y_valid))" + "optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n", + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", + "n_epochs = 20\n", + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid))" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [], + "source": [ + "model.evaluate(X_test_scaled, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [], + "source": [ + "model.evaluate(X_train_scaled, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [], + "source": [ + "#with keras.backend.learning_phase_scope(1): # TODO: check https://github.com/tensorflow/tensorflow/issues/25754\n", + "# history = model.fit(X_train_scaled, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## MC Dropout" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [], + "source": [ + "with keras.backend.learning_phase_scope(1): # TODO: check https://github.com/tensorflow/tensorflow/issues/25754\n", + " y_probas = np.stack([model.predict(X_test_scaled) for sample in range(100)])\n", + "y_proba = y_probas.mean(axis=0)\n", + "y_std = y_probas.std(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [], + "source": [ + "np.round(model.predict(X_test_scaled[:1]), 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [], + "source": [ + "np.round(y_probas[:, :1], 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "np.round(y_proba[:1], 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [], + "source": [ + "y_std = y_probas.std(axis=0)\n", + "np.round(y_std[:1], 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = np.argmax(y_proba, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "accuracy = np.sum(y_pred == y_test) / len(y_test)\n", + "accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [], + "source": [ + "class MCDropout(keras.layers.Dropout):\n", + " def call(self, inputs):\n", + " return super().call(inputs, training=True)\n", + "\n", + "class MCAlphaDropout(keras.layers.AlphaDropout):\n", + " def call(self, inputs):\n", + " return super().call(inputs, training=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [], + "source": [ + "mc_model = keras.models.Sequential([\n", + " MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer\n", + " for layer in model.layers\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [], + "source": [ + "mc_model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [], + "source": [ + "optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n", + "mc_model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [], + "source": [ + "mc_model.set_weights(model.get_weights())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can use the model with MC Dropout:" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [], + "source": [ + "np.round(np.mean([mc_model.predict(X_test_scaled[:1]) for sample in range(100)], axis=0), 2)" ] }, { @@ -1679,7 +1884,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ @@ -1689,7 +1894,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 120, "metadata": {}, "outputs": [], "source": [ @@ -1703,10 +1908,10 @@ " MaxNormDense(100),\n", " keras.layers.Dense(10, activation=\"softmax\")\n", "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"sgd\", metrics=[\"accuracy\"])\n", + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", "n_epochs = 2\n", - "history = model.fit(X_train, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid, y_valid))" + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid))" ] }, { @@ -1715,7 +1920,7 @@ "collapsed": true }, "source": [ - "# Exercise solutions" + "# Exercises" ] }, {