diff --git a/17_autoencoders.ipynb b/17_autoencoders.ipynb
index d770238..e241ee4 100644
--- a/17_autoencoders.ipynb
+++ b/17_autoencoders.ipynb
@@ -91,7 +91,7 @@
    "outputs": [],
    "source": [
     "def plot_image(image):\n",
-    "    plt.imshow(image, cmap=\"Greys\", interpolation=\"nearest\")\n",
+    "    plt.imshow(image, cmap=\"binary\")\n",
     "    plt.axis(\"off\")"
    ]
   },
@@ -117,34 +117,16 @@
    "source": [
     "np.random.seed(4)\n",
     "\n",
-    "m = 200\n",
-    "w1, w2 = 0.1, 0.3\n",
-    "noise = 0.1\n",
+    "def generate_3d_data(m, w1=0.1, w2=0.3, noise=0.1):\n",
+    "    angles = np.random.rand(m) * 3 * np.pi / 2 - 0.5\n",
+    "    data = np.empty((m, 3))\n",
+    "    data[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * np.random.randn(m) / 2\n",
+    "    data[:, 1] = np.sin(angles) * 0.7 + noise * np.random.randn(m) / 2\n",
+    "    data[:, 2] = data[:, 0] * w1 + data[:, 1] * w2 + noise * np.random.randn(m)\n",
+    "    return data\n",
     "\n",
-    "angles = np.random.rand(m) * 3 * np.pi / 2 - 0.5\n",
-    "data = np.empty((m, 3))\n",
-    "data[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * np.random.randn(m) / 2\n",
-    "data[:, 1] = np.sin(angles) * 0.7 + noise * np.random.randn(m) / 2\n",
-    "data[:, 2] = data[:, 0] * w1 + data[:, 1] * w2 + noise * np.random.randn(m)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Normalize the data:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from sklearn.preprocessing import StandardScaler\n",
-    "scaler = StandardScaler()\n",
-    "X_train = scaler.fit_transform(data[:100])\n",
-    "X_test = scaler.transform(data[100:])"
+    "X_train = generate_3d_data(60)\n",
+    "X_train = X_train - X_train.mean(axis=0)  # center each feature; the (3,) mean broadcasts over rows"
    ]
   },
   {
@@ -156,18 +138,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
+    "np.random.seed(42)\n",
     "tf.random.set_seed(42)\n",
     "\n",
     "encoder = keras.models.Sequential([keras.layers.Dense(2, input_shape=[3])])\n",
     "decoder = keras.models.Sequential([keras.layers.Dense(3, input_shape=[2])])\n",
     "autoencoder = keras.models.Sequential([encoder, decoder])\n",
     "\n",
-    "autoencoder.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(lr=0.1))\n",
-    "history = autoencoder.fit(X_train, X_train, epochs=20, validation_data=[X_test, X_test])"
+    "autoencoder.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(lr=1.5))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "history = autoencoder.fit(X_train, X_train, epochs=20)"
    ]
   },
   {
@@ -176,7 +167,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "codings_test = encoder.predict(X_test)"
+    "codings = encoder.predict(X_train)"
    ]
   },
   {
@@ -186,9 +177,10 @@
    "outputs": [],
    "source": [
     "fig = plt.figure(figsize=(4,3))\n",
-    "plt.plot(codings_test[:,0], codings_test[:, 1], \"b.\")\n",
+    "plt.plot(codings[:,0], codings[:, 1], \"b.\")\n",
     "plt.xlabel(\"$z_1$\", fontsize=18)\n",
     "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n",
+    "plt.grid(True)\n",
     "save_fig(\"linear_autoencoder_pca_plot\")\n",
     "plt.show()"
    ]
@@ -214,8 +206,8 @@
    "outputs": [],
    "source": [
     "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()\n",
-    "X_train_full = X_train_full / 255\n",
-    "X_test = X_test / 255\n",
+    "X_train_full = X_train_full.astype(np.float32) / 255\n",
+    "X_test = X_test.astype(np.float32) / 255\n",
     "X_train, X_valid = X_train_full[:-5000], X_train_full[-5000:]\n",
     "y_train, y_valid = y_train_full[:-5000], y_train_full[-5000:]"
    ]
@@ -239,6 +231,16 @@
    "execution_count": 9,
    "metadata": {},
    "outputs": [],
+   "source": [
+    "def rounded_accuracy(y_true, y_pred):\n",
+    "    return keras.metrics.binary_accuracy(tf.round(y_true), tf.round(y_pred))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "tf.random.set_seed(42)\n",
     "np.random.seed(42)\n",
@@ -254,9 +256,9 @@
     "    keras.layers.Reshape([28, 28])\n",
     "])\n",
     "stacked_ae = keras.models.Sequential([stacked_encoder, stacked_decoder])\n",
-    "stacked_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.),\n",
-    "                   metrics=[\"accuracy\"])\n",
-    "history = stacked_ae.fit(X_train, X_train, epochs=10,\n",
+    "stacked_ae.compile(loss=\"binary_crossentropy\",\n",
+    "                   optimizer=keras.optimizers.SGD(lr=1.5), metrics=[rounded_accuracy])\n",
+    "history = stacked_ae.fit(X_train, X_train, epochs=20,\n",
     "                         validation_data=[X_valid, X_valid])"
    ]
   },
@@ -269,23 +271,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
-    "def show_reconstructions(model, n_test_images = 2):\n",
-    "    reconstructions = model.predict(X_test[:n_test_images])\n",
-    "    fig = plt.figure(figsize=(8, 3 * n_test_images))\n",
-    "    for image_index in range(n_test_images):\n",
-    "        plt.subplot(n_test_images, 2, image_index * 2 + 1)\n",
-    "        plot_image(X_test[image_index])\n",
-    "        plt.subplot(n_test_images, 2, image_index * 2 + 2)\n",
+    "def show_reconstructions(model, images=X_valid, n_images=5):\n",
+    "    reconstructions = model.predict(images[:n_images])\n",
+    "    fig = plt.figure(figsize=(n_images * 1.5, 3))\n",
+    "    for image_index in range(n_images):\n",
+    "        plt.subplot(2, n_images, 1 + image_index)\n",
+    "        plot_image(images[image_index])\n",
+    "        plt.subplot(2, n_images, 1 + n_images + image_index)\n",
     "        plot_image(reconstructions[image_index])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -293,6 +295,71 @@
     "save_fig(\"reconstruction_plot\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Visualizing Fashion MNIST"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "\n",
+    "from sklearn.manifold import TSNE\n",
+    "\n",
+    "X_valid_compressed = stacked_encoder.predict(X_valid)\n",
+    "tsne = TSNE()\n",
+    "X_valid_2D = tsne.fit_transform(X_valid_compressed)\n",
+    "X_valid_2D = (X_valid_2D - X_valid_2D.min()) / (X_valid_2D.max() - X_valid_2D.min())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.scatter(X_valid_2D[:, 0], X_valid_2D[:, 1], c=y_valid, s=10, cmap=\"tab10\")\n",
+    "plt.axis(\"off\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's make this diagram a bit prettier:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# adapted from https://scikit-learn.org/stable/auto_examples/manifold/plot_lle_digits.html\n",
+    "plt.figure(figsize=(10, 8))\n",
+    "cmap = plt.cm.tab10\n",
+    "plt.scatter(X_valid_2D[:, 0], X_valid_2D[:, 1], c=y_valid, s=10, cmap=cmap)\n",
+    "image_positions = np.array([[1., 1.]])\n",
+    "for index, position in enumerate(X_valid_2D):\n",
+    "    dist = np.sum((position - image_positions) ** 2, axis=1)\n",
+    "    if np.min(dist) > 0.02: # if far enough from other images\n",
+    "        image_positions = np.r_[image_positions, [position]]\n",
+    "        imagebox = mpl.offsetbox.AnnotationBbox(\n",
+    "            mpl.offsetbox.OffsetImage(X_valid[index], cmap=\"binary\"),\n",
+    "            position, bboxprops={\"edgecolor\": cmap(y_valid[index]), \"lw\": 2})\n",
+    "        plt.gca().add_artist(imagebox)\n",
+    "plt.axis(\"off\")\n",
+    "save_fig(\"fashion_mnist_visualization_plot\")\n",
+    "plt.show()"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -309,7 +376,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -321,16 +388,18 @@
     "        self.activation = keras.activations.get(activation)\n",
     "        super().__init__(**kwargs)\n",
     "    def build(self, batch_input_shape):\n",
-    "        self.biases = self.add_weight(name=\"bias\", shape=[self.dense.input_shape[-1]],\n",
+    "        self.biases = self.add_weight(name=\"bias\",\n",
+    "                                      shape=[self.dense.input_shape[-1]],\n",
     "                                      initializer=\"zeros\")\n",
     "        super().build(batch_input_shape)\n",
     "    def call(self, inputs):\n",
-    "        return self.activation(inputs @ K.transpose(self.dense.weights[0]) + self.biases)"
+    "        z = inputs @ K.transpose(self.dense.weights[0]) + self.biases\n",
+    "        return self.activation(z)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -348,14 +417,15 @@
     "    keras.layers.Reshape([28, 28])\n",
     "])\n",
     "tied_ae = keras.models.Sequential([tied_encoder, tied_decoder])\n",
-    "tied_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.),\n",
-    "                metrics=[\"accuracy\"])\n",
-    "history = tied_ae.fit(X_train, X_train, epochs=10, validation_data=[X_valid, X_valid])"
+    "tied_ae.compile(loss=\"binary_crossentropy\",\n",
+    "                optimizer=keras.optimizers.SGD(lr=1.5), metrics=[rounded_accuracy])\n",
+    "history = tied_ae.fit(X_train, X_train, epochs=10,\n",
+    "                      validation_data=(X_valid, X_valid))  # (x_val, y_val) tuple"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 18,
    "metadata": {
     "scrolled": true
    },
@@ -374,12 +444,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
-    "def train_autoencoder(n_neurons, X_train, X_valid, loss, optimizer, metrics=None,\n",
-    "                      n_epochs=10, output_activation=None):\n",
+    "def train_autoencoder(n_neurons, X_train, X_valid, loss, optimizer,\n",
+    "                      n_epochs=10, output_activation=None, metrics=None):\n",
     "    n_inputs = X_train.shape[-1]\n",
     "    encoder = keras.models.Sequential([\n",
     "        keras.layers.Dense(n_neurons, activation=\"selu\", input_shape=[n_inputs])\n",
@@ -396,95 +466,67 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
     "tf.random.set_seed(42)\n",
     "np.random.seed(42)\n",
     "\n",
-    "X_train_flat = keras.layers.Flatten()(X_train)\n",
-    "X_valid_flat = keras.layers.Flatten()(X_valid)\n",
+    "X_train_flat = K.batch_flatten(X_train) # equivalent to .reshape(-1, 28 * 28)\n",
+    "X_valid_flat = K.batch_flatten(X_valid)\n",
     "enc1, dec1, X_train_enc1, X_valid_enc1 = train_autoencoder(\n",
-    "    100, X_train_flat, X_valid_flat, \"binary_crossentropy\", keras.optimizers.SGD(lr=0.1),\n",
-    "    output_activation=\"sigmoid\", metrics=[\"accuracy\"])\n",
+    "    100, X_train_flat, X_valid_flat, \"binary_crossentropy\",\n",
+    "    keras.optimizers.SGD(lr=1.5), output_activation=\"sigmoid\",\n",
+    "    metrics=[rounded_accuracy])\n",
     "enc2, dec2, _, _ = train_autoencoder(\n",
-    "    30, X_train_enc1, X_valid_enc1, \"mse\", keras.optimizers.Adam(),\n",
+    "    30, X_train_enc1, X_valid_enc1, \"mse\", keras.optimizers.SGD(lr=0.05),\n",
     "    output_activation=\"selu\")"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "stacked_ae_1_by_1 = keras.models.Sequential([\n",
-    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
-    "    enc1,\n",
-    "    enc2,\n",
-    "    dec2,\n",
-    "    dec1,\n",
-    "    keras.layers.Reshape([28, 28])\n",
-    "])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "show_reconstructions(stacked_ae_1_by_1)\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "stacked_ae_1_by_1.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=0.5),\n",
-    "                          metrics=[\"accuracy\"])\n",
-    "history = stacked_ae_1_by_1.fit(X_train, X_train, epochs=10, validation_data=[X_valid, X_valid])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "show_reconstructions(stacked_ae_1_by_1)\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Visualizing the extracted features"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
-    "weights1 = stacked_ae_1_by_1.layers[1].get_weights()[0]\n",
-    "plt.figure(figsize=(8, 2))\n",
-    "n_rows, n_cols = 2, 8\n",
-    "for row in range(n_rows):\n",
-    "    for col in range(n_cols):\n",
-    "        index = row * n_cols + col\n",
-    "        plt.subplot(n_rows, n_cols, index + 1)\n",
-    "        plt.imshow(weights1[:, index].reshape(28, 28), cmap=\"Greys\")\n",
-    "        plt.axis(\"off\")\n",
-    "\n",
-    "save_fig(\"extracted_features_plot\", tight_layout=False) # not shown\n",
-    "plt.show()                          # not shown"
+    "stacked_ae_1_by_1 = keras.models.Sequential([\n",
+    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
+    "    enc1, enc2, dec2, dec1,\n",
+    "    keras.layers.Reshape([28, 28])\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_reconstructions(stacked_ae_1_by_1)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stacked_ae_1_by_1.compile(loss=\"binary_crossentropy\",\n",
+    "                          optimizer=keras.optimizers.SGD(lr=0.1), metrics=[rounded_accuracy])\n",
+    "history = stacked_ae_1_by_1.fit(X_train, X_train, epochs=10,\n",
+    "                                validation_data=(X_valid, X_valid))  # (x_val, y_val) tuple"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_reconstructions(stacked_ae_1_by_1)\n",
+    "plt.show()"
    ]
   },
   {
@@ -503,7 +545,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -529,14 +571,14 @@
     "conv_ae = keras.models.Sequential([conv_encoder, conv_decoder])\n",
     "\n",
     "conv_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.0),\n",
-    "                metrics=[\"accuracy\"])\n",
+    "                metrics=[rounded_accuracy])\n",
     "history = conv_ae.fit(X_train, X_train, epochs=5,\n",
     "                      validation_data=[X_valid, X_valid])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -546,7 +588,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -558,72 +600,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Unsupervised pretraining"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's create a small neural network for MNIST classification:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tf.random.set_seed(42)\n",
-    "np.random.seed(42)\n",
-    "\n",
-    "X_train_small = X_train[:500]\n",
-    "y_train_small = y_train[:500]\n",
-    "\n",
-    "classifier = keras.models.Sequential([\n",
-    "    keras.layers.Reshape([28, 28, 1], input_shape=[28, 28]),\n",
-    "    keras.layers.Conv2D(16, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n",
-    "    keras.layers.MaxPool2D(pool_size=2),\n",
-    "    keras.layers.Conv2D(32, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n",
-    "    keras.layers.MaxPool2D(pool_size=2),\n",
-    "    keras.layers.Conv2D(64, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n",
-    "    keras.layers.MaxPool2D(pool_size=2),\n",
-    "    keras.layers.Flatten(),\n",
-    "    keras.layers.Dense(20, activation=\"selu\"),\n",
-    "    keras.layers.Dense(10, activation=\"softmax\")\n",
-    "])\n",
-    "classifier.compile(loss=\"sparse_categorical_crossentropy\", optimizer=keras.optimizers.SGD(lr=0.02), metrics=[\"accuracy\"])\n",
-    "history = classifier.fit(X_train_small, y_train_small, epochs=20, validation_data=[X_valid, y_valid])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "pd.DataFrame(history.history).plot()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tf.random.set_seed(42)\n",
-    "np.random.seed(42)\n",
-    "\n",
-    "conv_encoder_clone = keras.models.clone_model(conv_encoder)\n",
-    "\n",
-    "pretrained_clf = keras.models.Sequential([\n",
-    "    conv_encoder_clone,\n",
-    "    keras.layers.Flatten(),\n",
-    "    keras.layers.Dense(20, activation=\"selu\"),\n",
-    "    keras.layers.Dense(10, activation=\"softmax\")\n",
-    "])"
+    "# Recurrent Autoencoders"
    ]
   },
   {
@@ -632,12 +609,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "conv_encoder_clone.trainable = False\n",
-    "pretrained_clf.compile(loss=\"sparse_categorical_crossentropy\",\n",
-    "                       optimizer=keras.optimizers.SGD(lr=0.02),\n",
-    "                       metrics=[\"accuracy\"])\n",
-    "history = pretrained_clf.fit(X_train_small, y_train_small, epochs=30,\n",
-    "                             validation_data=[X_valid, y_valid])"
+    "recurrent_encoder = keras.models.Sequential([\n",
+    "    keras.layers.LSTM(100, return_sequences=True, input_shape=[28, 28]),\n",
+    "    keras.layers.LSTM(30)\n",
+    "])\n",
+    "recurrent_decoder = keras.models.Sequential([\n",
+    "    keras.layers.RepeatVector(28, input_shape=[30]),\n",
+    "    keras.layers.LSTM(100, return_sequences=True),\n",
+    "    keras.layers.TimeDistributed(keras.layers.Dense(28, activation=\"sigmoid\"))\n",
+    "])\n",
+    "recurrent_ae = keras.models.Sequential([recurrent_encoder, recurrent_decoder])\n",
+    "recurrent_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(0.1),\n",
+    "                     metrics=[rounded_accuracy])"
    ]
   },
   {
@@ -646,12 +629,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "conv_encoder_clone.trainable = True\n",
-    "pretrained_clf.compile(loss=\"sparse_categorical_crossentropy\",\n",
-    "                       optimizer=keras.optimizers.SGD(lr=0.02),\n",
-    "                       metrics=[\"accuracy\"])\n",
-    "history = pretrained_clf.fit(X_train_small, y_train_small, epochs=20,\n",
-    "                             validation_data=[X_valid, y_valid])"
+    "history = recurrent_ae.fit(X_train, X_train, epochs=10, validation_data=(X_valid, X_valid))  # (x_val, y_val) tuple"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "show_reconstructions(recurrent_ae)\n",
+    "plt.show()"
    ]
   },
   {
@@ -670,7 +658,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -679,7 +667,7 @@
     "\n",
     "denoising_encoder = keras.models.Sequential([\n",
     "    keras.layers.Flatten(input_shape=[28, 28]),\n",
-    "    keras.layers.GaussianNoise(1.0),\n",
+    "    keras.layers.GaussianNoise(0.2),\n",
     "    keras.layers.Dense(100, activation=\"selu\"),\n",
     "    keras.layers.Dense(30, activation=\"selu\")\n",
     "])\n",
@@ -690,18 +678,22 @@
     "])\n",
     "denoising_ae = keras.models.Sequential([denoising_encoder, denoising_decoder])\n",
     "denoising_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.0),\n",
-    "                     metrics=[\"accuracy\"])\n",
+    "                     metrics=[rounded_accuracy])\n",
     "history = denoising_ae.fit(X_train, X_train, epochs=10,\n",
     "                           validation_data=[X_valid, X_valid])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [],
    "source": [
-    "show_reconstructions(denoising_ae)\n",
+    "tf.random.set_seed(42)\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "noise = keras.layers.GaussianNoise(0.2)\n",
+    "show_reconstructions(denoising_ae, noise(X_valid, training=True))\n",
     "plt.show()"
    ]
   },
@@ -714,7 +706,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -734,19 +726,23 @@
     "])\n",
     "dropout_ae = keras.models.Sequential([dropout_encoder, dropout_decoder])\n",
     "dropout_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.0),\n",
-    "                   metrics=[\"accuracy\"])\n",
+    "                   metrics=[rounded_accuracy])\n",
     "history = dropout_ae.fit(X_train, X_train, epochs=10,\n",
     "                         validation_data=[X_valid, X_valid])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [],
    "source": [
-    "show_reconstructions(dropout_ae)\n",
-    "plt.show()"
+    "tf.random.set_seed(42)\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "dropout = keras.layers.Dropout(0.5)\n",
+    "show_reconstructions(dropout_ae, dropout(X_valid, training=True))\n",
+    "save_fig(\"dropout_denoising_plot\", tight_layout=False)"
    ]
   },
   {
@@ -760,12 +756,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Let's build a simple stacked autoencoder, but this time we will use the sigmoid activation function for the coding layer, to ensure that the coding values range from 0 to 1:"
+    "Let's build a simple stacked autoencoder, so we can compare it to the sparse autoencoders we will build. This time we will use the sigmoid activation function for the coding layer, to ensure that the coding values range from 0 to 1:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -784,14 +780,14 @@
     "])\n",
     "simple_ae = keras.models.Sequential([simple_encoder, simple_decoder])\n",
     "simple_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.),\n",
-    "                  metrics=[\"accuracy\"])\n",
+    "                  metrics=[rounded_accuracy])\n",
     "history = simple_ae.fit(X_train, X_train, epochs=10,\n",
     "                        validation_data=[X_valid, X_valid])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -808,7 +804,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -825,7 +821,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -858,7 +854,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -875,7 +871,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -897,14 +893,14 @@
     "])\n",
     "sparse_l1_ae = keras.models.Sequential([sparse_l1_encoder, sparse_l1_decoder])\n",
     "sparse_l1_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.0),\n",
-    "                     metrics=[\"accuracy\"])\n",
+    "                     metrics=[rounded_accuracy])\n",
     "history = sparse_l1_ae.fit(X_train, X_train, epochs=10,\n",
     "                           validation_data=[X_valid, X_valid])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -913,7 +909,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 42,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -930,7 +926,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 43,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -953,11 +949,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [],
    "source": [
     "K = keras.backend\n",
+    "kl_divergence = keras.losses.kullback_leibler_divergence\n",
     "\n",
     "class KLDivergenceRegularizer(keras.regularizers.Regularizer):\n",
     "    def __init__(self, weight, target=0.1):\n",
@@ -966,13 +963,13 @@
     "    def __call__(self, inputs):\n",
     "        mean_activities = K.mean(inputs, axis=0)\n",
     "        return self.weight * (\n",
-    "            keras.losses.kullback_leibler_divergence(self.target, mean_activities) +\n",
-    "            keras.losses.kullback_leibler_divergence(1. - self.target, 1. - mean_activities))"
+    "            kl_divergence(self.target, mean_activities) +\n",
+    "            kl_divergence(1. - self.target, 1. - mean_activities))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 45,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -992,27 +989,28 @@
     "])\n",
     "sparse_kl_ae = keras.models.Sequential([sparse_kl_encoder, sparse_kl_decoder])\n",
     "sparse_kl_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.0),\n",
-    "              metrics=[\"accuracy\"])\n",
+    "              metrics=[rounded_accuracy])\n",
     "history = sparse_kl_ae.fit(X_train, X_train, epochs=10,\n",
     "                           validation_data=[X_valid, X_valid])"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "show_reconstructions(sparse_kl_ae)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 46,
    "metadata": {},
    "outputs": [],
+   "source": [
+    "show_reconstructions(sparse_kl_ae)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "plot_activations_histogram(sparse_kl_encoder)\n",
+    "save_fig(\"sparse_autoencoder_plot\")\n",
     "plt.show()"
    ]
   },
@@ -1020,12 +1018,486 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Hashing Autoencoder"
+    "# Variational Autoencoder"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Sampling(keras.layers.Layer):\n",
+    "    def call(self, inputs):\n",
+    "        mean, log_var = inputs\n",
+    "        return K.random_normal(tf.shape(log_var)) * K.exp(log_var / 2) + mean "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.random.set_seed(42)\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "codings_size = 10\n",
+    "\n",
+    "inputs = keras.layers.Input(shape=[28, 28])\n",
+    "z = keras.layers.Flatten()(inputs)\n",
+    "z = keras.layers.Dense(150, activation=\"selu\")(z)\n",
+    "z = keras.layers.Dense(100, activation=\"selu\")(z)\n",
+    "codings_mean = keras.layers.Dense(codings_size)(z)\n",
+    "codings_log_var = keras.layers.Dense(codings_size)(z)\n",
+    "codings = Sampling()([codings_mean, codings_log_var])\n",
+    "variational_encoder = keras.models.Model(\n",
+    "    inputs=[inputs], outputs=[codings_mean, codings_log_var, codings])\n",
+    "\n",
+    "decoder_inputs = keras.layers.Input(shape=[codings_size])\n",
+    "x = keras.layers.Dense(100, activation=\"selu\")(decoder_inputs)\n",
+    "x = keras.layers.Dense(150, activation=\"selu\")(x)\n",
+    "x = keras.layers.Dense(28 * 28, activation=\"sigmoid\")(x)\n",
+    "outputs = keras.layers.Reshape([28, 28])(x)\n",
+    "variational_decoder = keras.models.Model(inputs=[decoder_inputs], outputs=[outputs])\n",
+    "\n",
+    "_, _, codings = variational_encoder(inputs)\n",
+    "reconstructions = variational_decoder(codings)\n",
+    "variational_ae = keras.models.Model(inputs=[inputs], outputs=[reconstructions])\n",
+    "\n",
+    "latent_loss = -0.5 * K.sum(\n",
+    "    1 + codings_log_var - K.exp(codings_log_var) - K.square(codings_mean),\n",
+    "    axis=-1)\n",
+    "variational_ae.add_loss(K.mean(latent_loss) / 784.)\n",
+    "variational_ae.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\", metrics=[rounded_accuracy])\n",
+    "history = variational_ae.fit(X_train, X_train, epochs=25, batch_size=128,\n",
+    "                             validation_data=(X_valid, X_valid))  # tuple, not list"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "show_reconstructions(variational_ae)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Generate Fashion Images"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def plot_multiple_images(images, n_cols=None):\n",
+    "    n_cols = n_cols or len(images)\n",
+    "    n_rows = (len(images) - 1) // n_cols + 1\n",
+    "    if images.shape[-1] == 1:\n",
+    "        images = np.squeeze(images, axis=-1)\n",
+    "    plt.figure(figsize=(n_cols, n_rows))\n",
+    "    for index, image in enumerate(images):\n",
+    "        plt.subplot(n_rows, n_cols, index + 1)\n",
+    "        plt.imshow(image, cmap=\"binary\")\n",
+    "        plt.axis(\"off\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's generate a few random codings, decode them and plot the resulting images:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "codings = tf.random.normal(shape=[12, codings_size])\n",
+    "images = variational_decoder(codings).numpy()\n",
+    "plot_multiple_images(images, 4)\n",
+    "save_fig(\"vae_generated_images_plot\", tight_layout=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now let's perform semantic interpolation between these images:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.random.set_seed(42)\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "codings_grid = tf.reshape(codings, [1, 3, 4, codings_size])\n",
+    "larger_grid = tf.image.resize(codings_grid, size=[5, 7])\n",
+    "interpolated_codings = tf.reshape(larger_grid, [-1, codings_size])\n",
+    "images = variational_decoder(interpolated_codings).numpy()\n",
+    "\n",
+    "plt.figure(figsize=(7, 5))\n",
+    "for index, image in enumerate(images):\n",
+    "    plt.subplot(5, 7, index + 1)\n",
+    "    if index%7%2==0 and index//7%2==0:\n",
+    "        plt.gca().get_xaxis().set_visible(False)\n",
+    "        plt.gca().get_yaxis().set_visible(False)\n",
+    "    else:\n",
+    "        plt.axis(\"off\")\n",
+    "    plt.imshow(image, cmap=\"binary\")\n",
+    "save_fig(\"semantic_interpolation_plot\", tight_layout=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Generative Adversarial Networks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
+    "\n",
+    "codings_size = 30\n",
+    "\n",
+    "generator = keras.models.Sequential([\n",
+    "    keras.layers.Dense(100, activation=\"selu\", input_shape=[codings_size]),\n",
+    "    keras.layers.Dense(150, activation=\"selu\"),\n",
+    "    keras.layers.Dense(28 * 28, activation=\"sigmoid\"),\n",
+    "    keras.layers.Reshape([28, 28])\n",
+    "])\n",
+    "discriminator = keras.models.Sequential([\n",
+    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
+    "    keras.layers.Dense(150, activation=\"selu\"),\n",
+    "    keras.layers.Dense(100, activation=\"selu\"),\n",
+    "    keras.layers.Dense(1, activation=\"sigmoid\")\n",
+    "])\n",
+    "gan = keras.models.Sequential([generator, discriminator])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "discriminator.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\")\n",
+    "discriminator.trainable = False\n",
+    "gan.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_size = 32\n",
+    "dataset = tf.data.Dataset.from_tensor_slices(X_train).shuffle(1000)\n",
+    "dataset = dataset.batch(batch_size, drop_remainder=True).prefetch(1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train_gan(gan, dataset, batch_size, codings_size, n_epochs=50):\n",
+    "    generator, discriminator = gan.layers\n",
+    "    for epoch in range(n_epochs):\n",
+    "        print(\"Epoch {}/{}\".format(epoch + 1, n_epochs))              # not shown in the book\n",
+    "        for X_batch in dataset:\n",
+    "            # phase 1 - training the discriminator\n",
+    "            noise = tf.random.normal(shape=[batch_size, codings_size])\n",
+    "            generated_images = generator(noise)\n",
+    "            X_fake_and_real = tf.concat([generated_images, X_batch], axis=0)\n",
+    "            y1 = tf.constant([[0.]] * batch_size + [[1.]] * batch_size)\n",
+    "            discriminator.trainable = True\n",
+    "            discriminator.train_on_batch(X_fake_and_real, y1)\n",
+    "            # phase 2 - training the generator\n",
+    "            noise = tf.random.normal(shape=[batch_size, codings_size])\n",
+    "            y2 = tf.constant([[1.]] * batch_size)\n",
+    "            discriminator.trainable = False\n",
+    "            gan.train_on_batch(noise, y2)\n",
+    "        plot_multiple_images(generated_images, 8)                     # not shown\n",
+    "        plt.show()                                                    # not shown"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_gan(gan, dataset, batch_size, codings_size, n_epochs=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.random.set_seed(42)\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "noise = tf.random.normal(shape=[batch_size, codings_size])\n",
+    "generated_images = generator(noise)\n",
+    "plot_multiple_images(generated_images, 8)\n",
+    "save_fig(\"gan_generated_images_plot\", tight_layout=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_gan(gan, dataset, batch_size, codings_size)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Deep Convolutional GAN"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.random.set_seed(42)\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "codings_size = 100\n",
+    "\n",
+    "generator = keras.models.Sequential([\n",
+    "    keras.layers.Dense(7 * 7 * 128, input_shape=[codings_size]),\n",
+    "    keras.layers.Reshape([7, 7, 128]),\n",
+    "    keras.layers.BatchNormalization(),\n",
+    "    keras.layers.Conv2DTranspose(64, kernel_size=5, strides=2, padding=\"SAME\",\n",
+    "                                 activation=\"selu\"),\n",
+    "    keras.layers.BatchNormalization(),\n",
+    "    keras.layers.Conv2DTranspose(1, kernel_size=5, strides=2, padding=\"SAME\",\n",
+    "                                 activation=\"tanh\"),\n",
+    "])\n",
+    "discriminator = keras.models.Sequential([\n",
+    "    keras.layers.Conv2D(64, kernel_size=5, strides=2, padding=\"SAME\",\n",
+    "                        activation=keras.layers.LeakyReLU(0.2),\n",
+    "                        input_shape=[28, 28, 1]),\n",
+    "    keras.layers.Dropout(0.4),\n",
+    "    keras.layers.Conv2D(128, kernel_size=5, strides=2, padding=\"SAME\",\n",
+    "                        activation=keras.layers.LeakyReLU(0.2)),\n",
+    "    keras.layers.Dropout(0.4),\n",
+    "    keras.layers.Flatten(),\n",
+    "    keras.layers.Dense(1, activation=\"sigmoid\")\n",
+    "])\n",
+    "gan = keras.models.Sequential([generator, discriminator])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "discriminator.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\")\n",
+    "discriminator.trainable = False\n",
+    "gan.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train_dcgan = X_train.reshape(-1, 28, 28, 1) * 2. - 1. # reshape and rescale"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_size = 32\n",
+    "dataset = tf.data.Dataset.from_tensor_slices(X_train_dcgan)\n",
+    "dataset = dataset.shuffle(1000)\n",
+    "dataset = dataset.batch(batch_size, drop_remainder=True).prefetch(1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_gan(gan, dataset, batch_size, codings_size)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.random.set_seed(42)\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "noise = tf.random.normal(shape=[batch_size, codings_size])\n",
+    "generated_images = generator(noise)\n",
+    "plot_multiple_images(generated_images, 8)\n",
+    "save_fig(\"dcgan_generated_images_plot\", tight_layout=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Exercise Solutions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Unsupervised pretraining"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's create a small neural network for Fashion MNIST classification:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.random.set_seed(42)\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "X_train_small = X_train[:500]\n",
+    "y_train_small = y_train[:500]\n",
+    "\n",
+    "classifier = keras.models.Sequential([\n",
+    "    keras.layers.Reshape([28, 28, 1], input_shape=[28, 28]),\n",
+    "    keras.layers.Conv2D(16, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n",
+    "    keras.layers.MaxPool2D(pool_size=2),\n",
+    "    keras.layers.Conv2D(32, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n",
+    "    keras.layers.MaxPool2D(pool_size=2),\n",
+    "    keras.layers.Conv2D(64, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n",
+    "    keras.layers.MaxPool2D(pool_size=2),\n",
+    "    keras.layers.Flatten(),\n",
+    "    keras.layers.Dense(20, activation=\"selu\"),\n",
+    "    keras.layers.Dense(10, activation=\"softmax\")\n",
+    "])\n",
+    "classifier.compile(loss=\"sparse_categorical_crossentropy\", optimizer=keras.optimizers.SGD(lr=0.02),\n",
+    "                   metrics=[\"accuracy\"])\n",
+    "history = classifier.fit(X_train_small, y_train_small, epochs=20, validation_data=(X_valid, y_valid))  # tuple, not list"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "pd.DataFrame(history.history).plot()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.random.set_seed(42)\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "conv_encoder_clone = keras.models.clone_model(conv_encoder)\n",
+    "\n",
+    "pretrained_clf = keras.models.Sequential([\n",
+    "    conv_encoder_clone,\n",
+    "    keras.layers.Flatten(),\n",
+    "    keras.layers.Dense(20, activation=\"selu\"),\n",
+    "    keras.layers.Dense(10, activation=\"softmax\")\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "conv_encoder_clone.trainable = False\n",
+    "pretrained_clf.compile(loss=\"sparse_categorical_crossentropy\",\n",
+    "                       optimizer=keras.optimizers.SGD(lr=0.02),\n",
+    "                       metrics=[\"accuracy\"])\n",
+    "history = pretrained_clf.fit(X_train_small, y_train_small, epochs=30,\n",
+    "                             validation_data=(X_valid, y_valid))  # tuple, not list"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "conv_encoder_clone.trainable = True\n",
+    "pretrained_clf.compile(loss=\"sparse_categorical_crossentropy\",\n",
+    "                       optimizer=keras.optimizers.SGD(lr=0.02),\n",
+    "                       metrics=[\"accuracy\"])\n",
+    "history = pretrained_clf.fit(X_train_small, y_train_small, epochs=20,\n",
+    "                             validation_data=(X_valid, y_valid))  # tuple, not list"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Hashing Using a Binary Autoencoder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1045,14 +1517,14 @@
     "])\n",
     "hashing_ae = keras.models.Sequential([hashing_encoder, hashing_decoder])\n",
     "hashing_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.0),\n",
-    "                   metrics=[\"accuracy\"])\n",
+    "                   metrics=[rounded_accuracy])\n",
     "history = hashing_ae.fit(X_train, X_train, epochs=10,\n",
     "                         validation_data=[X_valid, X_valid])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 73,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1062,7 +1534,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 74,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1072,7 +1544,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 75,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1086,8 +1558,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
-   "metadata": {},
+   "execution_count": 76,
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
     "n_bits = 4\n",
@@ -1100,167 +1574,6 @@
     "        plt.imshow(image, cmap=\"binary\")\n",
     "        plt.axis(\"off\")"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Variational Autoencoder"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 52,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class Sampling(keras.layers.Layer):\n",
-    "    def call(self, inputs):\n",
-    "        mean, log_var = inputs\n",
-    "        return mean + K.exp(log_var / 2) * K.random_normal(shape=tf.shape(log_var))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 53,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tf.random.set_seed(42)\n",
-    "np.random.seed(42)\n",
-    "\n",
-    "codings_size = 30\n",
-    "\n",
-    "inputs = keras.layers.Input(shape=[28, 28])\n",
-    "z = keras.layers.Flatten()(inputs)\n",
-    "z = keras.layers.Dense(150, activation=\"selu\")(z)\n",
-    "z = keras.layers.Dense(100, activation=\"selu\")(z)\n",
-    "codings_mean = keras.layers.Dense(codings_size)(z)\n",
-    "codings_log_var = keras.layers.Dense(codings_size)(z)\n",
-    "codings = Sampling()([codings_mean, codings_log_var])\n",
-    "variational_encoder = keras.models.Model(inputs=[inputs], outputs=[codings_mean, codings_log_var, codings])\n",
-    "\n",
-    "decoder_inputs = keras.layers.Input(shape=[codings_size])\n",
-    "x = keras.layers.Dense(100, activation=\"selu\")(decoder_inputs)\n",
-    "x = keras.layers.Dense(150, activation=\"selu\")(x)\n",
-    "x = keras.layers.Dense(28 * 28, activation=\"sigmoid\")(x)\n",
-    "outputs = keras.layers.Reshape([28, 28])(x)\n",
-    "variational_decoder = keras.models.Model(inputs=[decoder_inputs], outputs=[outputs])\n",
-    "\n",
-    "_, _, codings = variational_encoder(inputs)\n",
-    "reconstructions = variational_decoder(codings)\n",
-    "variational_ae = keras.models.Model(inputs=[inputs], outputs=[reconstructions])\n",
-    "\n",
-    "kld_loss = -0.5 * K.sum(1 + codings_log_var - K.exp(codings_log_var) - K.square(codings_mean), axis=-1)\n",
-    "variational_ae.add_loss(K.mean(kld_loss) / 784.)\n",
-    "variational_ae.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\", metrics=[\"accuracy\"])\n",
-    "history = variational_ae.fit(X_train, X_train, epochs=50,\n",
-    "                             validation_data=[X_valid, X_valid],\n",
-    "                             batch_size=128)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 54,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "show_reconstructions(variational_ae)\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Generate Fashion Images"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's train the model and generate a few random fashion images:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 55,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tf.random.set_seed(42)\n",
-    "np.random.seed(42)\n",
-    "\n",
-    "n_rows = 6\n",
-    "n_cols = 10\n",
-    "codings_rnd = np.random.normal(size=[n_rows * n_cols, codings_size])\n",
-    "images = variational_decoder.predict(codings_rnd)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 56,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def plot_multiple_images(images, n_rows, n_cols, pad=2):\n",
-    "    images = images - images.min()  # make the minimum == 0, so the padding looks white\n",
-    "    w,h = images.shape[1:]\n",
-    "    image = np.zeros(((w+pad)*n_rows+pad, (h+pad)*n_cols+pad))\n",
-    "    for y in range(n_rows):\n",
-    "        for x in range(n_cols):\n",
-    "            image[(y*(h+pad)+pad):(y*(h+pad)+pad+h),(x*(w+pad)+pad):(x*(w+pad)+pad+w)] = images[y*n_cols+x]\n",
-    "    plt.imshow(image, cmap=\"Greys\", interpolation=\"nearest\")\n",
-    "    plt.axis(\"off\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 57,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.figure(figsize=(8, 5))\n",
-    "plot_multiple_images(images, n_rows, n_cols)\n",
-    "save_fig(\"generated_fashion_images_plot\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Encode & Decode"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 58,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "n_iterations = 3\n",
-    "n_images = 10\n",
-    "source_codings = np.random.normal(size=[n_images, codings_size])\n",
-    "target_codings = np.roll(source_codings, -1, axis=0)\n",
-    "images = []\n",
-    "for iteration in range(n_iterations):\n",
-    "    codings_interpolate = source_codings + (target_codings - source_codings) * iteration / n_iterations\n",
-    "    images.append(variational_decoder(codings_interpolate).numpy())\n",
-    "images = np.concatenate(images)\n",
-    "\n",
-    "plt.figure(figsize=(8, 3))\n",
-    "plot_multiple_images(images, n_iterations, n_cols)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {