diff --git a/17_autoencoders.ipynb b/17_autoencoders.ipynb index d770238..e241ee4 100644 --- a/17_autoencoders.ipynb +++ b/17_autoencoders.ipynb @@ -91,7 +91,7 @@ "outputs": [], "source": [ "def plot_image(image):\n", - " plt.imshow(image, cmap=\"Greys\", interpolation=\"nearest\")\n", + " plt.imshow(image, cmap=\"binary\")\n", " plt.axis(\"off\")" ] }, @@ -117,34 +117,16 @@ "source": [ "np.random.seed(4)\n", "\n", - "m = 200\n", - "w1, w2 = 0.1, 0.3\n", - "noise = 0.1\n", + "def generate_3d_data(m, w1=0.1, w2=0.3, noise=0.1):\n", + " angles = np.random.rand(m) * 3 * np.pi / 2 - 0.5\n", + " data = np.empty((m, 3))\n", + " data[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * np.random.randn(m) / 2\n", + " data[:, 1] = np.sin(angles) * 0.7 + noise * np.random.randn(m) / 2\n", + " data[:, 2] = data[:, 0] * w1 + data[:, 1] * w2 + noise * np.random.randn(m)\n", + " return data\n", "\n", - "angles = np.random.rand(m) * 3 * np.pi / 2 - 0.5\n", - "data = np.empty((m, 3))\n", - "data[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * np.random.randn(m) / 2\n", - "data[:, 1] = np.sin(angles) * 0.7 + noise * np.random.randn(m) / 2\n", - "data[:, 2] = data[:, 0] * w1 + data[:, 1] * w2 + noise * np.random.randn(m)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Normalize the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.preprocessing import StandardScaler\n", - "scaler = StandardScaler()\n", - "X_train = scaler.fit_transform(data[:100])\n", - "X_test = scaler.transform(data[100:])" + "X_train = generate_3d_data(60)\n", + "X_train = X_train - X_train.mean(axis=0, keepdims=0)" ] }, { @@ -156,18 +138,27 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ + "np.random.seed(42)\n", "tf.random.set_seed(42)\n", "\n", "encoder = keras.models.Sequential([keras.layers.Dense(2, input_shape=[3])])\n", 
"decoder = keras.models.Sequential([keras.layers.Dense(3, input_shape=[2])])\n", "autoencoder = keras.models.Sequential([encoder, decoder])\n", "\n", - "autoencoder.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(lr=0.1))\n", - "history = autoencoder.fit(X_train, X_train, epochs=20, validation_data=[X_test, X_test])" + "autoencoder.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(lr=1.5))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "history = autoencoder.fit(X_train, X_train, epochs=20)" ] }, { @@ -176,7 +167,7 @@ "metadata": {}, "outputs": [], "source": [ - "codings_test = encoder.predict(X_test)" + "codings = encoder.predict(X_train)" ] }, { @@ -186,9 +177,10 @@ "outputs": [], "source": [ "fig = plt.figure(figsize=(4,3))\n", - "plt.plot(codings_test[:,0], codings_test[:, 1], \"b.\")\n", + "plt.plot(codings[:,0], codings[:, 1], \"b.\")\n", "plt.xlabel(\"$z_1$\", fontsize=18)\n", "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + "plt.grid(True)\n", "save_fig(\"linear_autoencoder_pca_plot\")\n", "plt.show()" ] @@ -214,8 +206,8 @@ "outputs": [], "source": [ "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()\n", - "X_train_full = X_train_full / 255\n", - "X_test = X_test / 255\n", + "X_train_full = X_train_full.astype(np.float32) / 255\n", + "X_test = X_test.astype(np.float32) / 255\n", "X_train, X_valid = X_train_full[:-5000], X_train_full[-5000:]\n", "y_train, y_valid = y_train_full[:-5000], y_train_full[-5000:]" ] @@ -239,6 +231,16 @@ "execution_count": 9, "metadata": {}, "outputs": [], + "source": [ + "def rounded_accuracy(y_true, y_pred):\n", + " return keras.metrics.binary_accuracy(tf.round(y_true), tf.round(y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], "source": [ "tf.random.set_seed(42)\n", "np.random.seed(42)\n", @@ -254,9 +256,9 @@ " keras.layers.Reshape([28, 28])\n", "])\n", 
"stacked_ae = keras.models.Sequential([stacked_encoder, stacked_decoder])\n", - "stacked_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.),\n", - " metrics=[\"accuracy\"])\n", - "history = stacked_ae.fit(X_train, X_train, epochs=10,\n", + "stacked_ae.compile(loss=\"binary_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1.5), metrics=[rounded_accuracy])\n", + "history = stacked_ae.fit(X_train, X_train, epochs=20,\n", " validation_data=[X_valid, X_valid])" ] }, @@ -269,23 +271,23 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "def show_reconstructions(model, n_test_images = 2):\n", - " reconstructions = model.predict(X_test[:n_test_images])\n", - " fig = plt.figure(figsize=(8, 3 * n_test_images))\n", - " for image_index in range(n_test_images):\n", - " plt.subplot(n_test_images, 2, image_index * 2 + 1)\n", - " plot_image(X_test[image_index])\n", - " plt.subplot(n_test_images, 2, image_index * 2 + 2)\n", + "def show_reconstructions(model, images=X_valid, n_images=5):\n", + " reconstructions = model.predict(images[:n_images])\n", + " fig = plt.figure(figsize=(n_images * 1.5, 3))\n", + " for image_index in range(n_images):\n", + " plt.subplot(2, n_images, 1 + image_index)\n", + " plot_image(images[image_index])\n", + " plt.subplot(2, n_images, 1 + n_images + image_index)\n", " plot_image(reconstructions[image_index])" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -293,6 +295,71 @@ "save_fig(\"reconstruction_plot\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualizing Fashion MNIST" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "\n", + "from sklearn.manifold import TSNE\n", + "\n", + "X_valid_compressed = stacked_encoder.predict(X_valid)\n", + "tsne = 
TSNE()\n", + "X_valid_2D = tsne.fit_transform(X_valid_compressed)\n", + "X_valid_2D = (X_valid_2D - X_valid_2D.min()) / (X_valid_2D.max() - X_valid_2D.min())" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "plt.scatter(X_valid_2D[:, 0], X_valid_2D[:, 1], c=y_valid, s=10, cmap=\"tab10\")\n", + "plt.axis(\"off\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's make this diagram a bit prettier:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# adapted from https://scikit-learn.org/stable/auto_examples/manifold/plot_lle_digits.html\n", + "plt.figure(figsize=(10, 8))\n", + "cmap = plt.cm.tab10\n", + "plt.scatter(X_valid_2D[:, 0], X_valid_2D[:, 1], c=y_valid, s=10, cmap=cmap)\n", + "image_positions = np.array([[1., 1.]])\n", + "for index, position in enumerate(X_valid_2D):\n", + " dist = np.sum((position - image_positions) ** 2, axis=1)\n", + " if np.min(dist) > 0.02: # if far enough from other images\n", + " image_positions = np.r_[image_positions, [position]]\n", + " imagebox = mpl.offsetbox.AnnotationBbox(\n", + " mpl.offsetbox.OffsetImage(X_valid[index], cmap=\"binary\"),\n", + " position, bboxprops={\"edgecolor\": cmap(y_valid[index]), \"lw\": 2})\n", + " plt.gca().add_artist(imagebox)\n", + "plt.axis(\"off\")\n", + "save_fig(\"fashion_mnist_visualization_plot\")\n", + "plt.show()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -309,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -321,16 +388,18 @@ " self.activation = keras.activations.get(activation)\n", " super().__init__(**kwargs)\n", " def build(self, batch_input_shape):\n", - " self.biases = self.add_weight(name=\"bias\", shape=[self.dense.input_shape[-1]],\n", + " self.biases = self.add_weight(name=\"bias\",\n", + " 
shape=[self.dense.input_shape[-1]],\n", " initializer=\"zeros\")\n", " super().build(batch_input_shape)\n", " def call(self, inputs):\n", - " return self.activation(inputs @ K.transpose(self.dense.weights[0]) + self.biases)" + " z = inputs @ K.transpose(self.dense.weights[0]) + self.biases\n", + " return self.activation(z)" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -348,14 +417,15 @@ " keras.layers.Reshape([28, 28])\n", "])\n", "tied_ae = keras.models.Sequential([tied_encoder, tied_decoder])\n", - "tied_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.),\n", - " metrics=[\"accuracy\"])\n", - "history = tied_ae.fit(X_train, X_train, epochs=10, validation_data=[X_valid, X_valid])" + "tied_ae.compile(loss=\"binary_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1.5), metrics=[rounded_accuracy])\n", + "history = tied_ae.fit(X_train, X_train, epochs=10,\n", + " validation_data=[X_valid, X_valid])" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 18, "metadata": { "scrolled": true }, @@ -374,12 +444,12 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "def train_autoencoder(n_neurons, X_train, X_valid, loss, optimizer, metrics=None,\n", - " n_epochs=10, output_activation=None):\n", + "def train_autoencoder(n_neurons, X_train, X_valid, loss, optimizer,\n", + " n_epochs=10, output_activation=None, metrics=None):\n", " n_inputs = X_train.shape[-1]\n", " encoder = keras.models.Sequential([\n", " keras.layers.Dense(n_neurons, activation=\"selu\", input_shape=[n_inputs])\n", @@ -396,95 +466,67 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "tf.random.set_seed(42)\n", "np.random.seed(42)\n", "\n", - "X_train_flat = keras.layers.Flatten()(X_train)\n", - "X_valid_flat = 
keras.layers.Flatten()(X_valid)\n", + "X_train_flat = K.batch_flatten(X_train) # equivalent to .reshape(-1, 28 * 28)\n", + "X_valid_flat = K.batch_flatten(X_valid)\n", "enc1, dec1, X_train_enc1, X_valid_enc1 = train_autoencoder(\n", - " 100, X_train_flat, X_valid_flat, \"binary_crossentropy\", keras.optimizers.SGD(lr=0.1),\n", - " output_activation=\"sigmoid\", metrics=[\"accuracy\"])\n", + " 100, X_train_flat, X_valid_flat, \"binary_crossentropy\",\n", + " keras.optimizers.SGD(lr=1.5), output_activation=\"sigmoid\",\n", + " metrics=[rounded_accuracy])\n", "enc2, dec2, _, _ = train_autoencoder(\n", - " 30, X_train_enc1, X_valid_enc1, \"mse\", keras.optimizers.Adam(),\n", + " 30, X_train_enc1, X_valid_enc1, \"mse\", keras.optimizers.SGD(lr=0.05),\n", " output_activation=\"selu\")" ] }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "stacked_ae_1_by_1 = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " enc1,\n", - " enc2,\n", - " dec2,\n", - " dec1,\n", - " keras.layers.Reshape([28, 28])\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "show_reconstructions(stacked_ae_1_by_1)\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "stacked_ae_1_by_1.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=0.5),\n", - " metrics=[\"accuracy\"])\n", - "history = stacked_ae_1_by_1.fit(X_train, X_train, epochs=10, validation_data=[X_valid, X_valid])" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "show_reconstructions(stacked_ae_1_by_1)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Visualizing the extracted features" - ] - }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], 
"source": [ - "weights1 = stacked_ae_1_by_1.layers[1].get_weights()[0]\n", - "plt.figure(figsize=(8, 2))\n", - "n_rows, n_cols = 2, 8\n", - "for row in range(n_rows):\n", - " for col in range(n_cols):\n", - " index = row * n_cols + col\n", - " plt.subplot(n_rows, n_cols, index + 1)\n", - " plt.imshow(weights1[:, index].reshape(28, 28), cmap=\"Greys\")\n", - " plt.axis(\"off\")\n", - "\n", - "save_fig(\"extracted_features_plot\", tight_layout=False) # not shown\n", - "plt.show() # not shown" + "stacked_ae_1_by_1 = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " enc1, enc2, dec2, dec1,\n", + " keras.layers.Reshape([28, 28])\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "show_reconstructions(stacked_ae_1_by_1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "stacked_ae_1_by_1.compile(loss=\"binary_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=0.1), metrics=[rounded_accuracy])\n", + "history = stacked_ae_1_by_1.fit(X_train, X_train, epochs=10,\n", + " validation_data=[X_valid, X_valid])" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "show_reconstructions(stacked_ae_1_by_1)\n", + "plt.show()" ] }, { @@ -503,7 +545,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -529,14 +571,14 @@ "conv_ae = keras.models.Sequential([conv_encoder, conv_decoder])\n", "\n", "conv_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.0),\n", - " metrics=[\"accuracy\"])\n", + " metrics=[rounded_accuracy])\n", "history = conv_ae.fit(X_train, X_train, epochs=5,\n", " validation_data=[X_valid, X_valid])" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ 
@@ -546,7 +588,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -558,72 +600,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Unsupervised pretraining" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's create a small neural network for MNIST classification:" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "X_train_small = X_train[:500]\n", - "y_train_small = y_train[:500]\n", - "\n", - "classifier = keras.models.Sequential([\n", - " keras.layers.Reshape([28, 28, 1], input_shape=[28, 28]),\n", - " keras.layers.Conv2D(16, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n", - " keras.layers.MaxPool2D(pool_size=2),\n", - " keras.layers.Conv2D(32, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n", - " keras.layers.MaxPool2D(pool_size=2),\n", - " keras.layers.Conv2D(64, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n", - " keras.layers.MaxPool2D(pool_size=2),\n", - " keras.layers.Flatten(),\n", - " keras.layers.Dense(20, activation=\"selu\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "classifier.compile(loss=\"sparse_categorical_crossentropy\", optimizer=keras.optimizers.SGD(lr=0.02), metrics=[\"accuracy\"])\n", - "history = classifier.fit(X_train_small, y_train_small, epochs=20, validation_data=[X_valid, y_valid])" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "pd.DataFrame(history.history).plot()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "conv_encoder_clone = keras.models.clone_model(conv_encoder)\n", - "\n", - "pretrained_clf = 
keras.models.Sequential([\n", - " conv_encoder_clone,\n", - " keras.layers.Flatten(),\n", - " keras.layers.Dense(20, activation=\"selu\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])" + "# Recurrent Autoencoders" ] }, { @@ -632,12 +609,18 @@ "metadata": {}, "outputs": [], "source": [ - "conv_encoder_clone.trainable = False\n", - "pretrained_clf.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=0.02),\n", - " metrics=[\"accuracy\"])\n", - "history = pretrained_clf.fit(X_train_small, y_train_small, epochs=30,\n", - " validation_data=[X_valid, y_valid])" + "recurrent_encoder = keras.models.Sequential([\n", + " keras.layers.LSTM(100, return_sequences=True, input_shape=[28, 28]),\n", + " keras.layers.LSTM(30)\n", + "])\n", + "recurrent_decoder = keras.models.Sequential([\n", + " keras.layers.RepeatVector(28, input_shape=[30]),\n", + " keras.layers.LSTM(100, return_sequences=True),\n", + " keras.layers.TimeDistributed(keras.layers.Dense(28, activation=\"sigmoid\"))\n", + "])\n", + "recurrent_ae = keras.models.Sequential([recurrent_encoder, recurrent_decoder])\n", + "recurrent_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(0.1),\n", + " metrics=[rounded_accuracy])" ] }, { @@ -646,12 +629,17 @@ "metadata": {}, "outputs": [], "source": [ - "conv_encoder_clone.trainable = True\n", - "pretrained_clf.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=0.02),\n", - " metrics=[\"accuracy\"])\n", - "history = pretrained_clf.fit(X_train_small, y_train_small, epochs=20,\n", - " validation_data=[X_valid, y_valid])" + "history = recurrent_ae.fit(X_train, X_train, epochs=10, validation_data=[X_valid, X_valid])" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "show_reconstructions(recurrent_ae)\n", + "plt.show()" ] }, { @@ -670,7 +658,7 @@ }, { "cell_type": "code", - "execution_count": 30, + 
"execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -679,7 +667,7 @@ "\n", "denoising_encoder = keras.models.Sequential([\n", " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.GaussianNoise(1.0),\n", + " keras.layers.GaussianNoise(0.2),\n", " keras.layers.Dense(100, activation=\"selu\"),\n", " keras.layers.Dense(30, activation=\"selu\")\n", "])\n", @@ -690,18 +678,22 @@ "])\n", "denoising_ae = keras.models.Sequential([denoising_encoder, denoising_decoder])\n", "denoising_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.0),\n", - " metrics=[\"accuracy\"])\n", + " metrics=[rounded_accuracy])\n", "history = denoising_ae.fit(X_train, X_train, epochs=10,\n", " validation_data=[X_valid, X_valid])" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ - "show_reconstructions(denoising_ae)\n", + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "noise = keras.layers.GaussianNoise(0.2)\n", + "show_reconstructions(denoising_ae, noise(X_valid, training=True))\n", "plt.show()" ] }, @@ -714,7 +706,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -734,19 +726,23 @@ "])\n", "dropout_ae = keras.models.Sequential([dropout_encoder, dropout_decoder])\n", "dropout_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.0),\n", - " metrics=[\"accuracy\"])\n", + " metrics=[rounded_accuracy])\n", "history = dropout_ae.fit(X_train, X_train, epochs=10,\n", " validation_data=[X_valid, X_valid])" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ - "show_reconstructions(dropout_ae)\n", - "plt.show()" + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "dropout = keras.layers.Dropout(0.5)\n", + "show_reconstructions(dropout_ae, dropout(X_valid, training=True))\n", + 
"save_fig(\"dropout_denoising_plot\", tight_layout=False)" ] }, { @@ -760,12 +756,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's build a simple stacked autoencoder, but this time we will use the sigmoid activation function for the coding layer, to ensure that the coding values range from 0 to 1:" + "Let's build a simple stacked autoencoder, so we can compare it to the sparse autoencoders we will build. This time we will use the sigmoid activation function for the coding layer, to ensure that the coding values range from 0 to 1:" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ @@ -784,14 +780,14 @@ "])\n", "simple_ae = keras.models.Sequential([simple_encoder, simple_decoder])\n", "simple_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.),\n", - " metrics=[\"accuracy\"])\n", + " metrics=[rounded_accuracy])\n", "history = simple_ae.fit(X_train, X_train, epochs=10,\n", " validation_data=[X_valid, X_valid])" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -808,7 +804,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -825,7 +821,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -858,7 +854,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -875,7 +871,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -897,14 +893,14 @@ "])\n", "sparse_l1_ae = keras.models.Sequential([sparse_l1_encoder, sparse_l1_decoder])\n", "sparse_l1_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.0),\n", - " metrics=[\"accuracy\"])\n", + " metrics=[rounded_accuracy])\n", 
"history = sparse_l1_ae.fit(X_train, X_train, epochs=10,\n", " validation_data=[X_valid, X_valid])" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -913,7 +909,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -930,7 +926,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -953,11 +949,12 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "K = keras.backend\n", + "kl_divergence = keras.losses.kullback_leibler_divergence\n", "\n", "class KLDivergenceRegularizer(keras.regularizers.Regularizer):\n", " def __init__(self, weight, target=0.1):\n", @@ -966,13 +963,13 @@ " def __call__(self, inputs):\n", " mean_activities = K.mean(inputs, axis=0)\n", " return self.weight * (\n", - " keras.losses.kullback_leibler_divergence(self.target, mean_activities) +\n", - " keras.losses.kullback_leibler_divergence(1. - self.target, 1. - mean_activities))" + " kl_divergence(self.target, mean_activities) +\n", + " kl_divergence(1. - self.target, 1. 
- mean_activities))" ] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -992,27 +989,28 @@ "])\n", "sparse_kl_ae = keras.models.Sequential([sparse_kl_encoder, sparse_kl_decoder])\n", "sparse_kl_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.0),\n", - " metrics=[\"accuracy\"])\n", + " metrics=[rounded_accuracy])\n", "history = sparse_kl_ae.fit(X_train, X_train, epochs=10,\n", " validation_data=[X_valid, X_valid])" ] }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [], - "source": [ - "show_reconstructions(sparse_kl_ae)" - ] - }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], + "source": [ + "show_reconstructions(sparse_kl_ae)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], "source": [ "plot_activations_histogram(sparse_kl_encoder)\n", + "save_fig(\"sparse_autoencoder_plot\")\n", "plt.show()" ] }, @@ -1020,12 +1018,486 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Hashing Autoencoder" + "# Variational Autoencoder" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "class Sampling(keras.layers.Layer):\n", + " def call(self, inputs):\n", + " mean, log_var = inputs\n", + " return K.random_normal(tf.shape(log_var)) * K.exp(log_var / 2) + mean " + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "codings_size = 10\n", + "\n", + "inputs = keras.layers.Input(shape=[28, 28])\n", + "z = keras.layers.Flatten()(inputs)\n", + "z = keras.layers.Dense(150, activation=\"selu\")(z)\n", + "z = keras.layers.Dense(100, activation=\"selu\")(z)\n", + "codings_mean = keras.layers.Dense(codings_size)(z)\n", + "codings_log_var = 
keras.layers.Dense(codings_size)(z)\n", + "codings = Sampling()([codings_mean, codings_log_var])\n", + "variational_encoder = keras.models.Model(\n", + " inputs=[inputs], outputs=[codings_mean, codings_log_var, codings])\n", + "\n", + "decoder_inputs = keras.layers.Input(shape=[codings_size])\n", + "x = keras.layers.Dense(100, activation=\"selu\")(decoder_inputs)\n", + "x = keras.layers.Dense(150, activation=\"selu\")(x)\n", + "x = keras.layers.Dense(28 * 28, activation=\"sigmoid\")(x)\n", + "outputs = keras.layers.Reshape([28, 28])(x)\n", + "variational_decoder = keras.models.Model(inputs=[decoder_inputs], outputs=[outputs])\n", + "\n", + "_, _, codings = variational_encoder(inputs)\n", + "reconstructions = variational_decoder(codings)\n", + "variational_ae = keras.models.Model(inputs=[inputs], outputs=[reconstructions])\n", + "\n", + "latent_loss = -0.5 * K.sum(\n", + " 1 + codings_log_var - K.exp(codings_log_var) - K.square(codings_mean),\n", + " axis=-1)\n", + "variational_ae.add_loss(K.mean(latent_loss) / 784.)\n", + "variational_ae.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\", metrics=[rounded_accuracy])\n", + "history = variational_ae.fit(X_train, X_train, epochs=25, batch_size=128,\n", + " validation_data=[X_valid, X_valid])" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "show_reconstructions(variational_ae)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generate Fashion Images" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_multiple_images(images, n_cols=None):\n", + " n_cols = n_cols or len(images)\n", + " n_rows = (len(images) - 1) // n_cols + 1\n", + " if images.shape[-1] == 1:\n", + " images = np.squeeze(images, axis=-1)\n", + " plt.figure(figsize=(n_cols, n_rows))\n", + " for index, image in enumerate(images):\n", + " 
plt.subplot(n_rows, n_cols, index + 1)\n", + " plt.imshow(image, cmap=\"binary\")\n", + " plt.axis(\"off\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's generate a few random codings, decode them and plot the resulting images:" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "\n", + "codings = tf.random.normal(shape=[12, codings_size])\n", + "images = variational_decoder(codings).numpy()\n", + "plot_multiple_images(images, 4)\n", + "save_fig(\"vae_generated_images_plot\", tight_layout=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's perform semantic interpolation between these images:" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "codings_grid = tf.reshape(codings, [1, 3, 4, codings_size])\n", + "larger_grid = tf.image.resize(codings_grid, size=[5, 7])\n", + "interpolated_codings = tf.reshape(larger_grid, [-1, codings_size])\n", + "images = variational_decoder(interpolated_codings).numpy()\n", + "\n", + "plt.figure(figsize=(7, 5))\n", + "for index, image in enumerate(images):\n", + " plt.subplot(5, 7, index + 1)\n", + " if index%7%2==0 and index//7%2==0:\n", + " plt.gca().get_xaxis().set_visible(False)\n", + " plt.gca().get_yaxis().set_visible(False)\n", + " else:\n", + " plt.axis(\"off\")\n", + " plt.imshow(image, cmap=\"binary\")\n", + "save_fig(\"semantic_interpolation_plot\", tight_layout=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Generative Adversarial Networks" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", + "\n", + "codings_size = 30\n", + "\n", + "generator = keras.models.Sequential([\n", + " 
keras.layers.Dense(100, activation=\"selu\", input_shape=[codings_size]),\n", + " keras.layers.Dense(150, activation=\"selu\"),\n", + " keras.layers.Dense(28 * 28, activation=\"sigmoid\"),\n", + " keras.layers.Reshape([28, 28])\n", + "])\n", + "discriminator = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.Dense(150, activation=\"selu\"),\n", + " keras.layers.Dense(100, activation=\"selu\"),\n", + " keras.layers.Dense(1, activation=\"sigmoid\")\n", + "])\n", + "gan = keras.models.Sequential([generator, discriminator])" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "discriminator.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\")\n", + "discriminator.trainable = False\n", + "gan.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\")" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "batch_size = 32\n", + "dataset = tf.data.Dataset.from_tensor_slices(X_train).shuffle(1000)\n", + "dataset = dataset.batch(batch_size, drop_remainder=True).prefetch(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "def train_gan(gan, dataset, batch_size, codings_size, n_epochs=50):\n", + " generator, discriminator = gan.layers\n", + " for epoch in range(n_epochs):\n", + " print(\"Epoch {}/{}\".format(epoch + 1, n_epochs)) # not shown in the book\n", + " for X_batch in dataset:\n", + " # phase 1 - training the discriminator\n", + " noise = tf.random.normal(shape=[batch_size, codings_size])\n", + " generated_images = generator(noise)\n", + " X_fake_and_real = tf.concat([generated_images, X_batch], axis=0)\n", + " y1 = tf.constant([[0.]] * batch_size + [[1.]] * batch_size)\n", + " discriminator.trainable = True\n", + " discriminator.train_on_batch(X_fake_and_real, y1)\n", + " # phase 2 - training the generator\n", + " noise = 
tf.random.normal(shape=[batch_size, codings_size])\n", + " y2 = tf.constant([[1.]] * batch_size)\n", + " discriminator.trainable = False\n", + " gan.train_on_batch(noise, y2)\n", + " plot_multiple_images(generated_images, 8) # not shown\n", + " plt.show() # not shown" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "train_gan(gan, dataset, batch_size, codings_size, n_epochs=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "noise = tf.random.normal(shape=[batch_size, codings_size])\n", + "generated_images = generator(noise)\n", + "plot_multiple_images(generated_images, 8)\n", + "save_fig(\"gan_generated_images_plot\", tight_layout=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "train_gan(gan, dataset, batch_size, codings_size)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Deep Convolutional GAN" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "codings_size = 100\n", + "\n", + "generator = keras.models.Sequential([\n", + " keras.layers.Dense(7 * 7 * 128, input_shape=[codings_size]),\n", + " keras.layers.Reshape([7, 7, 128]),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Conv2DTranspose(64, kernel_size=5, strides=2, padding=\"SAME\",\n", + " activation=\"selu\"),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Conv2DTranspose(1, kernel_size=5, strides=2, padding=\"SAME\",\n", + " activation=\"tanh\"),\n", + "])\n", + "discriminator = keras.models.Sequential([\n", + " keras.layers.Conv2D(64, kernel_size=5, strides=2, padding=\"SAME\",\n", + " activation=keras.layers.LeakyReLU(0.2),\n", + " input_shape=[28, 28, 
1]),\n", + " keras.layers.Dropout(0.4),\n", + " keras.layers.Conv2D(128, kernel_size=5, strides=2, padding=\"SAME\",\n", + " activation=keras.layers.LeakyReLU(0.2)),\n", + " keras.layers.Dropout(0.4),\n", + " keras.layers.Flatten(),\n", + " keras.layers.Dense(1, activation=\"sigmoid\")\n", + "])\n", + "gan = keras.models.Sequential([generator, discriminator])" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "discriminator.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\")\n", + "discriminator.trainable = False\n", + "gan.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\")" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "X_train_dcgan = X_train.reshape(-1, 28, 28, 1) * 2. - 1. # reshape and rescale" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "batch_size = 32\n", + "dataset = tf.data.Dataset.from_tensor_slices(X_train_dcgan)\n", + "dataset = dataset.shuffle(1000)\n", + "dataset = dataset.batch(batch_size, drop_remainder=True).prefetch(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "train_gan(gan, dataset, batch_size, codings_size)" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "noise = tf.random.normal(shape=[batch_size, codings_size])\n", + "generated_images = generator(noise)\n", + "plot_multiple_images(generated_images, 8)\n", + "save_fig(\"dcgan_generated_images_plot\", tight_layout=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exercise Solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Unsupervised pretraining" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "Let's create a small neural network for MNIST classification:" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "X_train_small = X_train[:500]\n", + "y_train_small = y_train[:500]\n", + "\n", + "classifier = keras.models.Sequential([\n", + " keras.layers.Reshape([28, 28, 1], input_shape=[28, 28]),\n", + " keras.layers.Conv2D(16, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n", + " keras.layers.MaxPool2D(pool_size=2),\n", + " keras.layers.Conv2D(32, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n", + " keras.layers.MaxPool2D(pool_size=2),\n", + " keras.layers.Conv2D(64, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n", + " keras.layers.MaxPool2D(pool_size=2),\n", + " keras.layers.Flatten(),\n", + " keras.layers.Dense(20, activation=\"selu\"),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])\n", + "classifier.compile(loss=\"sparse_categorical_crossentropy\", optimizer=keras.optimizers.SGD(lr=0.02),\n", + " metrics=[\"accuracy\"])\n", + "history = classifier.fit(X_train_small, y_train_small, epochs=20, validation_data=(X_valid, y_valid))" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "pd.DataFrame(history.history).plot()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "conv_encoder_clone = keras.models.clone_model(conv_encoder)\n", + "conv_encoder_clone.set_weights(conv_encoder.get_weights()) # clone_model() creates fresh weights; copy the pretrained ones\n", + "pretrained_clf = keras.models.Sequential([\n", + " conv_encoder_clone,\n", + " keras.layers.Flatten(),\n", + " keras.layers.Dense(20, activation=\"selu\"),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": 
[], + "source": [ + "conv_encoder_clone.trainable = False\n", + "pretrained_clf.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=0.02),\n", + " metrics=[\"accuracy\"])\n", + "history = pretrained_clf.fit(X_train_small, y_train_small, epochs=30,\n", + " validation_data=(X_valid, y_valid))" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "conv_encoder_clone.trainable = True\n", + "pretrained_clf.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=0.02),\n", + " metrics=[\"accuracy\"])\n", + "history = pretrained_clf.fit(X_train_small, y_train_small, epochs=20,\n", + " validation_data=(X_valid, y_valid))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hashing Using a Binary Autoencoder" + ] + }, + { + "cell_type": "code", + "execution_count": 72, "metadata": {}, "outputs": [], "source": [ @@ -1045,14 +1517,14 @@ "])\n", "hashing_ae = keras.models.Sequential([hashing_encoder, hashing_decoder])\n", "hashing_ae.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=1.0),\n", - " metrics=[\"accuracy\"])\n", + " metrics=[rounded_accuracy])\n", "history = hashing_ae.fit(X_train, X_train, epochs=10,\n", " validation_data=[X_valid, X_valid])" ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 73, "metadata": {}, "outputs": [], "source": [ @@ -1062,7 +1534,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 74, "metadata": {}, "outputs": [], "source": [ @@ -1072,7 +1544,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 75, "metadata": {}, "outputs": [], "source": [ @@ -1086,8 +1558,10 @@ }, { "cell_type": "code", - "execution_count": 51, - "metadata": {}, + "execution_count": 76, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "n_bits = 4\n", @@ -1100,167 +1574,6 @@ " 
plt.imshow(image, cmap=\"binary\")\n", " plt.axis(\"off\")" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Variational Autoencoder" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [], - "source": [ - "class Sampling(keras.layers.Layer):\n", - " def call(self, inputs):\n", - " mean, log_var = inputs\n", - " return mean + K.exp(log_var / 2) * K.random_normal(shape=tf.shape(log_var))" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "codings_size = 30\n", - "\n", - "inputs = keras.layers.Input(shape=[28, 28])\n", - "z = keras.layers.Flatten()(inputs)\n", - "z = keras.layers.Dense(150, activation=\"selu\")(z)\n", - "z = keras.layers.Dense(100, activation=\"selu\")(z)\n", - "codings_mean = keras.layers.Dense(codings_size)(z)\n", - "codings_log_var = keras.layers.Dense(codings_size)(z)\n", - "codings = Sampling()([codings_mean, codings_log_var])\n", - "variational_encoder = keras.models.Model(inputs=[inputs], outputs=[codings_mean, codings_log_var, codings])\n", - "\n", - "decoder_inputs = keras.layers.Input(shape=[codings_size])\n", - "x = keras.layers.Dense(100, activation=\"selu\")(decoder_inputs)\n", - "x = keras.layers.Dense(150, activation=\"selu\")(x)\n", - "x = keras.layers.Dense(28 * 28, activation=\"sigmoid\")(x)\n", - "outputs = keras.layers.Reshape([28, 28])(x)\n", - "variational_decoder = keras.models.Model(inputs=[decoder_inputs], outputs=[outputs])\n", - "\n", - "_, _, codings = variational_encoder(inputs)\n", - "reconstructions = variational_decoder(codings)\n", - "variational_ae = keras.models.Model(inputs=[inputs], outputs=[reconstructions])\n", - "\n", - "kld_loss = -0.5 * K.sum(1 + codings_log_var - K.exp(codings_log_var) - K.square(codings_mean), axis=-1)\n", - "variational_ae.add_loss(K.mean(kld_loss) / 784.)\n", - 
"variational_ae.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\", metrics=[\"accuracy\"])\n", - "history = variational_ae.fit(X_train, X_train, epochs=50,\n", - " validation_data=[X_valid, X_valid],\n", - " batch_size=128)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [], - "source": [ - "show_reconstructions(variational_ae)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Generate Fashion Images" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's train the model and generate a few random fashion images:" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "n_rows = 6\n", - "n_cols = 10\n", - "codings_rnd = np.random.normal(size=[n_rows * n_cols, codings_size])\n", - "images = variational_decoder.predict(codings_rnd)" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_multiple_images(images, n_rows, n_cols, pad=2):\n", - " images = images - images.min() # make the minimum == 0, so the padding looks white\n", - " w,h = images.shape[1:]\n", - " image = np.zeros(((w+pad)*n_rows+pad, (h+pad)*n_cols+pad))\n", - " for y in range(n_rows):\n", - " for x in range(n_cols):\n", - " image[(y*(h+pad)+pad):(y*(h+pad)+pad+h),(x*(w+pad)+pad):(x*(w+pad)+pad+w)] = images[y*n_cols+x]\n", - " plt.imshow(image, cmap=\"Greys\", interpolation=\"nearest\")\n", - " plt.axis(\"off\")" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(8, 5))\n", - "plot_multiple_images(images, n_rows, n_cols)\n", - "save_fig(\"generated_fashion_images_plot\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Encode & Decode" - ] - }, - { - "cell_type": "code", - 
"execution_count": 58, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "n_iterations = 3\n", - "n_images = 10\n", - "source_codings = np.random.normal(size=[n_images, codings_size])\n", - "target_codings = np.roll(source_codings, -1, axis=0)\n", - "images = []\n", - "for iteration in range(n_iterations):\n", - " codings_interpolate = source_codings + (target_codings - source_codings) * iteration / n_iterations\n", - " images.append(variational_decoder(codings_interpolate).numpy())\n", - "images = np.concatenate(images)\n", - "\n", - "plt.figure(figsize=(8, 3))\n", - "plot_multiple_images(images, n_iterations, n_cols)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {