From 327407501e5a38c123f987591ca2c0cfaf4efa54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Fri, 26 Apr 2019 21:19:32 +0800 Subject: [PATCH] Add notebook 17 on autoencoders --- 17_autoencoders.ipynb | 1300 ++++++++++++++++++ work_in_progress/15_autoencoders.ipynb | 1756 ------------------------ 2 files changed, 1300 insertions(+), 1756 deletions(-) create mode 100644 17_autoencoders.ipynb delete mode 100644 work_in_progress/15_autoencoders.ipynb diff --git a/17_autoencoders.ipynb b/17_autoencoders.ipynb new file mode 100644 index 0000000..d770238 --- /dev/null +++ b/17_autoencoders.ipynb @@ -0,0 +1,1300 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 17 – Autoencoders**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code in chapter 17._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0-preview." 
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current Matplotlib figure as IMAGES_PATH/<fig_id>.<fig_extension>.

    Args:
        fig_id: Base name of the image file, without extension.
        tight_layout: When true, ``plt.tight_layout()`` is called before saving.
        fig_extension: File extension; also passed to ``savefig`` as ``format``.
        resolution: Passed to ``savefig`` as ``dpi``.
    """
    file_name = "".join([fig_id, ".", fig_extension])
    destination = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(destination, format=fig_extension, dpi=resolution)


def plot_image(image):
    """Draw `image` on the current axes with the "Greys" colormap and hide the axes."""
    display_options = {"cmap": "Greys", "interpolation": "nearest"}
    plt.imshow(image, **display_options)
    plt.axis("off")
"metadata": {}, + "source": [ + "Build 3D dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(4)\n", + "\n", + "m = 200\n", + "w1, w2 = 0.1, 0.3\n", + "noise = 0.1\n", + "\n", + "angles = np.random.rand(m) * 3 * np.pi / 2 - 0.5\n", + "data = np.empty((m, 3))\n", + "data[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * np.random.randn(m) / 2\n", + "data[:, 1] = np.sin(angles) * 0.7 + noise * np.random.randn(m) / 2\n", + "data[:, 2] = data[:, 0] * w1 + data[:, 1] * w2 + noise * np.random.randn(m)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Normalize the data:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "scaler = StandardScaler()\n", + "X_train = scaler.fit_transform(data[:100])\n", + "X_test = scaler.transform(data[100:])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's build the Autoencoder..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "\n", + "encoder = keras.models.Sequential([keras.layers.Dense(2, input_shape=[3])])\n", + "decoder = keras.models.Sequential([keras.layers.Dense(3, input_shape=[2])])\n", + "autoencoder = keras.models.Sequential([encoder, decoder])\n", + "\n", + "autoencoder.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(lr=0.1))\n", + "history = autoencoder.fit(X_train, X_train, epochs=20, validation_data=[X_test, X_test])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "codings_test = encoder.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure(figsize=(4,3))\n", + "plt.plot(codings_test[:,0], codings_test[:, 1], \"b.\")\n", + "plt.xlabel(\"$z_1$\", fontsize=18)\n", + "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + "save_fig(\"linear_autoencoder_pca_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Stacked Autoencoders" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's use Fashion MNIST:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()\n", + "X_train_full = X_train_full / 255\n", + "X_test = X_test / 255\n", + "X_train, X_valid = X_train_full[:-5000], X_train_full[-5000:]\n", + "y_train, y_valid = y_train_full[:-5000], y_train_full[-5000:]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train all layers at once" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's build a stacked Autoencoder with 3 hidden layers and 1 output layer (i.e., 2 stacked Autoencoders)."
def show_reconstructions(model, n_test_images=2, images=None):
    """Plot input images next to the model's reconstructions of them.

    Each row of the figure shows one input image (left) and the corresponding
    output of ``model.predict()`` (right).

    Args:
        model: Object whose ``predict()`` maps a batch of images to a batch of
            reconstructions of the same length.
        n_test_images: Maximum number of images to display.
        images: Optional sliceable batch of images. When omitted, the global
            ``X_test`` is used, which is what this function always read before
            the parameter existed.
    """
    if images is None:
        images = X_test
    images = images[:n_test_images]
    # Size the grid from what is actually shown, so a short batch does not
    # leave empty rows or index past the end.
    n_rows = len(images)
    reconstructions = model.predict(images)
    plt.figure(figsize=(8, 3 * n_rows))
    for row, (image, reconstruction) in enumerate(zip(images, reconstructions)):
        plt.subplot(n_rows, 2, row * 2 + 1)
        plot_image(image)
        plt.subplot(n_rows, 2, row * 2 + 2)
        plot_image(reconstruction)
K = keras.backend


class DenseTranspose(keras.layers.Layer):
    """Layer that multiplies its inputs by the transpose of another layer's weights.

    The wrapped layer's first weight (``dense.weights[0]``) is transposed on
    every call, so the two layers share that weight. Only the bias vector
    created in ``build()`` belongs to this layer.

    Args:
        dense: The layer whose first weight is reused (transposed).
        activation: Anything accepted by ``keras.activations.get``.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, dense, activation=None, **kwargs):
        # Attributes are assigned before the base initializer runs, exactly as
        # in the original implementation.
        self.dense = dense
        self.activation = keras.activations.get(activation)
        super().__init__(**kwargs)

    def build(self, batch_input_shape):
        # This layer outputs as many units as the wrapped layer has inputs.
        n_outputs = self.dense.input_shape[-1]
        self.biases = self.add_weight(name="bias", shape=[n_outputs],
                                      initializer="zeros")
        super().build(batch_input_shape)

    def call(self, inputs):
        tied_weights = K.transpose(self.dense.weights[0])
        pre_activation = inputs @ tied_weights + self.biases
        return self.activation(pre_activation)
def train_autoencoder(n_neurons, X_train, X_valid, loss, optimizer, metrics=None,
                      n_epochs=10, output_activation=None):
    """Train a one-hidden-layer autoencoder and return its two halves and the codings.

    Args:
        n_neurons: Number of units in the coding (encoder) layer.
        X_train: Training inputs; its last dimension sets the input/output width.
        X_valid: Validation inputs, used as both validation input and target.
        loss: Loss passed to ``compile()``.
        optimizer: Optimizer passed to ``compile()``.
        metrics: Optional metrics passed to ``compile()``.
        n_epochs: Number of training epochs.
        output_activation: Activation of the decoder's output layer.

    Returns:
        A tuple ``(encoder, decoder, encoder(X_train), encoder(X_valid))``.
    """
    n_inputs = X_train.shape[-1]
    encoder = keras.models.Sequential([
        keras.layers.Dense(n_neurons, activation="selu", input_shape=[n_inputs])
    ])
    decoder = keras.models.Sequential([
        keras.layers.Dense(n_inputs, activation=output_activation),
    ])
    autoencoder = keras.models.Sequential([encoder, decoder])
    autoencoder.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    # validation_data must be a tuple (x_val, y_val): Keras documents tuples,
    # and newer TensorFlow releases interpret a list as a multi-input `x`.
    autoencoder.fit(X_train, X_train, epochs=n_epochs,
                    validation_data=(X_valid, X_valid))
    return encoder, decoder, encoder(X_train), encoder(X_valid)
"cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "show_reconstructions(stacked_ae_1_by_1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "stacked_ae_1_by_1.compile(loss=\"binary_crossentropy\", optimizer=keras.optimizers.SGD(lr=0.5),\n", + " metrics=[\"accuracy\"])\n", + "history = stacked_ae_1_by_1.fit(X_train, X_train, epochs=10, validation_data=[X_valid, X_valid])" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "show_reconstructions(stacked_ae_1_by_1)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualizing the extracted features" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "weights1 = stacked_ae_1_by_1.layers[1].get_weights()[0]\n", + "plt.figure(figsize=(8, 2))\n", + "n_rows, n_cols = 2, 8\n", + "for row in range(n_rows):\n", + " for col in range(n_cols):\n", + " index = row * n_cols + col\n", + " plt.subplot(n_rows, n_cols, index + 1)\n", + " plt.imshow(weights1[:, index].reshape(28, 28), cmap=\"Greys\")\n", + " plt.axis(\"off\")\n", + "\n", + "save_fig(\"extracted_features_plot\", tight_layout=False) # not shown\n", + "plt.show() # not shown" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using Convolutional Layers Instead of Dense Layers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's build a convolutional Autoencoder: the encoder stacks 3 convolutional layers, each followed by a max pooling layer, and the decoder stacks 3 transposed convolutional layers."
# Convolutional autoencoder: three Conv2D + MaxPool2D stages in the encoder,
# three Conv2DTranspose layers in the decoder.
tf.random.set_seed(42)
np.random.seed(42)

conv_encoder = keras.models.Sequential([
    keras.layers.Reshape([28, 28, 1], input_shape=[28, 28]),
    keras.layers.Conv2D(16, kernel_size=3, padding="SAME", activation="selu"),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Conv2D(32, kernel_size=3, padding="SAME", activation="selu"),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Conv2D(64, kernel_size=3, padding="SAME", activation="selu"),
    keras.layers.MaxPool2D(pool_size=2)
])
conv_decoder = keras.models.Sequential([
    # The decoder declares a [3, 3, 64] input, i.e. the encoder's output shape.
    keras.layers.Conv2DTranspose(32, kernel_size=3, strides=2, padding="VALID", activation="selu",
                                 input_shape=[3, 3, 64]),
    keras.layers.Conv2DTranspose(16, kernel_size=3, strides=2, padding="SAME", activation="selu"),
    keras.layers.Conv2DTranspose(1, kernel_size=3, strides=2, padding="SAME", activation="sigmoid"),
    keras.layers.Reshape([28, 28])
])
conv_ae = keras.models.Sequential([conv_encoder, conv_decoder])

# `learning_rate` is the documented argument name; `lr` is a deprecated alias.
conv_ae.compile(loss="binary_crossentropy",
                optimizer=keras.optimizers.SGD(learning_rate=1.0),
                metrics=["accuracy"])
# validation_data must be a tuple (x_val, y_val); newer TensorFlow releases
# interpret a list as a multi-input `x`.
history = conv_ae.fit(X_train, X_train, epochs=5,
                      validation_data=(X_valid, X_valid))
# Build a classifier on top of a copy of the encoder trained in conv_ae.
tf.random.set_seed(42)
np.random.seed(42)

conv_encoder_clone = keras.models.clone_model(conv_encoder)
# clone_model() only copies the architecture: the clone gets freshly
# initialized weights. Copy the trained weights explicitly, otherwise the
# "pretrained" classifier starts from random weights.
conv_encoder_clone.set_weights(conv_encoder.get_weights())

pretrained_clf = keras.models.Sequential([
    conv_encoder_clone,
    keras.layers.Flatten(),
    keras.layers.Dense(20, activation="selu"),
    keras.layers.Dense(10, activation="softmax")
])
# Stacked denoising autoencoder: Gaussian noise is added to the flattened
# inputs by a GaussianNoise layer placed first in the encoder.
tf.random.set_seed(42)
np.random.seed(42)

denoising_encoder = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.GaussianNoise(1.0),
    keras.layers.Dense(100, activation="selu"),
    keras.layers.Dense(30, activation="selu")
])
denoising_decoder = keras.models.Sequential([
    keras.layers.Dense(100, activation="selu", input_shape=[30]),
    keras.layers.Dense(28 * 28, activation="sigmoid"),
    keras.layers.Reshape([28, 28])
])
denoising_ae = keras.models.Sequential([denoising_encoder, denoising_decoder])

# `learning_rate` is the documented argument name; `lr` is a deprecated alias.
denoising_ae.compile(loss="binary_crossentropy",
                     optimizer=keras.optimizers.SGD(learning_rate=1.0),
                     metrics=["accuracy"])
# validation_data must be a tuple (x_val, y_val); newer TensorFlow releases
# interpret a list as a multi-input `x`.
history = denoising_ae.fit(X_train, X_train, epochs=10,
                           validation_data=(X_valid, X_valid))
# Stacked denoising autoencoder variant: the inputs are corrupted by a
# Dropout(0.5) layer instead of Gaussian noise.
tf.random.set_seed(42)
np.random.seed(42)

dropout_encoder = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(100, activation="selu"),
    keras.layers.Dense(30, activation="selu")
])
dropout_decoder = keras.models.Sequential([
    keras.layers.Dense(100, activation="selu", input_shape=[30]),
    keras.layers.Dense(28 * 28, activation="sigmoid"),
    keras.layers.Reshape([28, 28])
])
dropout_ae = keras.models.Sequential([dropout_encoder, dropout_decoder])

# `learning_rate` is the documented argument name; `lr` is a deprecated alias.
dropout_ae.compile(loss="binary_crossentropy",
                   optimizer=keras.optimizers.SGD(learning_rate=1.0),
                   metrics=["accuracy"])
# validation_data must be a tuple (x_val, y_val); newer TensorFlow releases
# interpret a list as a multi-input `x`.
history = dropout_ae.fit(X_train, X_train, epochs=10,
                         validation_data=(X_valid, X_valid))
def plot_percent_hist(ax, data, bins):
    """Draw a histogram of `data` on `ax` with the y axis labelled in percent.

    Args:
        ax: Matplotlib axes to draw on.
        data: Values to histogram; ``len(data)`` is the denominator.
        bins: Array of bin edges; also used as the x tick positions.
    """
    counts, _ = np.histogram(data, bins=bins)
    left_edges, right_edges = bins[:-1], bins[1:]
    widths = right_edges - left_edges
    centers = left_edges + widths / 2
    fractions = counts / len(data)

    def as_percent(y, position):
        # Bar heights are fractions; show them as whole percentages.
        return "{}%".format(int(np.round(100 * y)))

    # Bars are drawn at 80% of the bin width so neighbours do not touch.
    ax.bar(centers, fractions, width=widths * 0.8)
    ax.xaxis.set_ticks(bins)
    ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(as_percent))
    ax.grid(True)


def plot_activations_histogram(encoder, height=1, n_bins=10):
    """Plot two histograms of the encoder's codings for the global ``X_valid``.

    The left panel shows the distribution of all coding values, the right
    panel the distribution of each coding unit's mean value. A dashed line
    marks the mean of the per-unit means on both panels.

    Args:
        encoder: Callable applied to ``X_valid``; its result must have ``.numpy()``.
        height: Upper limit of the (shared) y axis.
        n_bins: Number of equal-width bins between 0 and 1.
    """
    codings = encoder(X_valid).numpy()
    unit_means = codings.mean(axis=0)
    overall_mean = unit_means.mean()
    bin_edges = np.linspace(0, 1, n_bins + 1)

    fig, [ax_all, ax_units] = plt.subplots(figsize=(10, 3), nrows=1, ncols=2, sharey=True)

    # Left panel: every individual activation.
    plot_percent_hist(ax_all, codings.ravel(), bin_edges)
    ax_all.plot([overall_mean, overall_mean], [0, height], "k--",
                label="Overall Mean = {:.2f}".format(overall_mean))
    ax_all.legend(loc="upper center", fontsize=14)
    ax_all.set_xlabel("Activation")
    ax_all.set_ylabel("% Activations")
    ax_all.axis([0, 1, 0, height])

    # Right panel: one value per coding unit (its mean activation).
    plot_percent_hist(ax_units, unit_means, bin_edges)
    ax_units.plot([overall_mean, overall_mean], [0, height], "k--")
    ax_units.set_xlabel("Neuron Mean Activation")
    ax_units.set_ylabel("% Neurons")
    ax_units.axis([0, 1, 0, height])
# Sparse autoencoder: an ActivityRegularization layer applies an l1 penalty
# to the 300 sigmoid coding activations.
tf.random.set_seed(42)
np.random.seed(42)

sparse_l1_encoder = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(100, activation="selu"),
    keras.layers.Dense(300, activation="sigmoid"),
    keras.layers.ActivityRegularization(l1=1e-3)  # Alternatively, you could add
                                                  # activity_regularizer=keras.regularizers.l1(1e-3)
                                                  # to the previous layer.
])
sparse_l1_decoder = keras.models.Sequential([
    keras.layers.Dense(100, activation="selu", input_shape=[300]),
    keras.layers.Dense(28 * 28, activation="sigmoid"),
    keras.layers.Reshape([28, 28])
])
sparse_l1_ae = keras.models.Sequential([sparse_l1_encoder, sparse_l1_decoder])

# `learning_rate` is the documented argument name; `lr` is a deprecated alias.
sparse_l1_ae.compile(loss="binary_crossentropy",
                     optimizer=keras.optimizers.SGD(learning_rate=1.0),
                     metrics=["accuracy"])
# validation_data must be a tuple (x_val, y_val); newer TensorFlow releases
# interpret a list as a multi-input `x`.
history = sparse_l1_ae.fit(X_train, X_train, epochs=10,
                           validation_data=(X_valid, X_valid))
K = keras.backend


class KLDivergenceRegularizer(keras.regularizers.Regularizer):
    """Activity regularizer that pulls mean activations towards a target value.

    The penalty is ``weight * (KL(target, mean) + KL(1 - target, 1 - mean))``,
    where ``mean`` is the mean of the inputs over axis 0 (presumably the batch
    axis, since it is used as an ``activity_regularizer``; confirm if reused
    elsewhere).

    Args:
        weight: Multiplier applied to the penalty.
        target: Desired mean activation of each unit.
    """

    def __init__(self, weight, target=0.1):
        self.weight = weight
        self.target = target

    def __call__(self, inputs):
        mean_activities = K.mean(inputs, axis=0)
        kl_divergence = keras.losses.kullback_leibler_divergence
        return self.weight * (
            kl_divergence(self.target, mean_activities) +
            kl_divergence(1. - self.target, 1. - mean_activities))

    def get_config(self):
        """Return the constructor arguments so the regularizer can be serialized."""
        return {"weight": self.weight, "target": self.target}
class Sampling(keras.layers.Layer):
    """Layer that samples codings from a mean and a log-variance tensor."""

    def call(self, inputs):
        """Return ``mean + exp(log_var / 2) * noise``.

        Args:
            inputs: A pair ``(mean, log_var)``.

        Returns:
            A sampled tensor; ``noise`` comes from ``K.random_normal`` (called
            with its default parameters) and has the shape of ``log_var``.
        """
        mean, log_var = inputs
        std_dev = K.exp(log_var / 2)
        noise = K.random_normal(shape=tf.shape(log_var))
        return mean + std_dev * noise
# %% [markdown]
# ## Generate Fashion Images
#
# The variational autoencoder was trained above, so we can now sample random
# codings and decode them into a few random fashion images:

# %%
tf.random.set_seed(42)
np.random.seed(42)

n_rows = 6
n_cols = 10
codings_rnd = np.random.normal(size=[n_rows * n_cols, codings_size])
images = variational_decoder.predict(codings_rnd)

# %%
def plot_multiple_images(images, n_rows, n_cols, pad=2):
    """Plot a stack of equally sized grayscale images as one padded grid.

    Args:
        images: array of shape (n, height, width); images are placed
            row-major, so at least `n_rows * n_cols` of them are required.
        n_rows: number of grid rows.
        n_cols: number of grid columns.
        pad: number of blank pixels around and between the images.

    Raises:
        IndexError: if `images` holds fewer than `n_rows * n_cols` images.
    """
    images = images - images.min()  # make the minimum == 0, so the padding looks white
    # shape[1:] is (height, width). Using height for the vertical axis and
    # width for the horizontal one everywhere lets non-square images work too.
    h, w = images.shape[1:]
    image = np.zeros(((h + pad) * n_rows + pad, (w + pad) * n_cols + pad))
    for y in range(n_rows):
        top = y * (h + pad) + pad
        for x in range(n_cols):
            left = x * (w + pad) + pad
            image[top:top + h, left:left + w] = images[y * n_cols + x]
    plt.imshow(image, cmap="Greys", interpolation="nearest")
    plt.axis("off")

# %%
plt.figure(figsize=(8, 5))
plot_multiple_images(images, n_rows, n_cols)
save_fig("generated_fashion_images_plot")

# %% [markdown]
# ## Encode & Decode

# %%
n_iterations = 3
n_images = 10
source_codings = np.random.normal(size=[n_images, codings_size])
# Each coding is interpolated towards the next one (the last wraps to the first).
target_codings = np.roll(source_codings, -1, axis=0)
# A separate name keeps the generated `images` from the cells above intact.
interpolated_images = []
for iteration in range(n_iterations):
    codings_interpolate = source_codings + (target_codings - source_codings) * iteration / n_iterations
    interpolated_images.append(variational_decoder(codings_interpolate).numpy())
interpolated_images = np.concatenate(interpolated_images)

plt.figure(figsize=(8, 3))
# One row per interpolation step and one column per source image: the column
# count is this cell's `n_images`, not `n_cols` left over from an earlier cell.
plot_multiple_images(interpolated_images, n_iterations, n_images)
python 2 and python 3\n", - "from __future__ import division, print_function, unicode_literals\n", - "\n", - "# Common imports\n", - "import numpy as np\n", - "import os\n", - "import sys\n", - "\n", - "# to make this notebook's output stable across runs\n", - "def reset_graph(seed=42):\n", - " tf.reset_default_graph()\n", - " tf.set_random_seed(seed)\n", - " np.random.seed(seed)\n", - "\n", - "# To plot pretty figures\n", - "%matplotlib inline\n", - "import matplotlib\n", - "import matplotlib.pyplot as plt\n", - "plt.rcParams['axes.labelsize'] = 14\n", - "plt.rcParams['xtick.labelsize'] = 12\n", - "plt.rcParams['ytick.labelsize'] = 12\n", - "\n", - "# Where to save the figures\n", - "PROJECT_ROOT_DIR = \".\"\n", - "CHAPTER_ID = \"autoencoders\"\n", - "\n", - "def save_fig(fig_id, tight_layout=True):\n", - " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", - " print(\"Saving figure\", fig_id)\n", - " if tight_layout:\n", - " plt.tight_layout()\n", - " plt.savefig(path, format='png', dpi=300)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A couple utility functions to plot grayscale 28x28 image:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_image(image, shape=[28, 28]):\n", - " plt.imshow(image.reshape(shape), cmap=\"Greys\", interpolation=\"nearest\")\n", - " plt.axis(\"off\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_multiple_images(images, n_rows, n_cols, pad=2):\n", - " images = images - images.min() # make the minimum == 0, so the padding looks white\n", - " w,h = images.shape[1:]\n", - " image = np.zeros(((w+pad)*n_rows+pad, (h+pad)*n_cols+pad))\n", - " for y in range(n_rows):\n", - " for x in range(n_cols):\n", - " image[(y*(h+pad)+pad):(y*(h+pad)+pad+h),(x*(w+pad)+pad):(x*(w+pad)+pad+w)] = images[y*n_cols+x]\n", - " plt.imshow(image, 
cmap=\"Greys\", interpolation=\"nearest\")\n", - " plt.axis(\"off\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# PCA with a linear Autoencoder" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Build 3D dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy.random as rnd\n", - "\n", - "rnd.seed(4)\n", - "m = 200\n", - "w1, w2 = 0.1, 0.3\n", - "noise = 0.1\n", - "\n", - "angles = rnd.rand(m) * 3 * np.pi / 2 - 0.5\n", - "data = np.empty((m, 3))\n", - "data[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * rnd.randn(m) / 2\n", - "data[:, 1] = np.sin(angles) * 0.7 + noise * rnd.randn(m) / 2\n", - "data[:, 2] = data[:, 0] * w1 + data[:, 1] * w2 + noise * rnd.randn(m)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Normalize the data:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.preprocessing import StandardScaler\n", - "scaler = StandardScaler()\n", - "X_train = scaler.fit_transform(data[:100])\n", - "X_test = scaler.transform(data[100:])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now let's build the Autoencoder..." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note: instead of using the `fully_connected()` function from the `tensorflow.contrib.layers` module (as in the book), we now use the `dense()` function from the `tf.layers` module, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. 
As you will see, the code is mostly the same.\n", - "\n", - "The main differences relevant to this chapter are:\n", - "* the `scope` parameter was renamed to `name`, and the `_fn` suffix was removed in all the parameters that had it (for example the `activation_fn` parameter was renamed to `activation`).\n", - "* the `weights` parameter was renamed to `kernel` and the weights variable is now named `\"kernel\"` rather than `\"weights\"`,\n", - "* the bias variable is now named `\"bias\"` rather than `\"biases\"`,\n", - "* the default activation is `None` instead of `tf.nn.relu`" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "\n", - "reset_graph()\n", - "\n", - "n_inputs = 3\n", - "n_hidden = 2 # codings\n", - "n_outputs = n_inputs\n", - "\n", - "learning_rate = 0.01\n", - "\n", - "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "hidden = tf.layers.dense(X, n_hidden)\n", - "outputs = tf.layers.dense(hidden, n_outputs)\n", - "\n", - "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))\n", - "\n", - "optimizer = tf.train.AdamOptimizer(learning_rate)\n", - "training_op = optimizer.minimize(reconstruction_loss)\n", - "\n", - "init = tf.global_variables_initializer()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "n_iterations = 1000\n", - "codings = hidden\n", - "\n", - "with tf.Session() as sess:\n", - " init.run()\n", - " for iteration in range(n_iterations):\n", - " training_op.run(feed_dict={X: X_train})\n", - " codings_val = codings.eval(feed_dict={X: X_test})" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "fig = plt.figure(figsize=(4,3))\n", - "plt.plot(codings_val[:,0], codings_val[:, 1], \"b.\")\n", - "plt.xlabel(\"$z_1$\", fontsize=18)\n", - "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", - 
"save_fig(\"linear_autoencoder_pca_plot\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Stacked Autoencoders" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's use MNIST:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "from tensorflow.examples.tutorials.mnist import input_data\n", - "mnist = input_data.read_data_sets(\"/tmp/data/\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Train all layers at once" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's build a stacked Autoencoder with 3 hidden layers and 1 output layer (ie. 2 stacked Autoencoders). We will use ELU activation, He initialization and L2 regularization." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note: since the `tf.layers.dense()` function is incompatible with `tf.contrib.layers.arg_scope()` (which is used in the book), we now use python's `functools.partial()` function instead. It makes it easy to create a `my_dense_layer()` function that just calls `tf.layers.dense()` with the desired parameters automatically set (unless they are overridden when calling `my_dense_layer()`)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "from functools import partial\n", - "\n", - "n_inputs = 28 * 28\n", - "n_hidden1 = 300\n", - "n_hidden2 = 150 # codings\n", - "n_hidden3 = n_hidden1\n", - "n_outputs = n_inputs\n", - "\n", - "learning_rate = 0.01\n", - "l2_reg = 0.0001\n", - "\n", - "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "\n", - "he_init = tf.contrib.layers.variance_scaling_initializer() # He initialization\n", - "#Equivalent to:\n", - "#he_init = lambda shape, dtype=tf.float32: tf.truncated_normal(shape, 0., stddev=np.sqrt(2/shape[0]))\n", - "l2_regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n", - "my_dense_layer = partial(tf.layers.dense,\n", - " activation=tf.nn.elu,\n", - " kernel_initializer=he_init,\n", - " kernel_regularizer=l2_regularizer)\n", - "\n", - "hidden1 = my_dense_layer(X, n_hidden1)\n", - "hidden2 = my_dense_layer(hidden1, n_hidden2)\n", - "hidden3 = my_dense_layer(hidden2, n_hidden3)\n", - "outputs = my_dense_layer(hidden3, n_outputs, activation=None)\n", - "\n", - "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))\n", - "\n", - "reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n", - "loss = tf.add_n([reconstruction_loss] + reg_losses)\n", - "\n", - "optimizer = tf.train.AdamOptimizer(learning_rate)\n", - "training_op = optimizer.minimize(loss)\n", - "\n", - "init = tf.global_variables_initializer()\n", - "saver = tf.train.Saver() # not shown in the book" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now let's train it! Note that we don't feed target values (`y_batch` is not used). This is unsupervised training." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "n_epochs = 5\n", - "batch_size = 150\n", - "\n", - "with tf.Session() as sess:\n", - " init.run()\n", - " for epoch in range(n_epochs):\n", - " n_batches = mnist.train.num_examples // batch_size\n", - " for iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\") # not shown in the book\n", - " sys.stdout.flush() # not shown\n", - " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", - " sess.run(training_op, feed_dict={X: X_batch})\n", - " loss_train = reconstruction_loss.eval(feed_dict={X: X_batch}) # not shown\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", loss_train) # not shown\n", - " saver.save(sess, \"./my_model_all_layers.ckpt\") # not shown" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This function loads the model, evaluates it on the test set (it measures the reconstruction error), then it displays the original image and its reconstruction:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "def show_reconstructed_digits(X, outputs, model_path = None, n_test_digits = 2):\n", - " with tf.Session() as sess:\n", - " if model_path:\n", - " saver.restore(sess, model_path)\n", - " X_test = mnist.test.images[:n_test_digits]\n", - " outputs_val = outputs.eval(feed_dict={X: X_test})\n", - "\n", - " fig = plt.figure(figsize=(8, 3 * n_test_digits))\n", - " for digit_index in range(n_test_digits):\n", - " plt.subplot(n_test_digits, 2, digit_index * 2 + 1)\n", - " plot_image(X_test[digit_index])\n", - " plt.subplot(n_test_digits, 2, digit_index * 2 + 2)\n", - " plot_image(outputs_val[digit_index])" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "show_reconstructed_digits(X, outputs, \"./my_model_all_layers.ckpt\")\n", - 
"save_fig(\"reconstruction_plot\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Tying weights" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). Unfortunately this makes it impossible (or very tricky) to use the `tf.layers.dense()` function, so we need to build the Autoencoder manually:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "n_inputs = 28 * 28\n", - "n_hidden1 = 300\n", - "n_hidden2 = 150 # codings\n", - "n_hidden3 = n_hidden1\n", - "n_outputs = n_inputs\n", - "\n", - "learning_rate = 0.01\n", - "l2_reg = 0.0005" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "activation = tf.nn.elu\n", - "regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n", - "initializer = tf.contrib.layers.variance_scaling_initializer()\n", - "\n", - "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "\n", - "weights1_init = initializer([n_inputs, n_hidden1])\n", - "weights2_init = initializer([n_hidden1, n_hidden2])\n", - "\n", - "weights1 = tf.Variable(weights1_init, dtype=tf.float32, name=\"weights1\")\n", - "weights2 = tf.Variable(weights2_init, dtype=tf.float32, name=\"weights2\")\n", - "weights3 = tf.transpose(weights2, name=\"weights3\") # tied weights\n", - "weights4 = tf.transpose(weights1, name=\"weights4\") # tied weights\n", - "\n", - "biases1 = tf.Variable(tf.zeros(n_hidden1), name=\"biases1\")\n", - "biases2 = tf.Variable(tf.zeros(n_hidden2), name=\"biases2\")\n", - "biases3 = tf.Variable(tf.zeros(n_hidden3), name=\"biases3\")\n", - "biases4 = tf.Variable(tf.zeros(n_outputs), name=\"biases4\")\n", - "\n", - "hidden1 = activation(tf.matmul(X, weights1) + biases1)\n", - "hidden2 = activation(tf.matmul(hidden1, weights2) + 
biases2)\n", - "hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)\n", - "outputs = tf.matmul(hidden3, weights4) + biases4\n", - "\n", - "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))\n", - "reg_loss = regularizer(weights1) + regularizer(weights2)\n", - "loss = reconstruction_loss + reg_loss\n", - "\n", - "optimizer = tf.train.AdamOptimizer(learning_rate)\n", - "training_op = optimizer.minimize(loss)\n", - "\n", - "init = tf.global_variables_initializer()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "saver = tf.train.Saver()" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "n_epochs = 5\n", - "batch_size = 150\n", - "\n", - "with tf.Session() as sess:\n", - " init.run()\n", - " for epoch in range(n_epochs):\n", - " n_batches = mnist.train.num_examples // batch_size\n", - " for iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", - " sys.stdout.flush()\n", - " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", - " sess.run(training_op, feed_dict={X: X_batch})\n", - " loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", loss_train)\n", - " saver.save(sess, \"./my_model_tying_weights.ckpt\")" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "show_reconstructed_digits(X, outputs, \"./my_model_tying_weights.ckpt\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Training one Autoencoder at a time in multiple graphs" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There are many ways to train one Autoencoder at a time. 
The first approach is to train each Autoencoder using a different graph, then we create the Stacked Autoencoder by simply initializing it with the weights and biases copied from these Autoencoders." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's create a function that will train one autoencoder and return the transformed training set (i.e., the output of the hidden layer) and the model parameters." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "from functools import partial\n", - "\n", - "def train_autoencoder(X_train, n_neurons, n_epochs, batch_size,\n", - " learning_rate = 0.01, l2_reg = 0.0005, seed=42,\n", - " hidden_activation=tf.nn.elu,\n", - " output_activation=tf.nn.elu):\n", - " graph = tf.Graph()\n", - " with graph.as_default():\n", - " tf.set_random_seed(seed)\n", - "\n", - " n_inputs = X_train.shape[1]\n", - "\n", - " X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - " \n", - " my_dense_layer = partial(\n", - " tf.layers.dense,\n", - " kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),\n", - " kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))\n", - "\n", - " hidden = my_dense_layer(X, n_neurons, activation=hidden_activation, name=\"hidden\")\n", - " outputs = my_dense_layer(hidden, n_inputs, activation=output_activation, name=\"outputs\")\n", - "\n", - " reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))\n", - "\n", - " reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n", - " loss = tf.add_n([reconstruction_loss] + reg_losses)\n", - "\n", - " optimizer = tf.train.AdamOptimizer(learning_rate)\n", - " training_op = optimizer.minimize(loss)\n", - "\n", - " init = tf.global_variables_initializer()\n", - "\n", - " with tf.Session(graph=graph) as sess:\n", - " init.run()\n", - " for epoch in range(n_epochs):\n", - " n_batches = len(X_train) // batch_size\n", - " for 
iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", - " sys.stdout.flush()\n", - " indices = rnd.permutation(len(X_train))[:batch_size]\n", - " X_batch = X_train[indices]\n", - " sess.run(training_op, feed_dict={X: X_batch})\n", - " loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", loss_train)\n", - " params = dict([(var.name, var.eval()) for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])\n", - " hidden_val = hidden.eval(feed_dict={X: X_train})\n", - " return hidden_val, params[\"hidden/kernel:0\"], params[\"hidden/bias:0\"], params[\"outputs/kernel:0\"], params[\"outputs/bias:0\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now let's train two Autoencoders. The first one is trained on the training data, and the second is trained on the previous Autoencoder's hidden layer output:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "hidden_output, W1, b1, W4, b4 = train_autoencoder(mnist.train.images, n_neurons=300, n_epochs=4, batch_size=150,\n", - " output_activation=None)\n", - "_, W2, b2, W3, b3 = train_autoencoder(hidden_output, n_neurons=150, n_epochs=4, batch_size=150)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, we can create a Stacked Autoencoder by simply reusing the weights and biases from the Autoencoders we just trained:" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "n_inputs = 28*28\n", - "\n", - "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "hidden1 = tf.nn.elu(tf.matmul(X, W1) + b1)\n", - "hidden2 = tf.nn.elu(tf.matmul(hidden1, W2) + b2)\n", - "hidden3 = tf.nn.elu(tf.matmul(hidden2, W3) + b3)\n", - "outputs = tf.matmul(hidden3, W4) + b4" - ] - }, - { - "cell_type": "code", - 
"execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "show_reconstructed_digits(X, outputs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Training one Autoencoder at a time in a single graph" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Another approach is to use a single graph. To do this, we create the graph for the full Stacked Autoencoder, but then we also add operations to train each Autoencoder independently: phase 1 trains the bottom and top layer (ie. the first Autoencoder) and phase 2 trains the two middle layers (ie. the second Autoencoder)." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "n_inputs = 28 * 28\n", - "n_hidden1 = 300\n", - "n_hidden2 = 150 # codings\n", - "n_hidden3 = n_hidden1\n", - "n_outputs = n_inputs\n", - "\n", - "learning_rate = 0.01\n", - "l2_reg = 0.0001\n", - "\n", - "activation = tf.nn.elu\n", - "regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n", - "initializer = tf.contrib.layers.variance_scaling_initializer()\n", - "\n", - "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "\n", - "weights1_init = initializer([n_inputs, n_hidden1])\n", - "weights2_init = initializer([n_hidden1, n_hidden2])\n", - "weights3_init = initializer([n_hidden2, n_hidden3])\n", - "weights4_init = initializer([n_hidden3, n_outputs])\n", - "\n", - "weights1 = tf.Variable(weights1_init, dtype=tf.float32, name=\"weights1\")\n", - "weights2 = tf.Variable(weights2_init, dtype=tf.float32, name=\"weights2\")\n", - "weights3 = tf.Variable(weights3_init, dtype=tf.float32, name=\"weights3\")\n", - "weights4 = tf.Variable(weights4_init, dtype=tf.float32, name=\"weights4\")\n", - "\n", - "biases1 = tf.Variable(tf.zeros(n_hidden1), name=\"biases1\")\n", - "biases2 = tf.Variable(tf.zeros(n_hidden2), name=\"biases2\")\n", - "biases3 = 
tf.Variable(tf.zeros(n_hidden3), name=\"biases3\")\n", - "biases4 = tf.Variable(tf.zeros(n_outputs), name=\"biases4\")\n", - "\n", - "hidden1 = activation(tf.matmul(X, weights1) + biases1)\n", - "hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)\n", - "hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)\n", - "outputs = tf.matmul(hidden3, weights4) + biases4\n", - "\n", - "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = tf.train.AdamOptimizer(learning_rate)\n", - "\n", - "with tf.name_scope(\"phase1\"):\n", - " phase1_outputs = tf.matmul(hidden1, weights4) + biases4 # bypass hidden2 and hidden3\n", - " phase1_reconstruction_loss = tf.reduce_mean(tf.square(phase1_outputs - X))\n", - " phase1_reg_loss = regularizer(weights1) + regularizer(weights4)\n", - " phase1_loss = phase1_reconstruction_loss + phase1_reg_loss\n", - " phase1_training_op = optimizer.minimize(phase1_loss)\n", - "\n", - "with tf.name_scope(\"phase2\"):\n", - " phase2_reconstruction_loss = tf.reduce_mean(tf.square(hidden3 - hidden1))\n", - " phase2_reg_loss = regularizer(weights2) + regularizer(weights3)\n", - " phase2_loss = phase2_reconstruction_loss + phase2_reg_loss\n", - " train_vars = [weights2, biases2, weights3, biases3]\n", - " phase2_training_op = optimizer.minimize(phase2_loss, var_list=train_vars) # freeze hidden1" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "init = tf.global_variables_initializer()\n", - "saver = tf.train.Saver()" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "training_ops = [phase1_training_op, phase2_training_op]\n", - "reconstruction_losses = [phase1_reconstruction_loss, phase2_reconstruction_loss]\n", - "n_epochs = [4, 4]\n", - "batch_sizes = [150, 150]\n", 
- "\n", - "with tf.Session() as sess:\n", - " init.run()\n", - " for phase in range(2):\n", - " print(\"Training phase #{}\".format(phase + 1))\n", - " for epoch in range(n_epochs[phase]):\n", - " n_batches = mnist.train.num_examples // batch_sizes[phase]\n", - " for iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", - " sys.stdout.flush()\n", - " X_batch, y_batch = mnist.train.next_batch(batch_sizes[phase])\n", - " sess.run(training_ops[phase], feed_dict={X: X_batch})\n", - " loss_train = reconstruction_losses[phase].eval(feed_dict={X: X_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", loss_train)\n", - " saver.save(sess, \"./my_model_one_at_a_time.ckpt\")\n", - " loss_test = reconstruction_loss.eval(feed_dict={X: mnist.test.images})\n", - " print(\"Test MSE:\", loss_test)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Cache the frozen layer outputs" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "training_ops = [phase1_training_op, phase2_training_op]\n", - "reconstruction_losses = [phase1_reconstruction_loss, phase2_reconstruction_loss]\n", - "n_epochs = [4, 4]\n", - "batch_sizes = [150, 150]\n", - "\n", - "with tf.Session() as sess:\n", - " init.run()\n", - " for phase in range(2):\n", - " print(\"Training phase #{}\".format(phase + 1))\n", - " if phase == 1:\n", - " hidden1_cache = hidden1.eval(feed_dict={X: mnist.train.images})\n", - " for epoch in range(n_epochs[phase]):\n", - " n_batches = mnist.train.num_examples // batch_sizes[phase]\n", - " for iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", - " sys.stdout.flush()\n", - " if phase == 1:\n", - " indices = rnd.permutation(mnist.train.num_examples)\n", - " hidden1_batch = hidden1_cache[indices[:batch_sizes[phase]]]\n", - " feed_dict = {hidden1: 
hidden1_batch}\n", - " sess.run(training_ops[phase], feed_dict=feed_dict)\n", - " else:\n", - " X_batch, y_batch = mnist.train.next_batch(batch_sizes[phase])\n", - " feed_dict = {X: X_batch}\n", - " sess.run(training_ops[phase], feed_dict=feed_dict)\n", - " loss_train = reconstruction_losses[phase].eval(feed_dict=feed_dict)\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", loss_train)\n", - " saver.save(sess, \"./my_model_cache_frozen.ckpt\")\n", - " loss_test = reconstruction_loss.eval(feed_dict={X: mnist.test.images})\n", - " print(\"Test MSE:\", loss_test)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Visualizing the Reconstructions" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "n_test_digits = 2\n", - "X_test = mnist.test.images[:n_test_digits]\n", - "\n", - "with tf.Session() as sess:\n", - " saver.restore(sess, \"./my_model_one_at_a_time.ckpt\") # not shown in the book\n", - " outputs_val = outputs.eval(feed_dict={X: X_test})\n", - "\n", - "def plot_image(image, shape=[28, 28]):\n", - " plt.imshow(image.reshape(shape), cmap=\"Greys\", interpolation=\"nearest\")\n", - " plt.axis(\"off\")\n", - "\n", - "for digit_index in range(n_test_digits):\n", - " plt.subplot(n_test_digits, 2, digit_index * 2 + 1)\n", - " plot_image(X_test[digit_index])\n", - " plt.subplot(n_test_digits, 2, digit_index * 2 + 2)\n", - " plot_image(outputs_val[digit_index])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Visualizing the extracted features" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "with tf.Session() as sess:\n", - " saver.restore(sess, \"./my_model_one_at_a_time.ckpt\") # not shown in the book\n", - " weights1_val = weights1.eval()\n", - "\n", - "for i in range(5):\n", - " plt.subplot(1, 5, i + 1)\n", - " plot_image(weights1_val.T[i])\n", - "\n", - 
"save_fig(\"extracted_features_plot\") # not shown\n", - "plt.show() # not shown" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Unsupervised pretraining" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's create a small neural network for MNIST classification:" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "n_inputs = 28 * 28\n", - "n_hidden1 = 300\n", - "n_hidden2 = 150\n", - "n_outputs = 10\n", - "\n", - "learning_rate = 0.01\n", - "l2_reg = 0.0005\n", - "\n", - "activation = tf.nn.elu\n", - "regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n", - "initializer = tf.contrib.layers.variance_scaling_initializer()\n", - "\n", - "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "y = tf.placeholder(tf.int32, shape=[None])\n", - "\n", - "weights1_init = initializer([n_inputs, n_hidden1])\n", - "weights2_init = initializer([n_hidden1, n_hidden2])\n", - "weights3_init = initializer([n_hidden2, n_outputs])\n", - "\n", - "weights1 = tf.Variable(weights1_init, dtype=tf.float32, name=\"weights1\")\n", - "weights2 = tf.Variable(weights2_init, dtype=tf.float32, name=\"weights2\")\n", - "weights3 = tf.Variable(weights3_init, dtype=tf.float32, name=\"weights3\")\n", - "\n", - "biases1 = tf.Variable(tf.zeros(n_hidden1), name=\"biases1\")\n", - "biases2 = tf.Variable(tf.zeros(n_hidden2), name=\"biases2\")\n", - "biases3 = tf.Variable(tf.zeros(n_outputs), name=\"biases3\")\n", - "\n", - "hidden1 = activation(tf.matmul(X, weights1) + biases1)\n", - "hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)\n", - "logits = tf.matmul(hidden2, weights3) + biases3\n", - "\n", - "cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", - "reg_loss = regularizer(weights1) + regularizer(weights2) + regularizer(weights3)\n", - "loss = cross_entropy + reg_loss\n", - "optimizer = 
tf.train.AdamOptimizer(learning_rate)\n", - "training_op = optimizer.minimize(loss)\n", - "\n", - "correct = tf.nn.in_top_k(logits, y, 1)\n", - "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", - "\n", - "init = tf.global_variables_initializer()\n", - "pretrain_saver = tf.train.Saver([weights1, weights2, biases1, biases2])\n", - "saver = tf.train.Saver()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Regular training (without pretraining):" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "n_epochs = 4\n", - "batch_size = 150\n", - "n_labeled_instances = 20000\n", - "\n", - "with tf.Session() as sess:\n", - " init.run()\n", - " for epoch in range(n_epochs):\n", - " n_batches = n_labeled_instances // batch_size\n", - " for iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", - " sys.stdout.flush()\n", - " indices = rnd.permutation(n_labeled_instances)[:batch_size]\n", - " X_batch, y_batch = mnist.train.images[indices], mnist.train.labels[indices]\n", - " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", - " accuracy_val = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train accuracy:\", accuracy_val, end=\" \")\n", - " saver.save(sess, \"./my_model_supervised.ckpt\")\n", - " accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n", - " print(\"Test accuracy:\", accuracy_val)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now reusing the first two layers of the autoencoder we pretrained:" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "n_epochs = 4\n", - "batch_size = 150\n", - "n_labeled_instances = 20000\n", - "\n", - "#training_op = optimizer.minimize(loss, var_list=[weights3, biases3]) # Freeze layers 1 and 2 (optional)\n", - 
"\n", - "with tf.Session() as sess:\n", - " init.run()\n", - " pretrain_saver.restore(sess, \"./my_model_cache_frozen.ckpt\")\n", - " for epoch in range(n_epochs):\n", - " n_batches = n_labeled_instances // batch_size\n", - " for iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", - " sys.stdout.flush()\n", - " indices = rnd.permutation(n_labeled_instances)[:batch_size]\n", - " X_batch, y_batch = mnist.train.images[indices], mnist.train.labels[indices]\n", - " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", - " accuracy_val = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train accuracy:\", accuracy_val, end=\"\\t\")\n", - " saver.save(sess, \"./my_model_supervised_pretrained.ckpt\")\n", - " accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n", - " print(\"Test accuracy:\", accuracy_val)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Stacked denoising Autoencoder" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note: the book uses `tf.contrib.layers.dropout()` rather than `tf.layers.dropout()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dropout()`, because anything in the contrib module may change or be deleted without notice. The `tf.layers.dropout()` function is almost identical to the `tf.contrib.layers.dropout()` function, except for a few minor differences. Most importantly:\n", - "* you must specify the dropout rate (`rate`) rather than the keep probability (`keep_prob`), where `rate` is simply equal to `1 - keep_prob`,\n", - "* the `is_training` parameter is renamed to `training`." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using Gaussian noise:" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "n_inputs = 28 * 28\n", - "n_hidden1 = 300\n", - "n_hidden2 = 150 # codings\n", - "n_hidden3 = n_hidden1\n", - "n_outputs = n_inputs\n", - "\n", - "learning_rate = 0.01" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "noise_level = 1.0\n", - "\n", - "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "X_noisy = X + noise_level * tf.random_normal(tf.shape(X))\n", - "\n", - "hidden1 = tf.layers.dense(X_noisy, n_hidden1, activation=tf.nn.relu,\n", - " name=\"hidden1\")\n", - "hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, # not shown in the book\n", - " name=\"hidden2\") # not shown\n", - "hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, # not shown\n", - " name=\"hidden3\") # not shown\n", - "outputs = tf.layers.dense(hidden3, n_outputs, name=\"outputs\") # not shown\n", - "\n", - "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X)) # MSE" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = tf.train.AdamOptimizer(learning_rate)\n", - "training_op = optimizer.minimize(reconstruction_loss)\n", - " \n", - "init = tf.global_variables_initializer()\n", - "saver = tf.train.Saver()" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "n_epochs = 10\n", - "batch_size = 150\n", - "\n", - "with tf.Session() as sess:\n", - " init.run()\n", - " for epoch in range(n_epochs):\n", - " n_batches = mnist.train.num_examples // batch_size\n", - " for iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", - " 
sys.stdout.flush()\n", - " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", - " sess.run(training_op, feed_dict={X: X_batch})\n", - " loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", loss_train)\n", - " saver.save(sess, \"./my_model_stacked_denoising_gaussian.ckpt\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using dropout:" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "n_inputs = 28 * 28\n", - "n_hidden1 = 300\n", - "n_hidden2 = 150 # codings\n", - "n_hidden3 = n_hidden1\n", - "n_outputs = n_inputs\n", - "\n", - "learning_rate = 0.01" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "dropout_rate = 0.3\n", - "\n", - "training = tf.placeholder_with_default(False, shape=(), name='training')\n", - "\n", - "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "X_drop = tf.layers.dropout(X, dropout_rate, training=training)\n", - "\n", - "hidden1 = tf.layers.dense(X_drop, n_hidden1, activation=tf.nn.relu,\n", - " name=\"hidden1\")\n", - "hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, # not shown in the book\n", - " name=\"hidden2\") # not shown\n", - "hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, # not shown\n", - " name=\"hidden3\") # not shown\n", - "outputs = tf.layers.dense(hidden3, n_outputs, name=\"outputs\") # not shown\n", - "\n", - "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X)) # MSE" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = tf.train.AdamOptimizer(learning_rate)\n", - "training_op = optimizer.minimize(reconstruction_loss)\n", - " \n", - "init = tf.global_variables_initializer()\n", - "saver = tf.train.Saver()" - ] - }, - { - 
"cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "n_epochs = 10\n", - "batch_size = 150\n", - "\n", - "with tf.Session() as sess:\n", - " init.run()\n", - " for epoch in range(n_epochs):\n", - " n_batches = mnist.train.num_examples // batch_size\n", - " for iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", - " sys.stdout.flush()\n", - " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", - " sess.run(training_op, feed_dict={X: X_batch, training: True})\n", - " loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", loss_train)\n", - " saver.save(sess, \"./my_model_stacked_denoising_dropout.ckpt\")" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [], - "source": [ - "show_reconstructed_digits(X, outputs, \"./my_model_stacked_denoising_dropout.ckpt\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Sparse Autoencoder" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [], - "source": [ - "p = 0.1\n", - "q = np.linspace(0.001, 0.999, 500)\n", - "kl_div = p * np.log(p / q) + (1 - p) * np.log((1 - p) / (1 - q))\n", - "mse = (p - q)**2\n", - "plt.plot([p, p], [0, 0.3], \"k:\")\n", - "plt.text(0.05, 0.32, \"Target\\nsparsity\", fontsize=14)\n", - "plt.plot(q, kl_div, \"b-\", label=\"KL divergence\")\n", - "plt.plot(q, mse, \"r--\", label=\"MSE\")\n", - "plt.legend(loc=\"upper left\")\n", - "plt.xlabel(\"Actual sparsity\")\n", - "plt.ylabel(\"Cost\", rotation=0)\n", - "plt.axis([0, 1, 0, 0.95])\n", - "save_fig(\"sparsity_loss_plot\")" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "n_inputs = 28 * 28\n", - "n_hidden1 = 1000 # sparse codings\n", - "n_outputs = n_inputs" - ] - 
}, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [], - "source": [ - "def kl_divergence(p, q):\n", - " # Kullback Leibler divergence\n", - " return p * tf.log(p / q) + (1 - p) * tf.log((1 - p) / (1 - q))\n", - "\n", - "learning_rate = 0.01\n", - "sparsity_target = 0.1\n", - "sparsity_weight = 0.2\n", - "\n", - "X = tf.placeholder(tf.float32, shape=[None, n_inputs]) # not shown in the book\n", - "\n", - "hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.sigmoid) # not shown\n", - "outputs = tf.layers.dense(hidden1, n_outputs) # not shown\n", - "\n", - "hidden1_mean = tf.reduce_mean(hidden1, axis=0) # batch mean\n", - "sparsity_loss = tf.reduce_sum(kl_divergence(sparsity_target, hidden1_mean))\n", - "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X)) # MSE\n", - "loss = reconstruction_loss + sparsity_weight * sparsity_loss\n", - "\n", - "optimizer = tf.train.AdamOptimizer(learning_rate)\n", - "training_op = optimizer.minimize(loss)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "init = tf.global_variables_initializer()\n", - "saver = tf.train.Saver()" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [], - "source": [ - "n_epochs = 100\n", - "batch_size = 1000\n", - "\n", - "with tf.Session() as sess:\n", - " init.run()\n", - " for epoch in range(n_epochs):\n", - " n_batches = mnist.train.num_examples // batch_size\n", - " for iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", - " sys.stdout.flush()\n", - " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", - " sess.run(training_op, feed_dict={X: X_batch})\n", - " reconstruction_loss_val, sparsity_loss_val, loss_val = sess.run([reconstruction_loss, sparsity_loss, loss], feed_dict={X: X_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", reconstruction_loss_val, 
\"\\tSparsity loss:\", sparsity_loss_val, \"\\tTotal loss:\", loss_val)\n", - " saver.save(sess, \"./my_model_sparse.ckpt\")" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [], - "source": [ - "show_reconstructed_digits(X, outputs, \"./my_model_sparse.ckpt\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that the coding layer must output values from 0 to 1, which is why we use the sigmoid activation function:" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.sigmoid)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To speed up training, you can normalize the inputs between 0 and 1, and use the cross entropy instead of the MSE for the cost function:" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "logits = tf.layers.dense(hidden1, n_outputs)\n", - "outputs = tf.nn.sigmoid(logits)\n", - "\n", - "xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits)\n", - "reconstruction_loss = tf.reduce_mean(xentropy)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Variational Autoencoder" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "from functools import partial\n", - "\n", - "n_inputs = 28 * 28\n", - "n_hidden1 = 500\n", - "n_hidden2 = 500\n", - "n_hidden3 = 20 # codings\n", - "n_hidden4 = n_hidden2\n", - "n_hidden5 = n_hidden1\n", - "n_outputs = n_inputs\n", - "learning_rate = 0.001\n", - "\n", - "initializer = tf.contrib.layers.variance_scaling_initializer()\n", - "\n", - "my_dense_layer = partial(\n", - " tf.layers.dense,\n", - " activation=tf.nn.elu,\n", - " kernel_initializer=initializer)\n", - 
"\n", - "X = tf.placeholder(tf.float32, [None, n_inputs])\n", - "hidden1 = my_dense_layer(X, n_hidden1)\n", - "hidden2 = my_dense_layer(hidden1, n_hidden2)\n", - "hidden3_mean = my_dense_layer(hidden2, n_hidden3, activation=None)\n", - "hidden3_sigma = my_dense_layer(hidden2, n_hidden3, activation=None)\n", - "noise = tf.random_normal(tf.shape(hidden3_sigma), dtype=tf.float32)\n", - "hidden3 = hidden3_mean + hidden3_sigma * noise\n", - "hidden4 = my_dense_layer(hidden3, n_hidden4)\n", - "hidden5 = my_dense_layer(hidden4, n_hidden5)\n", - "logits = my_dense_layer(hidden5, n_outputs, activation=None)\n", - "outputs = tf.sigmoid(logits)\n", - "\n", - "xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits)\n", - "reconstruction_loss = tf.reduce_sum(xentropy)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [], - "source": [ - "eps = 1e-10 # smoothing term to avoid computing log(0) which is NaN\n", - "latent_loss = 0.5 * tf.reduce_sum(\n", - " tf.square(hidden3_sigma) + tf.square(hidden3_mean)\n", - " - 1 - tf.log(eps + tf.square(hidden3_sigma)))" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "loss = reconstruction_loss + latent_loss\n", - "\n", - "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", - "training_op = optimizer.minimize(loss)\n", - "\n", - "init = tf.global_variables_initializer()\n", - "saver = tf.train.Saver()" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [], - "source": [ - "n_epochs = 50\n", - "batch_size = 150\n", - "\n", - "with tf.Session() as sess:\n", - " init.run()\n", - " for epoch in range(n_epochs):\n", - " n_batches = mnist.train.num_examples // batch_size\n", - " for iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", - " sys.stdout.flush()\n", - " X_batch, y_batch = 
mnist.train.next_batch(batch_size)\n", - " sess.run(training_op, feed_dict={X: X_batch})\n", - " loss_val, reconstruction_loss_val, latent_loss_val = sess.run([loss, reconstruction_loss, latent_loss], feed_dict={X: X_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train total loss:\", loss_val, \"\\tReconstruction loss:\", reconstruction_loss_val, \"\\tLatent loss:\", latent_loss_val)\n", - " saver.save(sess, \"./my_model_variational.ckpt\")" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "from functools import partial\n", - "\n", - "n_inputs = 28 * 28\n", - "n_hidden1 = 500\n", - "n_hidden2 = 500\n", - "n_hidden3 = 20 # codings\n", - "n_hidden4 = n_hidden2\n", - "n_hidden5 = n_hidden1\n", - "n_outputs = n_inputs\n", - "learning_rate = 0.001\n", - "\n", - "initializer = tf.contrib.layers.variance_scaling_initializer()\n", - "my_dense_layer = partial(\n", - " tf.layers.dense,\n", - " activation=tf.nn.elu,\n", - " kernel_initializer=initializer)\n", - "\n", - "X = tf.placeholder(tf.float32, [None, n_inputs])\n", - "hidden1 = my_dense_layer(X, n_hidden1)\n", - "hidden2 = my_dense_layer(hidden1, n_hidden2)\n", - "hidden3_mean = my_dense_layer(hidden2, n_hidden3, activation=None)\n", - "hidden3_gamma = my_dense_layer(hidden2, n_hidden3, activation=None)\n", - "noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n", - "hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n", - "hidden4 = my_dense_layer(hidden3, n_hidden4)\n", - "hidden5 = my_dense_layer(hidden4, n_hidden5)\n", - "logits = my_dense_layer(hidden5, n_outputs, activation=None)\n", - "outputs = tf.sigmoid(logits)\n", - "\n", - "xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits)\n", - "reconstruction_loss = tf.reduce_sum(xentropy)\n", - "latent_loss = 0.5 * tf.reduce_sum(\n", - " tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)\n", - "loss = 
reconstruction_loss + latent_loss\n", - "\n", - "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", - "training_op = optimizer.minimize(loss)\n", - "\n", - "init = tf.global_variables_initializer()\n", - "saver = tf.train.Saver()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Generate digits" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's train the model and generate a few random digits:" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "n_digits = 60\n", - "n_epochs = 50\n", - "batch_size = 150\n", - "\n", - "with tf.Session() as sess:\n", - " init.run()\n", - " for epoch in range(n_epochs):\n", - " n_batches = mnist.train.num_examples // batch_size\n", - " for iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\") # not shown in the book\n", - " sys.stdout.flush() # not shown\n", - " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", - " sess.run(training_op, feed_dict={X: X_batch})\n", - " loss_val, reconstruction_loss_val, latent_loss_val = sess.run([loss, reconstruction_loss, latent_loss], feed_dict={X: X_batch}) # not shown\n", - " print(\"\\r{}\".format(epoch), \"Train total loss:\", loss_val, \"\\tReconstruction loss:\", reconstruction_loss_val, \"\\tLatent loss:\", latent_loss_val) # not shown\n", - " saver.save(sess, \"./my_model_variational.ckpt\") # not shown\n", - " \n", - " codings_rnd = np.random.normal(size=[n_digits, n_hidden3])\n", - " outputs_val = outputs.eval(feed_dict={hidden3: codings_rnd})" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(8,50)) # not shown in the book\n", - "for iteration in range(n_digits):\n", - " plt.subplot(n_digits, 10, iteration + 1)\n", - " plot_image(outputs_val[iteration])" - ] - }, - { - "cell_type": 
"code", - "execution_count": 57, - "metadata": {}, - "outputs": [], - "source": [ - "n_rows = 6\n", - "n_cols = 10\n", - "plot_multiple_images(outputs_val.reshape(-1, 28, 28), n_rows, n_cols)\n", - "save_fig(\"generated_digits_plot\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that the latent loss is computed differently in this second variant:" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "latent_loss = 0.5 * tf.reduce_sum(\n", - " tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Encode & Decode" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Encode:" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [], - "source": [ - "n_digits = 3\n", - "X_test, y_test = mnist.test.next_batch(batch_size)\n", - "codings = hidden3\n", - "\n", - "with tf.Session() as sess:\n", - " saver.restore(sess, \"./my_model_variational.ckpt\")\n", - " codings_val = codings.eval(feed_dict={X: X_test})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Decode:" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [], - "source": [ - "with tf.Session() as sess:\n", - " saver.restore(sess, \"./my_model_variational.ckpt\")\n", - " outputs_val = outputs.eval(feed_dict={codings: codings_val})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's plot the reconstructions:" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [], - "source": [ - "fig = plt.figure(figsize=(8, 2.5 * n_digits))\n", - "for iteration in range(n_digits):\n", - " plt.subplot(n_digits, 2, 1 + 2 * iteration)\n", - " plot_image(X_test[iteration])\n", - " plt.subplot(n_digits, 2, 2 + 2 * 
iteration)\n", - " plot_image(outputs_val[iteration])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Interpolate digits" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "n_iterations = 3\n", - "n_digits = 6\n", - "codings_rnd = np.random.normal(size=[n_digits, n_hidden3])\n", - "\n", - "with tf.Session() as sess:\n", - " saver.restore(sess, \"./my_model_variational.ckpt\")\n", - " target_codings = np.roll(codings_rnd, -1, axis=0)\n", - " for iteration in range(n_iterations + 1):\n", - " codings_interpolate = codings_rnd + (target_codings - codings_rnd) * iteration / n_iterations\n", - " outputs_val = outputs.eval(feed_dict={codings: codings_interpolate})\n", - " plt.figure(figsize=(11, 1.5*n_iterations))\n", - " for digit_index in range(n_digits):\n", - " plt.subplot(1, n_digits, digit_index + 1)\n", - " plot_image(outputs_val[digit_index])\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "# Exercise solutions" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Coming soon..." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.4" - }, - "nav_menu": { - "height": "381px", - "width": "453px" - }, - "toc": { - "navigate_menu": true, - "number_sections": true, - "sideBar": true, - "threshold": 6, - "toc_cell": false, - "toc_section_display": "block", - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 1 -}