handson-ml/17_autoencoders.ipynb

1301 lines
39 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Chapter 17 Autoencoders**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"_This notebook contains all the sample code in chapter 17._"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First, let's import a few common modules, ensure Matplotlib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0-preview."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Python ≥3.5 is required\n",
"import sys\n",
"assert sys.version_info >= (3, 5)\n",
"\n",
"# Scikit-Learn ≥0.20 is required\n",
"import sklearn\n",
"assert sklearn.__version__ >= \"0.20\"\n",
"\n",
"# TensorFlow ≥2.0-preview is required\n",
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"assert tf.__version__ >= \"2.0\"\n",
"\n",
"# Common imports\n",
"import numpy as np\n",
"import os\n",
"\n",
"# to make this notebook's output stable across runs\n",
"np.random.seed(42)\n",
"tf.random.set_seed(42)\n",
"\n",
"# To plot pretty figures\n",
"%matplotlib inline\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"mpl.rc('axes', labelsize=14)\n",
"mpl.rc('xtick', labelsize=12)\n",
"mpl.rc('ytick', labelsize=12)\n",
"\n",
"# Where to save the figures\n",
"PROJECT_ROOT_DIR = \".\"\n",
"CHAPTER_ID = \"autoencoders\"\n",
"IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
"os.makedirs(IMAGES_PATH, exist_ok=True)\n",
"\n",
"def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n",
"    \"\"\"Save the current Matplotlib figure under IMAGES_PATH.\n",
"\n",
"    fig_id is the file name without its extension; fig_extension is used both\n",
"    as the file suffix and as the format passed to plt.savefig(); resolution\n",
"    is the dpi. When tight_layout is true, plt.tight_layout() is applied first.\n",
"    \"\"\"\n",
"    path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n",
"    print(\"Saving figure\", fig_id)\n",
"    if tight_layout:\n",
"        plt.tight_layout()\n",
"    plt.savefig(path, format=fig_extension, dpi=resolution)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A utility function to plot a grayscale 28x28 image:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def plot_image(image):\n",
"    \"\"\"Draw `image` with the Greys colormap and hide the axes.\"\"\"\n",
"    plt.imshow(image, interpolation=\"nearest\", cmap=\"Greys\")\n",
"    plt.axis(\"off\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# PCA with a linear Autoencoder"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Build 3D dataset:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(4)\n",
"\n",
"m = 200\n",
"w1, w2 = 0.1, 0.3\n",
"noise = 0.1\n",
"\n",
"angles = np.random.rand(m) * 3 * np.pi / 2 - 0.5\n",
"data = np.empty((m, 3))\n",
"data[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * np.random.randn(m) / 2\n",
"data[:, 1] = np.sin(angles) * 0.7 + noise * np.random.randn(m) / 2\n",
"data[:, 2] = data[:, 0] * w1 + data[:, 1] * w2 + noise * np.random.randn(m)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Normalize the data:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"scaler = StandardScaler()\n",
"X_train = scaler.fit_transform(data[:100])\n",
"X_test = scaler.transform(data[100:])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's build the Autoencoder..."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"\n",
"# Linear autoencoder: no activation functions, 3 inputs -> 2 codings -> 3 outputs\n",
"encoder = keras.models.Sequential([keras.layers.Dense(2, input_shape=[3])])\n",
"decoder = keras.models.Sequential([keras.layers.Dense(3, input_shape=[2])])\n",
"autoencoder = keras.models.Sequential([encoder, decoder])\n",
"\n",
"# `learning_rate` replaces the deprecated `lr` alias\n",
"autoencoder.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(learning_rate=0.1))\n",
"# The inputs are also the targets; validation_data is a tuple (inputs, targets), not a list\n",
"history = autoencoder.fit(X_train, X_train, epochs=20, validation_data=(X_test, X_test))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"codings_test = encoder.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure(figsize=(4,3))\n",
"plt.plot(codings_test[:,0], codings_test[:, 1], \"b.\")\n",
"plt.xlabel(\"$z_1$\", fontsize=18)\n",
"plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n",
"save_fig(\"linear_autoencoder_pca_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Stacked Autoencoders"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's use Fashion MNIST:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()\n",
"X_train_full = X_train_full / 255\n",
"X_test = X_test / 255\n",
"X_train, X_valid = X_train_full[:-5000], X_train_full[-5000:]\n",
"y_train, y_valid = y_train_full[:-5000], y_train_full[-5000:]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train all layers at once"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's build a stacked Autoencoder with 3 hidden layers and 1 output layer (i.e., 2 stacked Autoencoders)."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"stacked_encoder = keras.models.Sequential([\n",
" keras.layers.Flatten(input_shape=[28, 28]),\n",
" keras.layers.Dense(100, activation=\"selu\"),\n",
" keras.layers.Dense(30, activation=\"selu\"),\n",
"])\n",
"stacked_decoder = keras.models.Sequential([\n",
" keras.layers.Dense(100, activation=\"selu\", input_shape=[30]),\n",
" keras.layers.Dense(28 * 28, activation=\"sigmoid\"),\n",
" keras.layers.Reshape([28, 28])\n",
"])\n",
"stacked_ae = keras.models.Sequential([stacked_encoder, stacked_decoder])\n",
"# `learning_rate` replaces the deprecated `lr` alias\n",
"stacked_ae.compile(loss=\"binary_crossentropy\",\n",
"                   optimizer=keras.optimizers.SGD(learning_rate=1.),\n",
"                   metrics=[\"accuracy\"])\n",
"# validation_data is a tuple (inputs, targets), not a list\n",
"history = stacked_ae.fit(X_train, X_train, epochs=10,\n",
"                         validation_data=(X_valid, X_valid))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This function processes a few test images through the autoencoder and displays the original images and their reconstructions:"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def show_reconstructions(model, n_test_images=2):\n",
"    \"\"\"Plot the first test images next to the model's reconstructions of them.\n",
"\n",
"    The first n_test_images images of the global X_test are passed through\n",
"    model.predict(). Each row of the figure then shows the original image on\n",
"    the left and its reconstruction on the right.\n",
"    \"\"\"\n",
"    reconstructions = model.predict(X_test[:n_test_images])\n",
"    plt.figure(figsize=(8, 3 * n_test_images))\n",
"    for image_index in range(n_test_images):\n",
"        plt.subplot(n_test_images, 2, image_index * 2 + 1)\n",
"        plot_image(X_test[image_index])\n",
"        plt.subplot(n_test_images, 2, image_index * 2 + 2)\n",
"        plot_image(reconstructions[image_index])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"show_reconstructions(stacked_ae)\n",
"save_fig(\"reconstruction_plot\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tying weights"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It is common to tie the weights of the encoder and the decoder, by simply using the transpose of the encoder's weights as the decoder weights. For this, we need to use a custom layer."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"K = keras.backend\n",
"\n",
"class DenseTranspose(keras.layers.Layer):\n",
"    \"\"\"Dense layer that reuses the transposed kernel of another Dense layer.\n",
"\n",
"    This layer only creates a bias vector; the kernel is read from\n",
"    `dense.weights[0]` on every call and transposed, so both layers use the\n",
"    same weight matrix.\n",
"    \"\"\"\n",
"    def __init__(self, dense, activation=None, **kwargs):\n",
"        # Initialize the base Layer before assigning any attribute\n",
"        super().__init__(**kwargs)\n",
"        self.dense = dense\n",
"        self.activation = keras.activations.get(activation)\n",
"    def build(self, batch_input_shape):\n",
"        # One bias per unit of the tied layer's input, which is this layer's output size\n",
"        self.biases = self.add_weight(name=\"bias\", shape=[self.dense.input_shape[-1]],\n",
"                                      initializer=\"zeros\")\n",
"        super().build(batch_input_shape)\n",
"    def call(self, inputs):\n",
"        return self.activation(inputs @ K.transpose(self.dense.weights[0]) + self.biases)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"tied_encoder = keras.models.Sequential([\n",
" keras.layers.Flatten(input_shape=[28, 28]),\n",
" keras.layers.Dense(100, activation=\"selu\"),\n",
" keras.layers.Dense(30, activation=\"selu\"),\n",
"])\n",
"tied_decoder = keras.models.Sequential([\n",
" DenseTranspose(tied_encoder.layers[2], activation=\"selu\"),\n",
" DenseTranspose(tied_encoder.layers[1], activation=\"sigmoid\"),\n",
" keras.layers.Reshape([28, 28])\n",
"])\n",
"tied_ae = keras.models.Sequential([tied_encoder, tied_decoder])\n",
"# `learning_rate` replaces the deprecated `lr` alias\n",
"tied_ae.compile(loss=\"binary_crossentropy\",\n",
"                optimizer=keras.optimizers.SGD(learning_rate=1.),\n",
"                metrics=[\"accuracy\"])\n",
"# validation_data is a tuple (inputs, targets), not a list\n",
"history = tied_ae.fit(X_train, X_train, epochs=10,\n",
"                      validation_data=(X_valid, X_valid))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"show_reconstructions(tied_ae)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Training one Autoencoder at a time"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def train_autoencoder(n_neurons, X_train, X_valid, loss, optimizer, metrics=None,\n",
"                      n_epochs=10, output_activation=None):\n",
"    \"\"\"Train a one-hidden-layer autoencoder and return its two halves.\n",
"\n",
"    The encoder is a single SELU Dense layer with n_neurons units. The decoder\n",
"    is a single Dense layer mapping back to the input size, using\n",
"    output_activation. The autoencoder is fit to reproduce X_train.\n",
"\n",
"    Returns (encoder, decoder, encoder(X_train), encoder(X_valid)).\n",
"    \"\"\"\n",
"    n_inputs = X_train.shape[-1]\n",
"    encoder = keras.models.Sequential([\n",
"        keras.layers.Dense(n_neurons, activation=\"selu\", input_shape=[n_inputs])\n",
"    ])\n",
"    decoder = keras.models.Sequential([\n",
"        keras.layers.Dense(n_inputs, activation=output_activation),\n",
"    ])\n",
"    autoencoder = keras.models.Sequential([encoder, decoder])\n",
"    autoencoder.compile(optimizer, loss, metrics=metrics)\n",
"    # validation_data is a tuple (inputs, targets), not a list\n",
"    autoencoder.fit(X_train, X_train, epochs=n_epochs,\n",
"                    validation_data=(X_valid, X_valid))\n",
"    return encoder, decoder, encoder(X_train), encoder(X_valid)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"X_train_flat = keras.layers.Flatten()(X_train)\n",
"X_valid_flat = keras.layers.Flatten()(X_valid)\n",
"# First autoencoder: trained on the flattened images (`learning_rate` replaces `lr`)\n",
"enc1, dec1, X_train_enc1, X_valid_enc1 = train_autoencoder(\n",
"    100, X_train_flat, X_valid_flat, \"binary_crossentropy\",\n",
"    keras.optimizers.SGD(learning_rate=0.1), output_activation=\"sigmoid\",\n",
"    metrics=[\"accuracy\"])\n",
"# Second autoencoder: trained on the codings produced by the first encoder\n",
"enc2, dec2, _, _ = train_autoencoder(\n",
"    30, X_train_enc1, X_valid_enc1, \"mse\", keras.optimizers.Adam(),\n",
"    output_activation=\"selu\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"stacked_ae_1_by_1 = keras.models.Sequential([\n",
" keras.layers.Flatten(input_shape=[28, 28]),\n",
" enc1,\n",
" enc2,\n",
" dec2,\n",
" dec1,\n",
" keras.layers.Reshape([28, 28])\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"show_reconstructions(stacked_ae_1_by_1)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Fine-tune the whole stack end to end (`learning_rate` replaces the deprecated `lr`)\n",
"stacked_ae_1_by_1.compile(loss=\"binary_crossentropy\",\n",
"                          optimizer=keras.optimizers.SGD(learning_rate=0.5),\n",
"                          metrics=[\"accuracy\"])\n",
"# validation_data is a tuple (inputs, targets), not a list\n",
"history = stacked_ae_1_by_1.fit(X_train, X_train, epochs=10,\n",
"                                validation_data=(X_valid, X_valid))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"show_reconstructions(stacked_ae_1_by_1)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Visualizing the extracted features"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Weights of layer 1 of the stack: each column is reshaped to 28x28 and shown as an image\n",
"weights1 = stacked_ae_1_by_1.layers[1].get_weights()[0]\n",
"plt.figure(figsize=(8, 2))\n",
"n_rows, n_cols = 2, 8\n",
"for row in range(n_rows):\n",
"    for col in range(n_cols):\n",
"        index = row * n_cols + col\n",
"        plt.subplot(n_rows, n_cols, index + 1)\n",
"        plt.imshow(weights1[:, index].reshape(28, 28), cmap=\"Greys\")\n",
"        plt.axis(\"off\")\n",
"\n",
"save_fig(\"extracted_features_plot\", tight_layout=False) # not shown\n",
"plt.show() # not shown"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using Convolutional Layers Instead of Dense Layers"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's build a convolutional autoencoder: the encoder stacks three convolutional layers, each followed by a max pooling layer, and the decoder mirrors it with three transposed convolutional layers."
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"conv_encoder = keras.models.Sequential([\n",
" keras.layers.Reshape([28, 28, 1], input_shape=[28, 28]),\n",
" keras.layers.Conv2D(16, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n",
" keras.layers.MaxPool2D(pool_size=2),\n",
" keras.layers.Conv2D(32, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n",
" keras.layers.MaxPool2D(pool_size=2),\n",
" keras.layers.Conv2D(64, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n",
" keras.layers.MaxPool2D(pool_size=2)\n",
"])\n",
"conv_decoder = keras.models.Sequential([\n",
" keras.layers.Conv2DTranspose(32, kernel_size=3, strides=2, padding=\"VALID\", activation=\"selu\",\n",
" input_shape=[3, 3, 64]),\n",
" keras.layers.Conv2DTranspose(16, kernel_size=3, strides=2, padding=\"SAME\", activation=\"selu\"),\n",
" keras.layers.Conv2DTranspose(1, kernel_size=3, strides=2, padding=\"SAME\", activation=\"sigmoid\"),\n",
" keras.layers.Reshape([28, 28])\n",
"])\n",
"conv_ae = keras.models.Sequential([conv_encoder, conv_decoder])\n",
"\n",
"# `learning_rate` replaces the deprecated `lr` alias\n",
"conv_ae.compile(loss=\"binary_crossentropy\",\n",
"                optimizer=keras.optimizers.SGD(learning_rate=1.0),\n",
"                metrics=[\"accuracy\"])\n",
"# validation_data is a tuple (inputs, targets), not a list\n",
"history = conv_ae.fit(X_train, X_train, epochs=5,\n",
"                      validation_data=(X_valid, X_valid))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"conv_encoder.summary()\n",
"conv_decoder.summary()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"show_reconstructions(conv_ae)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Unsupervised pretraining"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's create a small neural network for Fashion MNIST classification, trained on only 500 labeled images:"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"X_train_small = X_train[:500]\n",
"y_train_small = y_train[:500]\n",
"\n",
"classifier = keras.models.Sequential([\n",
" keras.layers.Reshape([28, 28, 1], input_shape=[28, 28]),\n",
" keras.layers.Conv2D(16, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n",
" keras.layers.MaxPool2D(pool_size=2),\n",
" keras.layers.Conv2D(32, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n",
" keras.layers.MaxPool2D(pool_size=2),\n",
" keras.layers.Conv2D(64, kernel_size=3, padding=\"SAME\", activation=\"selu\"),\n",
" keras.layers.MaxPool2D(pool_size=2),\n",
" keras.layers.Flatten(),\n",
" keras.layers.Dense(20, activation=\"selu\"),\n",
" keras.layers.Dense(10, activation=\"softmax\")\n",
"])\n",
"# `learning_rate` replaces the deprecated `lr` alias\n",
"classifier.compile(loss=\"sparse_categorical_crossentropy\",\n",
"                   optimizer=keras.optimizers.SGD(learning_rate=0.02),\n",
"                   metrics=[\"accuracy\"])\n",
"# validation_data is a tuple (inputs, targets), not a list\n",
"history = classifier.fit(X_train_small, y_train_small, epochs=20,\n",
"                         validation_data=(X_valid, y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"pd.DataFrame(history.history).plot()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"# clone_model() only copies the architecture: the clone gets newly initialized\n",
"# weights, so the trained encoder's weights must be copied over explicitly.\n",
"conv_encoder_clone = keras.models.clone_model(conv_encoder)\n",
"conv_encoder_clone.set_weights(conv_encoder.get_weights())\n",
"\n",
"# Classifier = cloned encoder + a small dense head\n",
"pretrained_clf = keras.models.Sequential([\n",
"    conv_encoder_clone,\n",
"    keras.layers.Flatten(),\n",
"    keras.layers.Dense(20, activation=\"selu\"),\n",
"    keras.layers.Dense(10, activation=\"softmax\")\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# Freeze the cloned encoder so that only the layers added on top are trained;\n",
"# the model is compiled after changing `trainable` so the change takes effect.\n",
"conv_encoder_clone.trainable = False\n",
"pretrained_clf.compile(loss=\"sparse_categorical_crossentropy\",\n",
"                       optimizer=keras.optimizers.SGD(learning_rate=0.02),\n",
"                       metrics=[\"accuracy\"])\n",
"# validation_data is a tuple (inputs, targets), not a list\n",
"history = pretrained_clf.fit(X_train_small, y_train_small, epochs=30,\n",
"                             validation_data=(X_valid, y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# Unfreeze the cloned encoder and recompile so that all layers are trained\n",
"conv_encoder_clone.trainable = True\n",
"pretrained_clf.compile(loss=\"sparse_categorical_crossentropy\",\n",
"                       optimizer=keras.optimizers.SGD(learning_rate=0.02),\n",
"                       metrics=[\"accuracy\"])\n",
"# validation_data is a tuple (inputs, targets), not a list\n",
"history = pretrained_clf.fit(X_train_small, y_train_small, epochs=20,\n",
"                             validation_data=(X_valid, y_valid))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Stacked denoising Autoencoder"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Using Gaussian noise:"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"denoising_encoder = keras.models.Sequential([\n",
" keras.layers.Flatten(input_shape=[28, 28]),\n",
" keras.layers.GaussianNoise(1.0),\n",
" keras.layers.Dense(100, activation=\"selu\"),\n",
" keras.layers.Dense(30, activation=\"selu\")\n",
"])\n",
"denoising_decoder = keras.models.Sequential([\n",
" keras.layers.Dense(100, activation=\"selu\", input_shape=[30]),\n",
" keras.layers.Dense(28 * 28, activation=\"sigmoid\"),\n",
" keras.layers.Reshape([28, 28])\n",
"])\n",
"denoising_ae = keras.models.Sequential([denoising_encoder, denoising_decoder])\n",
"# `learning_rate` replaces the deprecated `lr` alias\n",
"denoising_ae.compile(loss=\"binary_crossentropy\",\n",
"                     optimizer=keras.optimizers.SGD(learning_rate=1.0),\n",
"                     metrics=[\"accuracy\"])\n",
"# validation_data is a tuple (inputs, targets), not a list\n",
"history = denoising_ae.fit(X_train, X_train, epochs=10,\n",
"                           validation_data=(X_valid, X_valid))"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"show_reconstructions(denoising_ae)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Using dropout:"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"dropout_encoder = keras.models.Sequential([\n",
" keras.layers.Flatten(input_shape=[28, 28]),\n",
" keras.layers.Dropout(0.5),\n",
" keras.layers.Dense(100, activation=\"selu\"),\n",
" keras.layers.Dense(30, activation=\"selu\")\n",
"])\n",
"dropout_decoder = keras.models.Sequential([\n",
" keras.layers.Dense(100, activation=\"selu\", input_shape=[30]),\n",
" keras.layers.Dense(28 * 28, activation=\"sigmoid\"),\n",
" keras.layers.Reshape([28, 28])\n",
"])\n",
"dropout_ae = keras.models.Sequential([dropout_encoder, dropout_decoder])\n",
"# `learning_rate` replaces the deprecated `lr` alias\n",
"dropout_ae.compile(loss=\"binary_crossentropy\",\n",
"                   optimizer=keras.optimizers.SGD(learning_rate=1.0),\n",
"                   metrics=[\"accuracy\"])\n",
"# validation_data is a tuple (inputs, targets), not a list\n",
"history = dropout_ae.fit(X_train, X_train, epochs=10,\n",
"                         validation_data=(X_valid, X_valid))"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"show_reconstructions(dropout_ae)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Sparse Autoencoder"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's build a simple stacked autoencoder, but this time we will use the sigmoid activation function for the coding layer, to ensure that the coding values range from 0 to 1:"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"simple_encoder = keras.models.Sequential([\n",
" keras.layers.Flatten(input_shape=[28, 28]),\n",
" keras.layers.Dense(100, activation=\"selu\"),\n",
" keras.layers.Dense(30, activation=\"sigmoid\"),\n",
"])\n",
"simple_decoder = keras.models.Sequential([\n",
" keras.layers.Dense(100, activation=\"selu\", input_shape=[30]),\n",
" keras.layers.Dense(28 * 28, activation=\"sigmoid\"),\n",
" keras.layers.Reshape([28, 28])\n",
"])\n",
"simple_ae = keras.models.Sequential([simple_encoder, simple_decoder])\n",
"# `learning_rate` replaces the deprecated `lr` alias\n",
"simple_ae.compile(loss=\"binary_crossentropy\",\n",
"                  optimizer=keras.optimizers.SGD(learning_rate=1.),\n",
"                  metrics=[\"accuracy\"])\n",
"# validation_data is a tuple (inputs, targets), not a list\n",
"history = simple_ae.fit(X_train, X_train, epochs=10,\n",
"                        validation_data=(X_valid, X_valid))"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"show_reconstructions(simple_ae)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's create a couple of functions to plot nice activation histograms:"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"def plot_percent_hist(ax, data, bins):\n",
"    \"\"\"Draw on `ax` a bar chart of the fraction of `data` falling in each bin.\n",
"\n",
"    `bins` is an array of bin edges. Bars are centered on each bin and the\n",
"    y axis tick labels are formatted as percentages.\n",
"    \"\"\"\n",
"    counts, _ = np.histogram(data, bins=bins)\n",
"    bin_widths = bins[1:] - bins[:-1]\n",
"    bin_centers = bins[:-1] + bin_widths / 2\n",
"    ax.bar(bin_centers, counts / len(data), width=bin_widths*0.8)\n",
"    ax.xaxis.set_ticks(bins)\n",
"\n",
"    def as_percent(y, position):\n",
"        return \"{}%\".format(int(np.round(100 * y)))\n",
"\n",
"    ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(as_percent))\n",
"    ax.grid(True)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"def plot_activations_histogram(encoder, height=1, n_bins=10):\n",
"    \"\"\"Plot two histograms of the encoder's outputs on the global X_valid.\n",
"\n",
"    Left: distribution of all individual activations. Right: distribution of\n",
"    the activations averaged over axis 0 (one mean per neuron). A dashed line\n",
"    marks the overall mean on both plots. The n_bins bins span [0, 1], and\n",
"    `height` is the upper limit of the y axis.\n",
"    \"\"\"\n",
"    codings = encoder(X_valid).numpy()\n",
"    neuron_means = codings.mean(axis=0)\n",
"    overall_mean = neuron_means.mean()\n",
"    bins = np.linspace(0, 1, n_bins + 1)\n",
"\n",
"    fig, [ax1, ax2] = plt.subplots(figsize=(10, 3), nrows=1, ncols=2, sharey=True)\n",
"\n",
"    # Left panel: every individual activation\n",
"    plot_percent_hist(ax1, codings.ravel(), bins)\n",
"    mean_label = \"Overall Mean = {:.2f}\".format(overall_mean)\n",
"    ax1.plot([overall_mean, overall_mean], [0, height], \"k--\", label=mean_label)\n",
"    ax1.legend(loc=\"upper center\", fontsize=14)\n",
"    ax1.set_xlabel(\"Activation\")\n",
"    ax1.set_ylabel(\"% Activations\")\n",
"    ax1.axis([0, 1, 0, height])\n",
"\n",
"    # Right panel: mean activation of each neuron\n",
"    plot_percent_hist(ax2, neuron_means, bins)\n",
"    ax2.plot([overall_mean, overall_mean], [0, height], \"k--\")\n",
"    ax2.set_xlabel(\"Neuron Mean Activation\")\n",
"    ax2.set_ylabel(\"% Neurons\")\n",
"    ax2.axis([0, 1, 0, height])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's use these functions to plot histograms of the activations of the encoding layer. The histogram on the left shows the distribution of all the activations. You can see that values close to 0 or 1 are more frequent overall, which is consistent with the saturating nature of the sigmoid function. The histogram on the right shows the distribution of mean neuron activations: you can see that most neurons have a mean activation close to 0.5. Both histograms tell us that each neuron tends to either fire close to 0 or 1, with about 50% probability each. However, some neurons fire almost all the time (right side of the right histogram)."
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"plot_activations_histogram(simple_encoder, height=0.35)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's add $\\ell_1$ regularization to the coding layer:"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"sparse_l1_encoder = keras.models.Sequential([\n",
" keras.layers.Flatten(input_shape=[28, 28]),\n",
" keras.layers.Dense(100, activation=\"selu\"),\n",
" keras.layers.Dense(300, activation=\"sigmoid\"),\n",
" keras.layers.ActivityRegularization(l1=1e-3) # Alternatively, you could add\n",
" # activity_regularizer=keras.regularizers.l1(1e-3)\n",
" # to the previous layer.\n",
"])\n",
"sparse_l1_decoder = keras.models.Sequential([\n",
" keras.layers.Dense(100, activation=\"selu\", input_shape=[300]),\n",
" keras.layers.Dense(28 * 28, activation=\"sigmoid\"),\n",
" keras.layers.Reshape([28, 28])\n",
"])\n",
"sparse_l1_ae = keras.models.Sequential([sparse_l1_encoder, sparse_l1_decoder])\n",
"# `learning_rate` replaces the deprecated `lr` alias\n",
"sparse_l1_ae.compile(loss=\"binary_crossentropy\",\n",
"                     optimizer=keras.optimizers.SGD(learning_rate=1.0),\n",
"                     metrics=[\"accuracy\"])\n",
"# validation_data is a tuple (inputs, targets), not a list\n",
"history = sparse_l1_ae.fit(X_train, X_train, epochs=10,\n",
"                           validation_data=(X_valid, X_valid))"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"show_reconstructions(sparse_l1_ae)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"plot_activations_histogram(sparse_l1_encoder, height=1.)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's use the KL Divergence loss instead to ensure sparsity, and target 10% sparsity rather than 0%:"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"p = 0.1\n",
"q = np.linspace(0.001, 0.999, 500)\n",
"kl_div = p * np.log(p / q) + (1 - p) * np.log((1 - p) / (1 - q))\n",
"mse = (p - q)**2\n",
"mae = np.abs(p - q)\n",
"plt.plot([p, p], [0, 0.3], \"k:\")\n",
"plt.text(0.05, 0.32, \"Target\\nsparsity\", fontsize=14)\n",
"plt.plot(q, kl_div, \"b-\", label=\"KL divergence\")\n",
"plt.plot(q, mae, \"g--\", label=r\"MAE ($\\ell_1$)\")\n",
"plt.plot(q, mse, \"r--\", linewidth=1, label=r\"MSE ($\\ell_2$)\")\n",
"plt.legend(loc=\"upper left\", fontsize=14)\n",
"plt.xlabel(\"Actual sparsity\")\n",
"plt.ylabel(\"Cost\", rotation=0)\n",
"plt.axis([0, 1, 0, 0.95])\n",
"save_fig(\"sparsity_loss_plot\")"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"K = keras.backend\n",
"\n",
"class KLDivergenceRegularizer(keras.regularizers.Regularizer):\n",
"    \"\"\"Activity regularizer that penalizes mean activations far from `target`.\n",
"\n",
"    The penalty is `weight` times the sum of two KL divergence terms: one\n",
"    between `target` and the mean activations, and one between their\n",
"    complements (1 - target and 1 - mean activations).\n",
"    \"\"\"\n",
"    def __init__(self, weight, target=0.1):\n",
"        self.weight = weight\n",
"        self.target = target\n",
"    def __call__(self, inputs):\n",
"        # Mean over axis 0, presumably the batch axis: one mean activation per unit\n",
"        mean_activities = K.mean(inputs, axis=0)\n",
"        return self.weight * (\n",
"            keras.losses.kullback_leibler_divergence(self.target, mean_activities) +\n",
"            keras.losses.kullback_leibler_divergence(1. - self.target, 1. - mean_activities))"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"kld_reg = KLDivergenceRegularizer(weight=0.05, target=0.1)\n",
"sparse_kl_encoder = keras.models.Sequential([\n",
" keras.layers.Flatten(input_shape=[28, 28]),\n",
" keras.layers.Dense(100, activation=\"selu\"),\n",
" keras.layers.Dense(300, activation=\"sigmoid\", activity_regularizer=kld_reg)\n",
"])\n",
"sparse_kl_decoder = keras.models.Sequential([\n",
" keras.layers.Dense(100, activation=\"selu\", input_shape=[300]),\n",
" keras.layers.Dense(28 * 28, activation=\"sigmoid\"),\n",
" keras.layers.Reshape([28, 28])\n",
"])\n",
"sparse_kl_ae = keras.models.Sequential([sparse_kl_encoder, sparse_kl_decoder])\n",
"# `learning_rate` replaces the deprecated `lr` alias\n",
"sparse_kl_ae.compile(loss=\"binary_crossentropy\",\n",
"                     optimizer=keras.optimizers.SGD(learning_rate=1.0),\n",
"                     metrics=[\"accuracy\"])\n",
"# validation_data is a tuple (inputs, targets), not a list\n",
"history = sparse_kl_ae.fit(X_train, X_train, epochs=10,\n",
"                           validation_data=(X_valid, X_valid))"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"show_reconstructions(sparse_kl_ae)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"plot_activations_histogram(sparse_kl_encoder)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Hashing Autoencoder"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"hashing_encoder = keras.models.Sequential([\n",
" keras.layers.Flatten(input_shape=[28, 28]),\n",
" keras.layers.Dense(100, activation=\"selu\"),\n",
" keras.layers.GaussianNoise(15.),\n",
" keras.layers.Dense(16, activation=\"sigmoid\"),\n",
"])\n",
"hashing_decoder = keras.models.Sequential([\n",
" keras.layers.Dense(100, activation=\"selu\", input_shape=[16]),\n",
" keras.layers.Dense(28 * 28, activation=\"sigmoid\"),\n",
" keras.layers.Reshape([28, 28])\n",
"])\n",
"hashing_ae = keras.models.Sequential([hashing_encoder, hashing_decoder])\n",
"# `learning_rate` replaces the deprecated `lr` alias\n",
"hashing_ae.compile(loss=\"binary_crossentropy\",\n",
"                   optimizer=keras.optimizers.SGD(learning_rate=1.0),\n",
"                   metrics=[\"accuracy\"])\n",
"# validation_data is a tuple (inputs, targets), not a list\n",
"history = hashing_ae.fit(X_train, X_train, epochs=10,\n",
"                         validation_data=(X_valid, X_valid))"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"show_reconstructions(hashing_ae)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"plot_activations_histogram(hashing_encoder)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"hashes = np.round(hashing_encoder.predict(X_valid)).astype(np.int32)\n",
"hashes *= np.array([[2**bit for bit in range(16)]])\n",
"hashes = hashes.sum(axis=1)\n",
"for h in hashes[:5]:\n",
" print(\"{:016b}\".format(h))\n",
"print(\"...\")"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"n_bits = 4\n",
"n_images = 8\n",
"plt.figure(figsize=(n_images, n_bits))\n",
"# One row per hash bit, showing up to n_images validation images that have that bit set\n",
"for bit_index in range(n_bits):\n",
"    in_bucket = (hashes & 2**bit_index != 0)\n",
"    for index, image in zip(range(n_images), X_valid[in_bucket]):\n",
"        plt.subplot(n_bits, n_images, bit_index * n_images + index + 1)\n",
"        plt.imshow(image, cmap=\"binary\")\n",
"        plt.axis(\"off\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Variational Autoencoder"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"class Sampling(keras.layers.Layer):\n",
"    \"\"\"Sample codings given the mean and log-variance of their distribution.\n",
"\n",
"    `inputs` is the pair [mean, log_var]. The output is\n",
"    mean + exp(log_var / 2) * noise, where noise is drawn by K.random_normal()\n",
"    with the same shape as log_var.\n",
"    \"\"\"\n",
"    def call(self, inputs):\n",
"        mean, log_var = inputs\n",
"        return mean + K.exp(log_var / 2) * K.random_normal(shape=tf.shape(log_var))"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"codings_size = 30\n",
"\n",
"inputs = keras.layers.Input(shape=[28, 28])\n",
"z = keras.layers.Flatten()(inputs)\n",
"z = keras.layers.Dense(150, activation=\"selu\")(z)\n",
"z = keras.layers.Dense(100, activation=\"selu\")(z)\n",
"codings_mean = keras.layers.Dense(codings_size)(z)\n",
"codings_log_var = keras.layers.Dense(codings_size)(z)\n",
"codings = Sampling()([codings_mean, codings_log_var])\n",
"variational_encoder = keras.models.Model(inputs=[inputs], outputs=[codings_mean, codings_log_var, codings])\n",
"\n",
"decoder_inputs = keras.layers.Input(shape=[codings_size])\n",
"x = keras.layers.Dense(100, activation=\"selu\")(decoder_inputs)\n",
"x = keras.layers.Dense(150, activation=\"selu\")(x)\n",
"x = keras.layers.Dense(28 * 28, activation=\"sigmoid\")(x)\n",
"outputs = keras.layers.Reshape([28, 28])(x)\n",
"variational_decoder = keras.models.Model(inputs=[decoder_inputs], outputs=[outputs])\n",
"\n",
"_, _, codings = variational_encoder(inputs)\n",
"reconstructions = variational_decoder(codings)\n",
"variational_ae = keras.models.Model(inputs=[inputs], outputs=[reconstructions])\n",
"\n",
"# Latent loss, summed over the last axis (the codings) for each instance\n",
"kld_loss = -0.5 * K.sum(1 + codings_log_var - K.exp(codings_log_var) - K.square(codings_mean), axis=-1)\n",
"# Batch mean, divided by 784 (= 28 * 28), presumably to put it on the same\n",
"# per-pixel scale as the binary cross-entropy reconstruction loss\n",
"variational_ae.add_loss(K.mean(kld_loss) / 784.)\n",
"variational_ae.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\", metrics=[\"accuracy\"])\n",
"# validation_data is a tuple (inputs, targets), not a list\n",
"history = variational_ae.fit(X_train, X_train, epochs=50,\n",
"                             validation_data=(X_valid, X_valid),\n",
"                             batch_size=128)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"show_reconstructions(variational_ae)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generate Fashion Images"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's generate a few random fashion images by sampling random codings and decoding them with the trained decoder:"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42)\n",
"np.random.seed(42)\n",
"\n",
"n_rows = 6\n",
"n_cols = 10\n",
"codings_rnd = np.random.normal(size=[n_rows * n_cols, codings_size])\n",
"images = variational_decoder.predict(codings_rnd)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"def plot_multiple_images(images, n_rows, n_cols, pad=2):\n",
"    \"\"\"Tile `images` into an n_rows x n_cols grid and display it in greyscale.\n",
"\n",
"    `images` is indexed as [image, row, column] and must hold at least\n",
"    n_rows * n_cols images; they are laid out row by row with `pad` pixels of\n",
"    padding around each one. Non-square images are supported.\n",
"    \"\"\"\n",
"    images = images - images.min()  # make the minimum == 0, so the padding looks white\n",
"    h, w = images.shape[1:]  # axis 1 indexes rows (height), axis 2 columns (width)\n",
"    image = np.zeros(((h + pad) * n_rows + pad, (w + pad) * n_cols + pad))\n",
"    for y in range(n_rows):\n",
"        for x in range(n_cols):\n",
"            top = y * (h + pad) + pad\n",
"            left = x * (w + pad) + pad\n",
"            image[top:top + h, left:left + w] = images[y * n_cols + x]\n",
"    plt.imshow(image, cmap=\"Greys\", interpolation=\"nearest\")\n",
"    plt.axis(\"off\")"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(8, 5))\n",
"plot_multiple_images(images, n_rows, n_cols)\n",
"save_fig(\"generated_fashion_images_plot\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Encode & Decode"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"n_iterations = 3\n",
"n_images = 10\n",
"source_codings = np.random.normal(size=[n_images, codings_size])\n",
"# Each coding is interpolated towards the next one (the last wraps around to the first)\n",
"target_codings = np.roll(source_codings, -1, axis=0)\n",
"images = []\n",
"for iteration in range(n_iterations):\n",
"    codings_interpolate = source_codings + (target_codings - source_codings) * iteration / n_iterations\n",
"    images.append(variational_decoder(codings_interpolate).numpy())\n",
"images = np.concatenate(images)\n",
"\n",
"plt.figure(figsize=(8, 3))\n",
"# One row per interpolation step and one column per image: use n_images here\n",
"# instead of relying on n_cols defined in an earlier cell.\n",
"plot_multiple_images(images, n_iterations, n_images)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
},
"nav_menu": {
"height": "381px",
"width": "453px"
},
"toc": {
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 6,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 1
}