diff --git a/15_autoencoders.ipynb b/15_autoencoders.ipynb new file mode 100644 index 0000000..c987351 --- /dev/null +++ b/15_autoencoders.ipynb @@ -0,0 +1,1588 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 15 – Autoencoders**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook contains all the sample code and solutions to the exercises in chapter 15._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's make sure this notebook works well in both Python 2 and 3, import a few common modules, ensure matplotlib plots figures inline and prepare a function to save the figures:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# To support both python 2 and python 3\n", + "from __future__ import division, print_function, unicode_literals\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import numpy.random as rnd\n", + "import os\n", + "import sys  # used by the training loops below to flush progress output\n", + "\n", + "# to make this notebook's output stable across runs\n", + "rnd.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['axes.labelsize'] = 14\n", + "plt.rcParams['xtick.labelsize'] = 12\n", + "plt.rcParams['ytick.labelsize'] = 12\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"autoencoders\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + "    path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + "    print(\"Saving figure\", fig_id)\n", + "    if tight_layout:\n", + "        plt.tight_layout()\n", + "    plt.savefig(path, format='png', dpi=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A couple of utility functions to plot grayscale 28x28 images:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def plot_image(image, shape=[28, 28]):\n", + "    plt.imshow(image.reshape(shape), cmap=\"Greys\", interpolation=\"nearest\")\n", + "    plt.axis(\"off\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def plot_multiple_images(images, n_rows, n_cols, pad=2):\n", + "    images = images - images.min()  # make the minimum == 0, so the padding looks white\n", + "    h, w = images.shape[1:]  # (rows, cols) of each image\n", + "    image = np.zeros(((h+pad)*n_rows+pad, (w+pad)*n_cols+pad))\n", + "    for y in range(n_rows):\n", + "        for x in range(n_cols):\n", + "            image[(y*(h+pad)+pad):(y*(h+pad)+pad+h),(x*(w+pad)+pad):(x*(w+pad)+pad+w)] = images[y*n_cols+x]\n", + "    plt.imshow(image, cmap=\"Greys\", interpolation=\"nearest\")\n", + "    plt.axis(\"off\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PCA with a linear Autoencoder" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Build 3D dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "rnd.seed(4)\n", + "m = 100\n", + "w1, w2 = 0.1, 0.3\n", + "noise = 0.1\n", + "\n", + "angles = rnd.rand(m) * 3 * np.pi / 2 - 0.5\n", + "X_train = np.empty((m, 3))\n", + "X_train[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * rnd.randn(m) / 2\n", +
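"# Aside (comment not in the original notebook): the third dimension below is a\n", + "# noisy linear combination of the first two, so the data lies close to a 2D\n", + "# plane and two codings can capture most of its variance.\n", +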
"X_train[:, 1] = np.sin(angles) * 0.7 + noise * rnd.randn(m) / 2\n", + "X_train[:, 2] = X_train[:, 0] * w1 + X_train[:, 1] * w2 + noise * rnd.randn(m)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Normalize the data:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "scaler = StandardScaler()\n", + "X_train = scaler.fit_transform(X_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Going to need TensorFlow..." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import tensorflow as tf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's build the Autoencoder:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_inputs = 3\n", + "n_hidden = 2 # codings\n", + "n_outputs = n_inputs\n", + "\n", + "learning_rate = 0.01\n", + "\n", + "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", + "hidden = fully_connected(X, n_hidden, activation_fn=None)\n", + "outputs = fully_connected(hidden, n_outputs, activation_fn=None)\n", + "\n", + "mse = tf.reduce_sum(tf.square(outputs - X))\n", + "\n", + "optimizer = tf.train.AdamOptimizer(learning_rate)\n", + "training_op = optimizer.minimize(mse)\n", + "\n", + "init = tf.initialize_all_variables()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_iterations = 10000\n", + "codings = hidden\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for iteration in range(n_iterations):\n", + " training_op.run(feed_dict={X: X_train})\n", + " codings_val = codings.eval(feed_dict={X: X_train})" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fig = plt.figure(figsize=(4,3))\n", + "plt.plot(codings_val[:,0], codings_val[:, 1], \"b.\")\n", + "plt.xlabel(\"$z_1$\", fontsize=18)\n", + "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", + "save_fig(\"linear_autoencoder_pca_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Stacked Autoencoders" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's use MNIST:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from tensorflow.examples.tutorials.mnist import input_data\n", + "mnist = input_data.read_data_sets(\"/tmp/data/\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train all layers at once" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's build a stacked Autoencoder with 3 hidden layers and 1 output layer (ie. 2 stacked Autoencoders). We will use ELU activation, He initialization and L2 regularization." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import fully_connected\n", + "\n", + "n_inputs = 28*28\n", + "n_hidden1 = 300\n", + "n_hidden2 = 150  # codings\n", + "n_hidden3 = n_hidden1\n", + "n_outputs = n_inputs\n", + "\n", + "learning_rate = 0.01\n", + "l2_reg = 0.0001\n", + "\n", + "initializer = tf.contrib.layers.variance_scaling_initializer()  # He initialization\n", + "#Equivalent to:\n", + "#initializer = lambda shape, dtype=tf.float32: tf.truncated_normal(shape, 0., stddev=np.sqrt(2/shape[0]))\n", + "\n", + "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", + "with tf.contrib.framework.arg_scope(\n", + "        [fully_connected],\n", + "        activation_fn=tf.nn.elu,\n", + "        weights_initializer=initializer,\n", + "        weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):\n", + "    hidden1 = fully_connected(X, n_hidden1)\n", + "    hidden2 = fully_connected(hidden1, n_hidden2)\n", + "    hidden3 = fully_connected(hidden2, n_hidden3)\n", + "    outputs = fully_connected(hidden3, n_outputs, activation_fn=None)\n", + "\n", + "mse = tf.reduce_mean(tf.square(outputs - X))\n", + "\n", + "reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n", + "loss = tf.add_n([mse] + reg_losses)  # reg_losses is a list of tensors\n", + "\n", + "optimizer = tf.train.AdamOptimizer(learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's train it! Note that we don't feed target values (`y_batch` is not used). This is unsupervised training." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 4\n", + "batch_size = 150\n", + "\n", + "with tf.Session() as sess:\n", + "    init.run()\n", + "    for epoch in range(n_epochs):\n", + "        n_batches = mnist.train.num_examples // batch_size\n", + "        for iteration in range(n_batches):\n", + "            print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", + "            sys.stdout.flush()\n", + "            X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + "            sess.run(training_op, feed_dict={X: X_batch})\n", + "        mse_train = mse.eval(feed_dict={X: X_batch})\n", + "        print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n", + "        saver.save(sess, \"my_model_all_layers.ckpt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This function loads the model, evaluates it on the test set (it measures the reconstruction error), then displays the original image and its reconstruction:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def show_reconstructed_digits(X, outputs, model_path = None, n_test_digits = 2):\n", + "    with tf.Session() as sess:\n", + "        if model_path:\n", + "            saver.restore(sess, model_path)\n", + "        X_test = mnist.test.images[:n_test_digits]\n", + "        outputs_val = outputs.eval(feed_dict={X: X_test})\n", + "\n", + "    fig = plt.figure(figsize=(8, 3 * n_test_digits))\n", + "    for digit_index in range(n_test_digits):\n", + "        plt.subplot(n_test_digits, 2, digit_index * 2 + 1)\n", + "        plot_image(X_test[digit_index])\n", + "        plt.subplot(n_test_digits, 2, digit_index * 2 + 2)\n", + "        plot_image(outputs_val[digit_index])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, +
"metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_reconstructed_digits(X, outputs, \"my_model_all_layers.ckpt\")\n", + "save_fig(\"reconstruction_plot\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training one Autoencoder at a time in multiple graphs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are many ways to train one Autoencoder at a time. The first approach it to train each Autoencoder using a different graph, then we create the Stacked Autoencoder by simply initializing it with the weights and biases copied from these Autoencoders." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's create a function that will train one autoencoder and return the transformed training set (ie. the output of the hidden layer) and the model parameters." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def train_autoencoder(X_train, n_neurons, n_epochs, batch_size, learning_rate = 0.01, l2_reg = 0.0005, activation_fn=tf.nn.elu):\n", + " graph = tf.Graph()\n", + " with graph.as_default():\n", + " n_inputs = X_train.shape[1]\n", + "\n", + " X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", + " with tf.contrib.framework.arg_scope(\n", + " [fully_connected],\n", + " activation_fn=activation_fn,\n", + " weights_initializer=tf.contrib.layers.variance_scaling_initializer(),\n", + " weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):\n", + " hidden = fully_connected(X, n_neurons, scope=\"hidden\")\n", + " outputs = fully_connected(hidden, n_inputs, activation_fn=None, scope=\"outputs\")\n", + "\n", + " mse = tf.reduce_mean(tf.square(outputs - X))\n", + "\n", + " reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n", + " loss = mse + reg_losses\n", + "\n", + " optimizer = tf.train.AdamOptimizer(learning_rate)\n", + " training_op = optimizer.minimize(loss)\n", + "\n", + " init = tf.initialize_all_variables()\n", + "\n", + " with tf.Session(graph=graph) as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " n_batches = len(X_train) // batch_size\n", + " for iteration in range(n_batches):\n", + " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", + " sys.stdout.flush()\n", + " indices = rnd.permutation(len(X_train))[:batch_size]\n", + " X_batch = X_train[indices]\n", + " sess.run(training_op, feed_dict={X: X_batch})\n", + " mse_train = mse.eval(feed_dict={X: X_batch})\n", + " print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n", + " params = dict([(var.name, var.eval()) for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])\n", + " hidden_val = hidden.eval(feed_dict={X: X_train})\n", + " return hidden_val, params[\"hidden/weights:0\"], params[\"hidden/biases:0\"], params[\"outputs/weights:0\"], params[\"outputs/biases:0\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's train two Autoencoders. 
The first one is trained on the training data, and the second is trained on the previous Autoencoder's hidden layer output:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "hidden_output, W1, b1, W4, b4 = train_autoencoder(mnist.train.images, n_neurons=300, n_epochs=4, batch_size=150)\n", + "_, W2, b2, W3, b3 = train_autoencoder(hidden_output, n_neurons=150, n_epochs=4, batch_size=150)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can create a Stacked Autoencoder by simply reusing the weights and biases from the Autoencoders we just trained:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 28*28\n", + "\n", + "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", + "hidden1 = tf.nn.elu(tf.matmul(X, W1) + b1)\n", + "hidden2 = tf.nn.elu(tf.matmul(hidden1, W2) + b2)\n", + "hidden3 = tf.nn.elu(tf.matmul(hidden2, W3) + b3)\n", + "outputs = tf.matmul(hidden3, W4) + b4" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_reconstructed_digits(X, outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training one Autoencoder at a time in a single graph" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Another approach is to use a single graph. To do this, we create the graph for the full Stacked Autoencoder, but then we also add operations to train each Autoencoder independently: phase 1 trains the bottom and top layer (i.e. the first Autoencoder) and phase 2 trains the two middle layers (i.e. the second Autoencoder)."
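The key mechanism in phase 2 is the `var_list` argument of `minimize()`, which restricts which variables receive gradient updates. Here is a tiny standalone sketch of that mechanism (toy variables, not part of the stacked model):

```python
# The loss depends on both variables, but var_list=[b] means only b is
# updated; a stays frozen at its initial value.
import tensorflow as tf

a = tf.Variable(2.0, name="a")
b = tf.Variable(3.0, name="b")
loss = tf.square(a * b - 12.0)

training_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss, var_list=[b])

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for _ in range(100):
        sess.run(training_op)
    print(sess.run([a, b]))  # a is still 2.0; b has converged close to 6.0
```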
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 28 * 28\n", + "n_hidden1 = 300\n", + "n_hidden2 = 150 # codings\n", + "n_hidden3 = n_hidden1\n", + "n_outputs = n_inputs\n", + "\n", + "learning_rate = 0.01\n", + "l2_reg = 0.0001\n", + "\n", + "activation = tf.nn.elu\n", + "regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n", + "initializer = tf.contrib.layers.variance_scaling_initializer()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", + "\n", + "weights1_init = initializer([n_inputs, n_hidden1])\n", + "weights2_init = initializer([n_hidden1, n_hidden2])\n", + "weights3_init = initializer([n_hidden2, n_hidden3])\n", + "weights4_init = initializer([n_hidden3, n_outputs])\n", + "\n", + "weights1 = tf.Variable(weights1_init, dtype=tf.float32, name=\"weights1\")\n", + "weights2 = tf.Variable(weights2_init, dtype=tf.float32, name=\"weights2\")\n", + "weights3 = tf.Variable(weights3_init, dtype=tf.float32, name=\"weights3\")\n", + "weights4 = tf.Variable(weights4_init, dtype=tf.float32, name=\"weights4\")\n", + "\n", + "biases1 = tf.Variable(tf.zeros(n_hidden1), name=\"biases1\")\n", + "biases2 = tf.Variable(tf.zeros(n_hidden2), name=\"biases2\")\n", + "biases3 = tf.Variable(tf.zeros(n_hidden3), name=\"biases3\")\n", + "biases4 = tf.Variable(tf.zeros(n_outputs), name=\"biases4\")\n", + "\n", + "hidden1 = activation(tf.matmul(X, weights1) + biases1)\n", + "hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)\n", + "hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)\n", + "outputs = tf.matmul(hidden3, weights4) + biases4\n", + "\n", + "\n", + "with tf.name_scope(\"phase1\"):\n", + " optimizer = tf.train.AdamOptimizer(learning_rate)\n", + " phase1_outputs = tf.matmul(hidden1, weights4) + biases4 # bypass hidden2 and hidden3\n", + " phase1_mse = tf.reduce_mean(tf.square(phase1_outputs - X))\n", + " phase1_reg_loss = regularizer(weights1) + regularizer(weights4)\n", + " phase1_loss = phase1_mse + phase1_reg_loss\n", + " phase1_training_op = optimizer.minimize(phase1_loss)\n", + "\n", + "with tf.name_scope(\"phase2\"):\n", + " optimizer = tf.train.AdamOptimizer(learning_rate)\n", + " phase2_mse = tf.reduce_mean(tf.square(hidden3 - hidden1))\n", + " phase2_reg_loss = regularizer(weights2) + regularizer(weights3)\n", + " phase2_loss = phase2_mse + phase2_reg_loss\n", + " phase2_training_op = optimizer.minimize(phase2_loss, var_list=[weights2, biases2, weights3, biases3]) # freeze hidden1\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "training_ops = [phase1_training_op, phase2_training_op]\n", + "mses = [phase1_mse, phase2_mse]\n", + "n_epochs = [4, 4]\n", + "batch_sizes = [150, 150]\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for phase in range(2):\n", + " print(\"Training phase #{}\".format(phase + 1))\n", + " for epoch in range(n_epochs[phase]):\n", + " n_batches = mnist.train.num_examples // batch_sizes[phase]\n", + " for iteration in range(n_batches):\n", + " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", + " sys.stdout.flush()\n", + " X_batch, y_batch = mnist.train.next_batch(batch_sizes[phase])\n", + " sess.run(training_ops[phase], feed_dict={X: X_batch})\n", + " mse_train = 
mses[phase].eval(feed_dict={X: X_batch})\n", + "            print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n", + "            saver.save(sess, \"my_model_one_at_a_time.ckpt\")\n", + "    mse_test = mses[phase].eval(feed_dict={X: mnist.test.images})\n", + "    print(\"Test MSE:\", mse_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_reconstructed_digits(X, outputs, \"my_model_one_at_a_time.ckpt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cache the frozen layer outputs" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "training_ops = [phase1_training_op, phase2_training_op]\n", + "mses = [phase1_mse, phase2_mse]\n", + "n_epochs = [4, 4]\n", + "batch_sizes = [150, 150]\n", + "\n", + "with tf.Session() as sess:\n", + "    init.run()\n", + "    for phase in range(2):\n", + "        print(\"Training phase #{}\".format(phase + 1))\n", + "        if phase == 1:\n", + "            mnist_hidden1 = hidden1.eval(feed_dict={X: mnist.train.images})\n", + "        for epoch in range(n_epochs[phase]):\n", + "            n_batches = mnist.train.num_examples // batch_sizes[phase]\n", + "            for iteration in range(n_batches):\n", + "                print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", + "                sys.stdout.flush()\n", + "                if phase == 1:\n", + "                    indices = rnd.permutation(len(mnist_hidden1))\n", + "                    hidden1_batch = mnist_hidden1[indices[:batch_sizes[phase]]]\n", + "                    feed_dict = {hidden1: hidden1_batch}\n", + "                    sess.run(training_ops[phase], feed_dict=feed_dict)\n", + "                else:\n", + "                    X_batch, y_batch = mnist.train.next_batch(batch_sizes[phase])\n", + "                    feed_dict = {X: X_batch}\n", + "                    sess.run(training_ops[phase], feed_dict=feed_dict)\n", + "            mse_train = mses[phase].eval(feed_dict=feed_dict)\n", + "            print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n", + "            saver.save(sess, \"my_model_cache_frozen.ckpt\")\n", + "    mse_test = mses[phase].eval(feed_dict={X: mnist.test.images})\n", + "    print(\"Test MSE:\", mse_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "show_reconstructed_digits(X, outputs, \"my_model_cache_frozen.ckpt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tying weights" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). 
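Note that `tf.transpose()` adds an op to the graph rather than creating a new variable, so the tied decoder weights are not trainable parameters themselves: the optimizer only updates `weights1`, `weights2` and the (untied) biases. A quick sanity check, once the graph below is built (an aside, not in the original notebook):

```python
# Only the encoder weight matrices and the four bias vectors are trainable;
# weights3 and weights4 do not appear since they are transpose ops.
for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
    print(var.name)  # weights1:0, weights2:0, biases1:0, ..., biases4:0
```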
Unfortunately this makes it impossible (or very tricky) to use the `fully_connected()` function, so we need to build the Autoencoder manually:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 28 * 28\n", + "n_hidden1 = 300\n", + "n_hidden2 = 150 # codings\n", + "n_hidden3 = n_hidden1\n", + "n_outputs = n_inputs\n", + "\n", + "learning_rate = 0.01\n", + "l2_reg = 0.0005\n", + "\n", + "activation = tf.nn.elu\n", + "regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n", + "initializer = tf.contrib.layers.variance_scaling_initializer()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", + "\n", + "weights1_init = initializer([n_inputs, n_hidden1])\n", + "weights2_init = initializer([n_hidden1, n_hidden2])\n", + "\n", + "weights1 = tf.Variable(weights1_init, dtype=tf.float32, name=\"weights1\")\n", + "weights2 = tf.Variable(weights2_init, dtype=tf.float32, name=\"weights2\")\n", + "weights3 = tf.transpose(weights2, name=\"weights3\") # tied weights\n", + "weights4 = tf.transpose(weights1, name=\"weights4\") # tied weights\n", + "\n", + "biases1 = tf.Variable(tf.zeros(n_hidden1), name=\"biases1\")\n", + "biases2 = tf.Variable(tf.zeros(n_hidden2), name=\"biases2\")\n", + "biases3 = tf.Variable(tf.zeros(n_hidden3), name=\"biases3\")\n", + "biases4 = tf.Variable(tf.zeros(n_outputs), name=\"biases4\")\n", + "\n", + "hidden1 = activation(tf.matmul(X, weights1) + biases1)\n", + "hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)\n", + "hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)\n", + "outputs = tf.matmul(hidden3, weights4) + biases4\n", + "\n", + "mse = tf.reduce_mean(tf.square(outputs - X))\n", + "reg_loss = regularizer(weights1) + regularizer(weights2)\n", + "loss = mse + reg_loss\n", + "\n", + "optimizer = tf.train.AdamOptimizer(learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 5\n", + "batch_size = 150\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " n_batches = mnist.train.num_examples // batch_size\n", + " for iteration in range(n_batches):\n", + " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", + " sys.stdout.flush()\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch})\n", + " mse_train = mse.eval(feed_dict={X: X_batch})\n", + " print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n", + " saver.save(sess, \"my_model_tying_weights.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_reconstructed_digits(X, outputs, \"my_model_tying_weights.ckpt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Unsupervised pretraining" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 28 * 28\n", + "n_hidden1 = 300\n", + "n_hidden2 = 150\n", + "n_outputs = 10\n", + "\n", + "learning_rate = 0.01\n", + "l2_reg = 0.0005\n", + "\n", + "activation = tf.nn.elu\n", + "regularizer = 
tf.contrib.layers.l2_regularizer(l2_reg)\n", + "initializer = tf.contrib.layers.variance_scaling_initializer()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", + "y = tf.placeholder(tf.int32, shape=[None])\n", + "\n", + "weights1_init = initializer([n_inputs, n_hidden1])\n", + "weights2_init = initializer([n_hidden1, n_hidden2])\n", + "weights3_init = initializer([n_hidden2, n_outputs])\n", + "\n", + "weights1 = tf.Variable(weights1_init, dtype=tf.float32, name=\"weights1\")\n", + "weights2 = tf.Variable(weights2_init, dtype=tf.float32, name=\"weights2\")\n", + "weights3 = tf.Variable(weights3_init, dtype=tf.float32, name=\"weights3\")\n", + "\n", + "biases1 = tf.Variable(tf.zeros(n_hidden1), name=\"biases1\")\n", + "biases2 = tf.Variable(tf.zeros(n_hidden2), name=\"biases2\")\n", + "biases3 = tf.Variable(tf.zeros(n_outputs), name=\"biases3\")\n", + "\n", + "hidden1 = activation(tf.matmul(X, weights1) + biases1)\n", + "hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)\n", + "logits = tf.matmul(hidden2, weights3) + biases3\n", + "\n", + "cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y))\n", + "reg_loss = regularizer(weights1) + regularizer(weights2) + regularizer(weights3)\n", + "loss = cross_entropy + reg_loss\n", + "optimizer = tf.train.AdamOptimizer(learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "\n", + "correct = tf.nn.in_top_k(logits, y, 1)\n", + "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", + "\n", + "init = tf.initialize_all_variables()\n", + "pretrain_saver = tf.train.Saver([weights1, weights2, biases1, biases2])\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Regular training (without pretraining):" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 4\n", + "batch_size = 150\n", + "n_labeled_instances = 20000\n", + "\n", + "with tf.Session() as sess:\n", + "    init.run()\n", + "    for epoch in range(n_epochs):\n", + "        n_batches = n_labeled_instances // batch_size\n", + "        for iteration in range(n_batches):\n", + "            print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", + "            sys.stdout.flush()\n", + "            indices = rnd.permutation(n_labeled_instances)[:batch_size]\n", + "            X_batch, y_batch = mnist.train.images[indices], mnist.train.labels[indices]\n", + "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + "        accuracy_val = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + "        print(\"\\r{}\".format(epoch), \"Train accuracy:\", accuracy_val, end=\" \")\n", + "        saver.save(sess, \"my_model_supervised.ckpt\")\n", + "    accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n", + "    print(\"Test accuracy:\", accuracy_val)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's reuse the first two layers of the autoencoder we pretrained:" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 4\n", + "batch_size = 150\n", + "n_labeled_instances = 20000\n", + "\n", + "#training_op = optimizer.minimize(loss, var_list=[weights3, biases3])  # Freeze layers 1 and 2 (optional)\n", + "\n", + "with tf.Session() as sess:\n", + "    init.run()\n", + "    pretrain_saver.restore(sess, \"my_model_cache_frozen.ckpt\")\n", + "    for epoch in range(n_epochs):\n", + "        n_batches = n_labeled_instances // 
batch_size\n", + " for iteration in range(n_batches):\n", + " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", + " sys.stdout.flush()\n", + " indices = rnd.permutation(n_labeled_instances)[:batch_size]\n", + " X_batch, y_batch = mnist.train.images[indices], mnist.train.labels[indices]\n", + " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", + " accuracy_val = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " print(\"\\r{}\".format(epoch), \"Train accuracy:\", accuracy_val, end=\"\\t\")\n", + " saver.save(sess, \"my_model_supervised_pretrained.ckpt\")\n", + " accuracy_val = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n", + " print(\"Test accuracy:\", accuracy_val)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Stacked denoising Autoencoder" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "from tensorflow.contrib.layers import dropout\n", + "\n", + "n_inputs = 28 * 28\n", + "n_hidden1 = 300\n", + "n_hidden2 = 150 # codings\n", + "n_hidden3 = n_hidden1\n", + "n_outputs = n_inputs\n", + "\n", + "learning_rate = 0.01\n", + "l2_reg = 0.00001\n", + "keep_prob = 0.7\n", + "\n", + "activation = tf.nn.elu\n", + "regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n", + "initializer = tf.contrib.layers.variance_scaling_initializer()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", + "is_training = tf.placeholder_with_default(False, shape=(), name='is_training')\n", + "\n", + "X_drop = dropout(X, keep_prob, is_training=is_training)\n", + "\n", + "weights1_init = initializer([n_inputs, n_hidden1])\n", + "weights2_init = initializer([n_hidden1, n_hidden2])\n", + "\n", + "weights1 = tf.Variable(weights1_init, dtype=tf.float32, name=\"weights1\")\n", + "weights2 = tf.Variable(weights2_init, dtype=tf.float32, name=\"weights2\")\n", + "weights3 = tf.transpose(weights2, name=\"weights3\") # tied weights\n", + "weights4 = tf.transpose(weights1, name=\"weights4\") # tied weights\n", + "\n", + "biases1 = tf.Variable(tf.zeros(n_hidden1), name=\"biases1\")\n", + "biases2 = tf.Variable(tf.zeros(n_hidden2), name=\"biases2\")\n", + "biases3 = tf.Variable(tf.zeros(n_hidden3), name=\"biases3\")\n", + "biases4 = tf.Variable(tf.zeros(n_outputs), name=\"biases4\")\n", + "\n", + "hidden1 = activation(tf.matmul(X_drop, weights1) + biases1)\n", + "hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)\n", + "hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)\n", + "outputs = tf.matmul(hidden3, weights4) + biases4\n", + "\n", + "optimizer = tf.train.AdamOptimizer(learning_rate)\n", + "mse = tf.reduce_mean(tf.square(outputs - X))\n", + "reg_loss = regularizer(weights1) + regularizer(weights2)\n", + "loss = mse + reg_loss\n", + "training_op = optimizer.minimize(loss)\n", + " \n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 10\n", + "batch_size = 150\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " n_batches = mnist.train.num_examples // batch_size\n", + " for iteration in range(n_batches):\n", + " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", + " sys.stdout.flush()\n", + " X_batch, y_batch = 
mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch, is_training: True})\n", + " mse_train = mse.eval(feed_dict={X: X_batch, is_training: False})\n", + " print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n", + " saver.save(sess, \"my_model_stacked_denoising.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_reconstructed_digits(X, outputs, \"my_model_stacked_denoising.ckpt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualizing the extracted features" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " saver.restore(sess, \"my_model_stacked_denoising.ckpt\")\n", + " weights1_val = weights1.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for i in range(5):\n", + " plt.subplot(1, 5, i + 1)\n", + " plot_image(weights1_val.T[i])\n", + "\n", + "save_fig(\"extracted_features_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sparse Autoencoder" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p = 0.1\n", + "q = np.linspace(0, 1, 500)\n", + "kl_div = p * np.log(p / q) + (1 - p) * np.log((1 - p) / (1 - q))\n", + "mse = (p - q)**2\n", + "plt.plot([p, p], [0, 0.3], \"k:\")\n", + "plt.text(0.05, 0.32, \"Target\\nsparsity\", fontsize=14)\n", + "plt.plot(q, kl_div, \"b-\", label=\"KL divergence\")\n", + "plt.plot(q, mse, \"r--\", label=\"MSE\")\n", + "plt.legend(loc=\"upper left\")\n", + "plt.xlabel(\"Actual sparsity\")\n", + "plt.ylabel(\"Cost\", rotation=0)\n", + "plt.axis([0, 1, 0, 0.95])\n", + "save_fig(\"sparsity_loss_plot\")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def kl_divergence(p, q):\n", + " \"\"\"Kullback Leibler divergence\"\"\"\n", + " return p * tf.log(p / q) + (1 - p) * tf.log((1 - p) / (1 - q))" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 28 * 28\n", + "n_hidden1 = 1000 # sparse codings\n", + "n_outputs = n_inputs\n", + "\n", + "learning_rate = 0.01\n", + "sparsity_target = 0.1\n", + "sparsity_weight = 0.2\n", + "\n", + "#activation = tf.nn.softplus # soft variant of ReLU\n", + "activation = tf.nn.sigmoid\n", + "initializer = tf.contrib.layers.variance_scaling_initializer()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", + "\n", + "weights1_init = initializer([n_inputs, n_hidden1])\n", + "weights2_init = initializer([n_hidden1, n_outputs])\n", + "\n", + "weights1 = tf.Variable(weights1_init, dtype=tf.float32, name=\"weights1\")\n", + "weights2 = tf.Variable(weights2_init, dtype=tf.float32, name=\"weights2\")\n", + "\n", + "biases1 = tf.Variable(tf.zeros(n_hidden1), name=\"biases1\")\n", + "biases2 = tf.Variable(tf.zeros(n_outputs), name=\"biases2\")\n", + "\n", + "hidden1 = activation(tf.matmul(X, weights1) + biases1)\n", + "outputs = tf.matmul(hidden1, weights2) + biases2\n", + "\n", + "optimizer = tf.train.AdamOptimizer(learning_rate)\n", + "mse = tf.reduce_mean(tf.square(outputs - X))\n", + 
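"# Aside (comment not in the original notebook): the KL-based penalty below\n", + "# pushes each hidden unit's mean activation over the batch toward\n", + "# sparsity_target; unlike a squared-error penalty, it grows without bound as\n", + "# the mean activation approaches 0 or 1 (see the sparsity_loss_plot above).\n", +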
"\n", + "hidden1_mean = tf.reduce_mean(hidden1, reduction_indices=0) # batch mean\n", + "sparsity_loss = tf.reduce_sum(kl_divergence(sparsity_target, hidden1_mean))\n", + "loss = mse + sparsity_weight * sparsity_loss\n", + "training_op = optimizer.minimize(loss)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 100\n", + "batch_size = 1000\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " n_batches = mnist.train.num_examples // batch_size\n", + " for iteration in range(n_batches):\n", + " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", + " sys.stdout.flush()\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch})\n", + " mse_val, sparsity_loss_val, loss_val = sess.run([mse, sparsity_loss, loss], feed_dict={X: X_batch})\n", + " print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_val, \"\\tSparsity loss:\", sparsity_loss_val, \"\\tTotal loss:\", loss_val)\n", + " saver.save(sess, \"my_model_sparse.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "show_reconstructed_digits(X, outputs, \"my_model_sparse.ckpt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Variational Autoencoder" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 28*28\n", + "n_hidden1 = 500\n", + "n_hidden2 = 500\n", + "n_hidden3 = 20 # codings\n", + "n_hidden4 = n_hidden2\n", + "n_hidden5 = n_hidden1\n", + "n_outputs = n_inputs\n", + "\n", + "learning_rate = 0.001\n", + "\n", + "activation = tf.nn.elu\n", + "initializer = tf.contrib.layers.variance_scaling_initializer(mode=\"FAN_AVG\",\n", + " uniform=True)\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_inputs])\n", + "\n", + "weights1 = tf.Variable(initializer([n_inputs, n_hidden1]))\n", + "weights2 = tf.Variable(initializer([n_hidden1, n_hidden2]))\n", + "weights3_mean = tf.Variable(initializer([n_hidden2, n_hidden3]))\n", + "weights3_log_sigma = tf.Variable(initializer([n_hidden2, n_hidden3]))\n", + "weights4 = tf.Variable(initializer([n_hidden3, n_hidden4]))\n", + "weights5 = tf.Variable(initializer([n_hidden4, n_hidden5]))\n", + "weights6 = tf.Variable(initializer([n_hidden5, n_inputs]))\n", + "\n", + "biases1 = tf.Variable(tf.zeros([n_hidden1], dtype=tf.float32))\n", + "biases2 = tf.Variable(tf.zeros([n_hidden2], dtype=tf.float32))\n", + "biases3_mean = tf.Variable(tf.zeros([n_hidden3], dtype=tf.float32))\n", + "biases3_log_sigma = tf.Variable(tf.zeros([n_hidden3], dtype=tf.float32))\n", + "biases4 = tf.Variable(tf.zeros([n_hidden4], dtype=tf.float32))\n", + "biases5 = tf.Variable(tf.zeros([n_hidden5], dtype=tf.float32))\n", + "biases6 = tf.Variable(tf.zeros([n_inputs], dtype=tf.float32))\n", + "\n", + "hidden1 = activation(tf.matmul(X, weights1) + biases1)\n", + "hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)\n", + "\n", + "hidden3_mean = tf.matmul(hidden2, weights3_mean) + biases3_mean\n", + "hidden3_log_sigma = tf.matmul(hidden2, weights3_log_sigma) + biases3_log_sigma\n", + "noise = tf.random_normal(tf.shape(hidden3_log_sigma), dtype=tf.float32)\n", + "hidden3 = 
hidden3_mean + tf.sqrt(tf.exp(hidden3_log_sigma)) * noise\n", + "\n", + "hidden4 = activation(tf.matmul(hidden3, weights4) + biases4)\n", + "hidden5 = activation(tf.matmul(hidden4, weights5) + biases5)\n", + "logits = tf.matmul(hidden5, weights6) + biases6\n", + "outputs = tf.sigmoid(logits)\n", + "\n", + "reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits, X))\n", + "latent_loss = 0.5 * tf.reduce_sum(tf.exp(hidden3_log_sigma) + tf.square(hidden3_mean) - 1 - hidden3_log_sigma)\n", + "cost = reconstruction_loss + latent_loss\n", + "\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(cost)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "n_inputs = 28*28\n", + "n_hidden1 = 500\n", + "n_hidden2 = 500\n", + "n_hidden3 = 20 # codings\n", + "n_hidden4 = n_hidden2\n", + "n_hidden5 = n_hidden1\n", + "n_outputs = n_inputs\n", + "\n", + "learning_rate = 0.001\n", + "\n", + "initializer = tf.contrib.layers.variance_scaling_initializer()\n", + "\n", + "with tf.contrib.framework.arg_scope([fully_connected],\n", + " activation_fn=tf.nn.elu,\n", + " weights_initializer=initializer):\n", + " X = tf.placeholder(tf.float32, [None, n_inputs])\n", + " hidden1 = fully_connected(X, n_hidden1)\n", + " hidden2 = fully_connected(hidden1, n_hidden2)\n", + " hidden3_mean = fully_connected(hidden2, n_hidden3, activation_fn=None)\n", + " hidden3_gamma = fully_connected(hidden2, n_hidden3, activation_fn=None)\n", + " noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n", + " hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n", + " hidden4 = fully_connected(hidden3, n_hidden4)\n", + " hidden5 = fully_connected(hidden4, n_hidden5)\n", + " logits = fully_connected(hidden5, n_outputs, activation_fn=None)\n", + " outputs = tf.sigmoid(logits)\n", + "\n", + "reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits, X))\n", + "latent_loss = 0.5 * tf.reduce_sum(tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)\n", + "cost = reconstruction_loss + latent_loss\n", + "\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(cost)\n", + "\n", + "init = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_epochs = 50\n", + "batch_size = 150\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " n_batches = mnist.train.num_examples // batch_size\n", + " for iteration in range(n_batches):\n", + " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", + " sys.stdout.flush()\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch})\n", + " cost_val, reconstruction_loss_val, latent_loss_val = sess.run([cost, reconstruction_loss, latent_loss], feed_dict={X: X_batch})\n", + " print(\"\\r{}\".format(epoch), \"Train cost:\", cost_val, \"\\tReconstruction loss:\", reconstruction_loss_val, \"\\tLatent loss:\", latent_loss_val)\n", + " saver.save(sess, \"my_model_variational.ckpt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": 
[ + "Encode:" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_digits = 3\n", + "X_test, y_test = mnist.test.next_batch(batch_size)\n", + "codings = hidden3\n", + "\n", + "with tf.Session() as sess:\n", + " saver.restore(sess, \"my_model_variational.ckpt\")\n", + " codings_val = codings.eval(feed_dict={X: X_test})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Decode:" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " saver.restore(sess, \"my_model_variational.ckpt\")\n", + " outputs_val = outputs.eval(feed_dict={codings: codings_val})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's plot the reconstructions:" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fig = plt.figure(figsize=(8, 2.5 * n_digits))\n", + "for iteration in range(n_digits):\n", + " plt.subplot(n_digits, 2, 1 + 2 * iteration)\n", + " plot_image(X_test[iteration])\n", + " plt.subplot(n_digits, 2, 2 + 2 * iteration)\n", + " plot_image(outputs_val[iteration])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generate digits" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "n_rows = 6\n", + "n_cols = 10\n", + "n_digits = n_rows * n_cols\n", + "codings_rnd = np.random.normal(size=[n_digits, n_hidden3])\n", + "\n", + "with tf.Session() as sess:\n", + " saver.restore(sess, \"my_model_variational.ckpt\")\n", + " outputs_val = outputs.eval(feed_dict={codings: codings_rnd})" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plot_multiple_images(outputs_val.reshape(-1, 28, 28), n_rows, n_cols)\n", + "save_fig(\"generated_digits_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "n_rows = 6\n", + "n_cols = 10\n", + "n_digits = n_rows * n_cols\n", + "codings_rnd = np.random.normal(size=[n_digits, n_hidden3])\n", + "\n", + "with tf.Session() as sess:\n", + " saver.restore(sess, \"my_model_variational.ckpt\")\n", + " outputs_val = outputs.eval(feed_dict={codings: codings_rnd})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Interpolate digits" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "n_iterations = 3\n", + "n_digits = 6\n", + "codings_rnd = np.random.normal(size=[n_digits, n_hidden3])\n", + "\n", + "with tf.Session() as sess:\n", + " saver.restore(sess, \"my_model_variational.ckpt\")\n", + " target_codings = np.roll(codings_rnd, -1, axis=0)\n", + " for iteration in range(n_iterations + 1):\n", + " codings_interpolate = codings_rnd + (target_codings - codings_rnd) * iteration / n_iterations\n", + " outputs_val = outputs.eval(feed_dict={codings: codings_interpolate})\n", + " plt.figure(figsize=(11, 1.5*n_iterations))\n", + " for digit_index in range(n_digits):\n", + " plt.subplot(1, n_digits, digit_index + 1)\n", + " plot_image(outputs_val[digit_index])\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Coming soon..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + }, + "nav_menu": { + "height": "381px", + "width": "453px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/index.ipynb b/index.ipynb index a1c72fe..5525d78 100644 --- a/index.ipynb +++ b/index.ipynb @@ -37,7 +37,7 @@ "12. [Distributed TensorFlow](12_distributed_tensorflow.ipynb)\n", "13. [Convolutional Neural Networks](13_convolutional_neural_networks.ipynb)\n", "14. [Recurrent Neural Networks](14_recurrent_neural_networks.ipynb)\n", - "15. Autoencoders (coming soon)\n", + "15. [Autoencoders](15_autoencoders.ipynb)\n", "16. Reinforcement Learning (coming soon)\n", "\n", "## Scientific Python tutorials\n",