handson-ml/11_training_deep_neural_net...

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Chapter 11 – Training Deep Neural Networks**"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "_This notebook contains all the sample code and solutions to the exercises in chapter 11._"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<table align=\"left\">\n",
    "  <td>\n",
    "    <a target=\"_blank\" href=\"https://colab.research.google.com/github/ageron/handson-ml2/blob/master/11_training_deep_neural_networks.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
    "  </td>\n",
    "</table>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Python ≥3.5 is required\n",
    "import sys\n",
    "assert sys.version_info >= (3, 5)\n",
    "\n",
    "# Scikit-Learn ≥0.20 is required\n",
    "import sklearn\n",
    "assert sklearn.__version__ >= \"0.20\"\n",
    "\n",
    "try:\n",
    "    # %tensorflow_version only exists in Colab.\n",
    "    %tensorflow_version 2.x\n",
    "except Exception:\n",
    "    pass\n",
    "\n",
    "# TensorFlow ≥2.0 is required\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "assert tf.__version__ >= \"2.0\"\n",
    "\n",
    "%load_ext tensorboard\n",
    "\n",
    "# Common imports\n",
    "import numpy as np\n",
    "import os\n",
    "\n",
    "# to make this notebook's output stable across runs\n",
    "np.random.seed(42)\n",
    "\n",
    "# To plot pretty figures\n",
    "%matplotlib inline\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "mpl.rc('axes', labelsize=14)\n",
    "mpl.rc('xtick', labelsize=12)\n",
    "mpl.rc('ytick', labelsize=12)\n",
    "\n",
    "# Where to save the figures\n",
    "PROJECT_ROOT_DIR = \".\"\n",
    "CHAPTER_ID = \"deep\"\n",
    "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
    "os.makedirs(IMAGES_PATH, exist_ok=True)\n",
    "\n",
    "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n",
    "    path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n",
    "    print(\"Saving figure\", fig_id)\n",
    "    if tight_layout:\n",
    "        plt.tight_layout()\n",
    "    plt.savefig(path, format=fig_extension, dpi=resolution)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Vanishing/Exploding Gradients Problem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def logit(z):\n",
    "    return 1 / (1 + np.exp(-z))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "z = np.linspace(-5, 5, 200)\n",
    "\n",
    "plt.plot([-5, 5], [0, 0], 'k-')\n",
    "plt.plot([-5, 5], [1, 1], 'k--')\n",
    "plt.plot([0, 0], [-0.2, 1.2], 'k-')\n",
    "plt.plot([-5, 5], [-3/4, 7/4], 'g--')\n",
    "plt.plot(z, logit(z), \"b-\", linewidth=2)\n",
    "props = dict(facecolor='black', shrink=0.1)\n",
    "plt.annotate('Saturating', xytext=(3.5, 0.7), xy=(5, 1), arrowprops=props, fontsize=14, ha=\"center\")\n",
    "plt.annotate('Saturating', xytext=(-3.5, 0.3), xy=(-5, 0), arrowprops=props, fontsize=14, ha=\"center\")\n",
    "plt.annotate('Linear', xytext=(2, 0.2), xy=(0, 0.5), arrowprops=props, fontsize=14, ha=\"center\")\n",
    "plt.grid(True)\n",
    "plt.title(\"Sigmoid activation function\", fontsize=14)\n",
    "plt.axis([-5, 5, -0.2, 1.2])\n",
    "\n",
    "save_fig(\"sigmoid_saturation_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Xavier and He Initialization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "[name for name in dir(keras.initializers) if not name.startswith(\"_\")]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "keras.layers.Dense(10, activation=\"relu\", kernel_initializer=\"he_normal\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "init = keras.initializers.VarianceScaling(scale=2., mode='fan_avg',\n",
    "                                          distribution='uniform')\n",
    "keras.layers.Dense(10, activation=\"relu\", kernel_initializer=init)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Nonsaturating Activation Functions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Leaky ReLU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def leaky_relu(z, alpha=0.01):\n",
    "    return np.maximum(alpha*z, z)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(z, leaky_relu(z, 0.05), \"b-\", linewidth=2)\n",
    "plt.plot([-5, 5], [0, 0], 'k-')\n",
    "plt.plot([0, 0], [-0.5, 4.2], 'k-')\n",
    "plt.grid(True)\n",
    "props = dict(facecolor='black', shrink=0.1)\n",
    "plt.annotate('Leak', xytext=(-3.5, 0.5), xy=(-5, -0.2), arrowprops=props, fontsize=14, ha=\"center\")\n",
    "plt.title(\"Leaky ReLU activation function\", fontsize=14)\n",
    "plt.axis([-5, 5, -0.5, 4.2])\n",
    "\n",
    "save_fig(\"leaky_relu_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "[m for m in dir(keras.activations) if not m.startswith(\"_\")]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "[m for m in dir(keras.layers) if \"relu\" in m.lower()]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's train a neural network on Fashion MNIST using the Leaky ReLU:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()\n",
    "X_train_full = X_train_full / 255.0\n",
    "X_test = X_test / 255.0\n",
    "X_valid, X_train = X_train_full[:5000], X_train_full[5000:]\n",
    "y_valid, y_train = y_train_full[:5000], y_train_full[5000:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    keras.layers.Dense(300, kernel_initializer=\"he_normal\"),\n",
    "    keras.layers.LeakyReLU(),\n",
    "    keras.layers.Dense(100, kernel_initializer=\"he_normal\"),\n",
    "    keras.layers.LeakyReLU(),\n",
    "    keras.layers.Dense(10, activation=\"softmax\")\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
    "              optimizer=keras.optimizers.SGD(lr=1e-3),\n",
    "              metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "history = model.fit(X_train, y_train, epochs=10,\n",
    "                    validation_data=(X_valid, y_valid))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now let's try PReLU:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    keras.layers.Dense(300, kernel_initializer=\"he_normal\"),\n",
    "    keras.layers.PReLU(),\n",
    "    keras.layers.Dense(100, kernel_initializer=\"he_normal\"),\n",
    "    keras.layers.PReLU(),\n",
    "    keras.layers.Dense(10, activation=\"softmax\")\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
    "              optimizer=keras.optimizers.SGD(lr=1e-3),\n",
    "              metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "history = model.fit(X_train, y_train, epochs=10,\n",
    "                    validation_data=(X_valid, y_valid))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### ELU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def elu(z, alpha=1):\n",
    "    return np.where(z < 0, alpha * (np.exp(z) - 1), z)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(z, elu(z), \"b-\", linewidth=2)\n",
    "plt.plot([-5, 5], [0, 0], 'k-')\n",
    "plt.plot([-5, 5], [-1, -1], 'k--')\n",
    "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n",
    "plt.grid(True)\n",
    "plt.title(r\"ELU activation function ($\\alpha=1$)\", fontsize=14)\n",
    "plt.axis([-5, 5, -2.2, 3.2])\n",
    "\n",
    "save_fig(\"elu_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Implementing ELU in TensorFlow is trivial, just specify the activation function when building each layer:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "keras.layers.Dense(10, activation=\"elu\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### SELU"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This activation function was proposed in this [great paper](https://arxiv.org/pdf/1706.02515.pdf) by Günter Klambauer, Thomas Unterthiner and Andreas Mayr, published in June 2017. During training, a neural network composed exclusively of a stack of dense layers using the SELU activation function and LeCun initialization will self-normalize: the output of each layer will tend to preserve the same mean and variance during training, which solves the vanishing/exploding gradients problem. As a result, this activation function outperforms the other activation functions very significantly for such neural nets, so you should really try it out. Unfortunately, the self-normalizing property of the SELU activation function is easily broken: you cannot use ℓ<sub>1</sub> or ℓ<sub>2</sub> regularization, regular dropout, max-norm, skip connections or other non-sequential topologies (so recurrent neural networks won't self-normalize). However, in practice it works quite well with sequential CNNs. If you break self-normalization, SELU will not necessarily outperform other activation functions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.special import erfc\n",
    "\n",
    "# alpha and scale to self normalize with mean 0 and standard deviation 1\n",
    "# (see equation 14 in the paper):\n",
    "alpha_0_1 = -np.sqrt(2 / np.pi) / (erfc(1/np.sqrt(2)) * np.exp(1/2) - 1)\n",
    "scale_0_1 = (1 - erfc(1 / np.sqrt(2)) * np.sqrt(np.e)) * np.sqrt(2 * np.pi) * (2 * erfc(np.sqrt(2))*np.e**2 + np.pi*erfc(1/np.sqrt(2))**2*np.e - 2*(2+np.pi)*erfc(1/np.sqrt(2))*np.sqrt(np.e)+np.pi+2)**(-1/2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def selu(z, scale=scale_0_1, alpha=alpha_0_1):\n",
    "    return scale * elu(z, alpha)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(z, selu(z), \"b-\", linewidth=2)\n",
    "plt.plot([-5, 5], [0, 0], 'k-')\n",
    "plt.plot([-5, 5], [-1.758, -1.758], 'k--')\n",
    "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n",
    "plt.grid(True)\n",
    "plt.title(\"SELU activation function\", fontsize=14)\n",
    "plt.axis([-5, 5, -2.2, 3.2])\n",
    "\n",
    "save_fig(\"selu_plot\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "By default, the SELU hyperparameters (`scale` and `alpha`) are tuned in such a way that the mean output of each neuron remains close to 0, and the standard deviation remains close to 1 (assuming the inputs are standardized with mean 0 and standard deviation 1 too). Using this activation function, even a 1,000 layer deep neural network preserves roughly mean 0 and standard deviation 1 across all layers, avoiding the exploding/vanishing gradients problem:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "Z = np.random.normal(size=(500, 100)) # standardized inputs\n",
    "for layer in range(1000):\n",
    "    W = np.random.normal(size=(100, 100), scale=np.sqrt(1 / 100)) # LeCun initialization\n",
    "    Z = selu(np.dot(Z, W))\n",
    "    means = np.mean(Z, axis=0).mean()\n",
    "    stds = np.std(Z, axis=0).mean()\n",
    "    if layer % 100 == 0:\n",
    "        print(\"Layer {}: mean {:.2f}, std deviation {:.2f}\".format(layer, means, stds))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using SELU is easy:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "keras.layers.Dense(10, activation=\"selu\",\n",
    "                   kernel_initializer=\"lecun_normal\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's create a neural net for Fashion MNIST with 100 hidden layers, using the SELU activation function:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = keras.models.Sequential()\n",
    "model.add(keras.layers.Flatten(input_shape=[28, 28]))\n",
    "model.add(keras.layers.Dense(300, activation=\"selu\",\n",
    "                             kernel_initializer=\"lecun_normal\"))\n",
    "for layer in range(99):\n",
    "    model.add(keras.layers.Dense(100, activation=\"selu\",\n",
    "                                 kernel_initializer=\"lecun_normal\"))\n",
    "model.add(keras.layers.Dense(10, activation=\"softmax\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
    "              optimizer=keras.optimizers.SGD(lr=1e-3),\n",
    "              metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now let's train it. Do not forget to scale the inputs to mean 0 and standard deviation 1:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "pixel_means = X_train.mean(axis=0, keepdims=True)\n",
    "pixel_stds = X_train.std(axis=0, keepdims=True)\n",
    "X_train_scaled = (X_train - pixel_means) / pixel_stds\n",
    "X_valid_scaled = (X_valid - pixel_means) / pixel_stds\n",
    "X_test_scaled = (X_test - pixel_means) / pixel_stds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "history = model.fit(X_train_scaled, y_train, epochs=5,\n",
    "                    validation_data=(X_valid_scaled, y_valid))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now look at what happens if we try to use the ReLU activation function instead:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "tf.random.set_seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = keras.models.Sequential()\n",
    "model.add(keras.layers.Flatten(input_shape=[28, 28]))\n",
    "model.add(keras.layers.Dense(300, activation=\"relu\", kernel_initializer=\"he_normal\"))\n",
    "for layer in range(99):\n",
    "    model.add(keras.layers.Dense(100, activation=\"relu\", kernel_initializer=\"he_normal\"))\n",
    "model.add(keras.layers.Dense(10, activation=\"softmax\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
    "              optimizer=keras.optimizers.SGD(lr=1e-3),\n",
    "              metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "history = model.fit(X_train_scaled, y_train, epochs=5,\n",
    "                    validation_data=(X_valid_scaled, y_valid))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Not great at all, we suffered from the vanishing/exploding gradients problem."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Batch Normalization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    keras.layers.BatchNormalization(),\n",
    "    keras.layers.Dense(300, activation=\"relu\"),\n",
    "    keras.layers.BatchNormalization(),\n",
    "    keras.layers.Dense(100, activation=\"relu\"),\n",
    "    keras.layers.BatchNormalization(),\n",
    "    keras.layers.Dense(10, activation=\"softmax\")\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "bn1 = model.layers[1]\n",
    "[(var.name, var.trainable) for var in bn1.variables]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "#bn1.updates #deprecated"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
    "              optimizer=keras.optimizers.SGD(lr=1e-3),\n",
    "              metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "history = model.fit(X_train, y_train, epochs=10,\n",
    "                    validation_data=(X_valid, y_valid))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Sometimes applying BN before the activation function works better (there's a debate on this topic). Moreover, the layer before a `BatchNormalization` layer does not need to have bias terms, since the `BatchNormalization` layer some as well, it would be a waste of parameters, so you can set `use_bias=False` when creating those layers:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    keras.layers.BatchNormalization(),\n",
    "    keras.layers.Dense(300, use_bias=False),\n",
    "    keras.layers.BatchNormalization(),\n",
    "    keras.layers.Activation(\"relu\"),\n",
    "    keras.layers.Dense(100, use_bias=False),\n",
    "    keras.layers.BatchNormalization(),\n",
    "    keras.layers.Activation(\"relu\"),\n",
    "    keras.layers.Dense(10, activation=\"softmax\")\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
    "              optimizer=keras.optimizers.SGD(lr=1e-3),\n",
    "              metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "history = model.fit(X_train, y_train, epochs=10,\n",
    "                    validation_data=(X_valid, y_valid))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Gradient Clipping"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "All Keras optimizers accept `clipnorm` or `clipvalue` arguments:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = keras.optimizers.SGD(clipvalue=1.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = keras.optimizers.SGD(clipnorm=1.0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reusing Pretrained Layers"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Reusing a Keras model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's split the fashion MNIST training set in two:\n",
    "* `X_train_A`: all images of all items except for sandals and shirts (classes 5 and 6).\n",
    "* `X_train_B`: a much smaller training set of just the first 200 images of sandals or shirts.\n",
    "\n",
    "The validation set and the test set are also split this way, but without restricting the number of images.\n",
    "\n",
    "We will train a model on set A (classification task with 8 classes), and try to reuse it to tackle set B (binary classification). We hope to transfer a little bit of knowledge from task A to task B, since classes in set A (sneakers, ankle boots, coats, t-shirts, etc.) are somewhat similar to classes in set B (sandals and shirts). However, since we are using `Dense` layers, only patterns that occur at the same location can be reused (in contrast, convolutional layers will transfer much better, since learned patterns can be detected anywhere on the image, as we will see in the CNN chapter)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "def split_dataset(X, y):\n",
    "    y_5_or_6 = (y == 5) | (y == 6) # sandals or shirts\n",
    "    y_A = y[~y_5_or_6]\n",
    "    y_A[y_A > 6] -= 2 # class indices 7, 8, 9 should be moved to 5, 6, 7\n",
    "    y_B = (y[y_5_or_6] == 6).astype(np.float32) # binary classification task: is it a shirt (class 6)?\n",
    "    return ((X[~y_5_or_6], y_A),\n",
    "            (X[y_5_or_6], y_B))\n",
    "\n",
    "(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)\n",
    "(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)\n",
    "(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)\n",
    "X_train_B = X_train_B[:200]\n",
    "y_train_B = y_train_B[:200]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train_A.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train_B.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train_A[:30]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train_B[:30]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_A = keras.models.Sequential()\n",
    "model_A.add(keras.layers.Flatten(input_shape=[28, 28]))\n",
    "for n_hidden in (300, 100, 50, 50, 50):\n",
    "    model_A.add(keras.layers.Dense(n_hidden, activation=\"selu\"))\n",
    "model_A.add(keras.layers.Dense(8, activation=\"softmax\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_A.compile(loss=\"sparse_categorical_crossentropy\",\n",
    "                optimizer=keras.optimizers.SGD(lr=1e-3),\n",
    "                metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "history = model_A.fit(X_train_A, y_train_A, epochs=20,\n",
    "                    validation_data=(X_valid_A, y_valid_A))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_A.save(\"my_model_A.h5\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_B = keras.models.Sequential()\n",
    "model_B.add(keras.layers.Flatten(input_shape=[28, 28]))\n",
    "for n_hidden in (300, 100, 50, 50, 50):\n",
    "    model_B.add(keras.layers.Dense(n_hidden, activation=\"selu\"))\n",
    "model_B.add(keras.layers.Dense(1, activation=\"sigmoid\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_B.compile(loss=\"binary_crossentropy\",\n",
    "                optimizer=keras.optimizers.SGD(lr=1e-3),\n",
    "                metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "history = model_B.fit(X_train_B, y_train_B, epochs=20,\n",
    "                      validation_data=(X_valid_B, y_valid_B))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_B.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_A = keras.models.load_model(\"my_model_A.h5\")\n",
    "model_B_on_A = keras.models.Sequential(model_A.layers[:-1])\n",
    "model_B_on_A.add(keras.layers.Dense(1, activation=\"sigmoid\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_A_clone = keras.models.clone_model(model_A)\n",
    "model_A_clone.set_weights(model_A.get_weights())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "for layer in model_B_on_A.layers[:-1]:\n",
    "    layer.trainable = False\n",
    "\n",
    "model_B_on_A.compile(loss=\"binary_crossentropy\",\n",
    "                     optimizer=keras.optimizers.SGD(lr=1e-3),\n",
    "                     metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "history = model_B_on_A.fit(X_train_B, y_train_B, epochs=4,\n",
    "                           validation_data=(X_valid_B, y_valid_B))\n",
    "\n",
    "for layer in model_B_on_A.layers[:-1]:\n",
    "    layer.trainable = True\n",
    "\n",
    "model_B_on_A.compile(loss=\"binary_crossentropy\",\n",
    "                     optimizer=keras.optimizers.SGD(lr=1e-3),\n",
    "                     metrics=[\"accuracy\"])\n",
    "history = model_B_on_A.fit(X_train_B, y_train_B, epochs=16,\n",
    "                           validation_data=(X_valid_B, y_valid_B))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "So, what's the final verdict?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_B.evaluate(X_test_B, y_test_B)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_B_on_A.evaluate(X_test_B, y_test_B)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Great! We got quite a bit of transfer: the error rate dropped by a factor of 4.5!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "(100 - 97.05) / (100 - 99.35)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Faster Optimizers"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Momentum optimization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Nesterov Accelerated Gradient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9, nesterov=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## AdaGrad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = keras.optimizers.Adagrad(lr=0.001)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## RMSProp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = keras.optimizers.RMSprop(lr=0.001, rho=0.9)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Adam Optimization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Adamax Optimization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = keras.optimizers.Adamax(lr=0.001, beta_1=0.9, beta_2=0.999)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Nadam Optimization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = keras.optimizers.Nadam(lr=0.001, beta_1=0.9, beta_2=0.999)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Learning Rate Scheduling"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Power Scheduling"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```lr = lr0 / (1 + steps / s)**c```\n",
    "* Keras uses `c=1` and `s = 1 / decay`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = keras.optimizers.SGD(lr=0.01, decay=1e-4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.Dense(10, activation=\"softmax\")\n",
    "])\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_epochs = 25\n",
    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
    "                    validation_data=(X_valid_scaled, y_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "learning_rate = 0.01\n",
    "decay = 1e-4\n",
    "batch_size = 32\n",
    "n_steps_per_epoch = math.ceil(len(X_train) / batch_size)\n",
    "epochs = np.arange(n_epochs)\n",
    "lrs = learning_rate / (1 + decay * epochs * n_steps_per_epoch)\n",
    "\n",
    "plt.plot(epochs, lrs,  \"o-\")\n",
    "plt.axis([0, n_epochs - 1, 0, 0.01])\n",
    "plt.xlabel(\"Epoch\")\n",
    "plt.ylabel(\"Learning Rate\")\n",
    "plt.title(\"Power Scheduling\", fontsize=14)\n",
    "plt.grid(True)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exponential Scheduling"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```lr = lr0 * 0.1**(epoch / s)```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "def exponential_decay_fn(epoch):\n",
    "    return 0.01 * 0.1**(epoch / 20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "def exponential_decay(lr0, s):\n",
    "    def exponential_decay_fn(epoch):\n",
    "        return lr0 * 0.1**(epoch / s)\n",
    "    return exponential_decay_fn\n",
    "\n",
    "exponential_decay_fn = exponential_decay(lr0=0.01, s=20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.Dense(10, activation=\"softmax\")\n",
    "])\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
    "n_epochs = 25"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)\n",
    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
    "                    validation_data=(X_valid_scaled, y_valid),\n",
    "                    callbacks=[lr_scheduler])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(history.epoch, history.history[\"lr\"], \"o-\")\n",
    "plt.axis([0, n_epochs - 1, 0, 0.011])\n",
    "plt.xlabel(\"Epoch\")\n",
    "plt.ylabel(\"Learning Rate\")\n",
    "plt.title(\"Exponential Scheduling\", fontsize=14)\n",
    "plt.grid(True)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The schedule function can take the current learning rate as a second argument:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "def exponential_decay_fn(epoch, lr):\n",
    "    return lr * 0.1**(1 / 20)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you want to update the learning rate at each iteration rather than at each epoch, you must write your own callback class:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "K = keras.backend\n",
    "\n",
    "class ExponentialDecay(keras.callbacks.Callback):\n",
    "    def __init__(self, s=40000):\n",
    "        super().__init__()\n",
    "        self.s = s\n",
    "\n",
    "    def on_batch_begin(self, batch, logs=None):\n",
    "        # Note: the `batch` argument is reset at each epoch\n",
    "        lr = K.get_value(self.model.optimizer.lr)\n",
    "        K.set_value(self.model.optimizer.lr, lr * 0.1**(1 / s))\n",
    "\n",
    "    def on_epoch_end(self, epoch, logs=None):\n",
    "        logs = logs or {}\n",
    "        logs['lr'] = K.get_value(self.model.optimizer.lr)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.Dense(10, activation=\"softmax\")\n",
    "])\n",
    "lr0 = 0.01\n",
    "optimizer = keras.optimizers.Nadam(lr=lr0)\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
    "n_epochs = 25\n",
    "\n",
    "s = 20 * len(X_train) // 32 # number of steps in 20 epochs (batch size = 32)\n",
    "exp_decay = ExponentialDecay(s)\n",
    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
    "                    validation_data=(X_valid_scaled, y_valid),\n",
    "                    callbacks=[exp_decay])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_steps = n_epochs * len(X_train) // 32\n",
    "steps = np.arange(n_steps)\n",
    "lrs = lr0 * 0.1**(steps / s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "plt.plot(steps, lrs, \"-\", linewidth=2)\n",
    "plt.axis([0, n_steps - 1, 0, lr0 * 1.1])\n",
    "plt.xlabel(\"Batch\")\n",
    "plt.ylabel(\"Learning Rate\")\n",
    "plt.title(\"Exponential Scheduling (per batch)\", fontsize=14)\n",
    "plt.grid(True)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Piecewise Constant Scheduling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "def piecewise_constant_fn(epoch):\n",
    "    if epoch < 5:\n",
    "        return 0.01\n",
    "    elif epoch < 15:\n",
    "        return 0.005\n",
    "    else:\n",
    "        return 0.001"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "def piecewise_constant(boundaries, values):\n",
    "    boundaries = np.array([0] + boundaries)\n",
    "    values = np.array(values)\n",
    "    def piecewise_constant_fn(epoch):\n",
    "        return values[np.argmax(boundaries > epoch) - 1]\n",
    "    return piecewise_constant_fn\n",
    "\n",
    "piecewise_constant_fn = piecewise_constant([5, 15], [0.01, 0.005, 0.001])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "lr_scheduler = keras.callbacks.LearningRateScheduler(piecewise_constant_fn)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.Dense(10, activation=\"softmax\")\n",
    "])\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
    "n_epochs = 25\n",
    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
    "                    validation_data=(X_valid_scaled, y_valid),\n",
    "                    callbacks=[lr_scheduler])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(history.epoch, [piecewise_constant_fn(epoch) for epoch in history.epoch], \"o-\")\n",
    "plt.axis([0, n_epochs - 1, 0, 0.011])\n",
    "plt.xlabel(\"Epoch\")\n",
    "plt.ylabel(\"Learning Rate\")\n",
    "plt.title(\"Piecewise Constant Scheduling\", fontsize=14)\n",
    "plt.grid(True)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Performance Scheduling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.Dense(10, activation=\"softmax\")\n",
    "])\n",
    "optimizer = keras.optimizers.SGD(lr=0.02, momentum=0.9)\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
    "n_epochs = 25\n",
    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
    "                    validation_data=(X_valid_scaled, y_valid),\n",
    "                    callbacks=[lr_scheduler])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(history.epoch, history.history[\"lr\"], \"bo-\")\n",
    "plt.xlabel(\"Epoch\")\n",
    "plt.ylabel(\"Learning Rate\", color='b')\n",
    "plt.tick_params('y', colors='b')\n",
    "plt.gca().set_xlim(0, n_epochs - 1)\n",
    "plt.grid(True)\n",
    "\n",
    "ax2 = plt.gca().twinx()\n",
    "ax2.plot(history.epoch, history.history[\"val_loss\"], \"r^-\")\n",
    "ax2.set_ylabel('Validation Loss', color='r')\n",
    "ax2.tick_params('y', colors='r')\n",
    "\n",
    "plt.title(\"Reduce LR on Plateau\", fontsize=14)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### tf.keras schedulers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.Dense(10, activation=\"softmax\")\n",
    "])\n",
    "s = 20 * len(X_train) // 32 # number of steps in 20 epochs (batch size = 32)\n",
    "learning_rate = keras.optimizers.schedules.ExponentialDecay(0.01, s, 0.1)\n",
    "optimizer = keras.optimizers.SGD(learning_rate)\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
    "n_epochs = 25\n",
    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
    "                    validation_data=(X_valid_scaled, y_valid))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For piecewise constant scheduling, try this:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "learning_rate = keras.optimizers.schedules.PiecewiseConstantDecay(\n",
    "    boundaries=[5. * n_steps_per_epoch, 15. * n_steps_per_epoch],\n",
    "    values=[0.01, 0.005, 0.001])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1Cycle scheduling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "K = keras.backend\n",
    "\n",
    "class ExponentialLearningRate(keras.callbacks.Callback):\n",
    "    def __init__(self, factor):\n",
    "        self.factor = factor\n",
    "        self.rates = []\n",
    "        self.losses = []\n",
    "    def on_batch_end(self, batch, logs):\n",
    "        self.rates.append(K.get_value(self.model.optimizer.lr))\n",
    "        self.losses.append(logs[\"loss\"])\n",
    "        K.set_value(self.model.optimizer.lr, self.model.optimizer.lr * self.factor)\n",
    "\n",
    "def find_learning_rate(model, X, y, epochs=1, batch_size=32, min_rate=10**-5, max_rate=10):\n",
    "    init_weights = model.get_weights()\n",
    "    iterations = math.ceil(len(X) / batch_size) * epochs\n",
    "    factor = np.exp(np.log(max_rate / min_rate) / iterations)\n",
    "    init_lr = K.get_value(model.optimizer.lr)\n",
    "    K.set_value(model.optimizer.lr, min_rate)\n",
    "    exp_lr = ExponentialLearningRate(factor)\n",
    "    history = model.fit(X, y, epochs=epochs, batch_size=batch_size,\n",
    "                        callbacks=[exp_lr])\n",
    "    K.set_value(model.optimizer.lr, init_lr)\n",
    "    model.set_weights(init_weights)\n",
    "    return exp_lr.rates, exp_lr.losses\n",
    "\n",
    "def plot_lr_vs_loss(rates, losses):\n",
    "    plt.plot(rates, losses)\n",
    "    plt.gca().set_xscale('log')\n",
    "    plt.hlines(min(losses), min(rates), max(rates))\n",
    "    plt.axis([min(rates), max(rates), min(losses), (losses[0] + min(losses)) / 2])\n",
    "    plt.xlabel(\"Learning rate\")\n",
    "    plt.ylabel(\"Loss\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.Dense(10, activation=\"softmax\")\n",
    "])\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
    "              optimizer=keras.optimizers.SGD(lr=1e-3),\n",
    "              metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 128\n",
    "rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=batch_size)\n",
    "plot_lr_vs_loss(rates, losses)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "class OneCycleScheduler(keras.callbacks.Callback):\n",
    "    def __init__(self, iterations, max_rate, start_rate=None,\n",
    "                 last_iterations=None, last_rate=None):\n",
    "        self.iterations = iterations\n",
    "        self.max_rate = max_rate\n",
    "        self.start_rate = start_rate or max_rate / 10\n",
    "        self.last_iterations = last_iterations or iterations // 10 + 1\n",
    "        self.half_iteration = (iterations - self.last_iterations) // 2\n",
    "        self.last_rate = last_rate or self.start_rate / 1000\n",
    "        self.iteration = 0\n",
    "    def _interpolate(self, iter1, iter2, rate1, rate2):\n",
    "        return ((rate2 - rate1) * (self.iteration - iter1)\n",
    "                / (iter2 - iter1) + rate1)\n",
    "    def on_batch_begin(self, batch, logs):\n",
    "        if self.iteration < self.half_iteration:\n",
    "            rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)\n",
    "        elif self.iteration < 2 * self.half_iteration:\n",
    "            rate = self._interpolate(self.half_iteration, 2 * self.half_iteration,\n",
    "                                     self.max_rate, self.start_rate)\n",
    "        else:\n",
    "            rate = self._interpolate(2 * self.half_iteration, self.iterations,\n",
    "                                     self.start_rate, self.last_rate)\n",
    "        self.iteration += 1\n",
    "        K.set_value(self.model.optimizer.lr, rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_epochs = 25\n",
    "onecycle = OneCycleScheduler(math.ceil(len(X_train) / batch_size) * n_epochs, max_rate=0.05)\n",
    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs, batch_size=batch_size,\n",
    "                    validation_data=(X_valid_scaled, y_valid),\n",
    "                    callbacks=[onecycle])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Avoiding Overfitting Through Regularization"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## $\\ell_1$ and $\\ell_2$ regularization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "layer = keras.layers.Dense(100, activation=\"elu\",\n",
    "                           kernel_initializer=\"he_normal\",\n",
    "                           kernel_regularizer=keras.regularizers.l2(0.01))\n",
    "# or l1(0.1) for ℓ1 regularization with a factor or 0.1\n",
    "# or l1_l2(0.1, 0.01) for both ℓ1 and ℓ2 regularization, with factors 0.1 and 0.01 respectively"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    keras.layers.Dense(300, activation=\"elu\",\n",
    "                       kernel_initializer=\"he_normal\",\n",
    "                       kernel_regularizer=keras.regularizers.l2(0.01)),\n",
    "    keras.layers.Dense(100, activation=\"elu\",\n",
    "                       kernel_initializer=\"he_normal\",\n",
    "                       kernel_regularizer=keras.regularizers.l2(0.01)),\n",
    "    keras.layers.Dense(10, activation=\"softmax\",\n",
    "                       kernel_regularizer=keras.regularizers.l2(0.01))\n",
    "])\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
    "n_epochs = 2\n",
    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
    "                    validation_data=(X_valid_scaled, y_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [],
   "source": [
    "from functools import partial\n",
    "\n",
    "RegularizedDense = partial(keras.layers.Dense,\n",
    "                           activation=\"elu\",\n",
    "                           kernel_initializer=\"he_normal\",\n",
    "                           kernel_regularizer=keras.regularizers.l2(0.01))\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    RegularizedDense(300),\n",
    "    RegularizedDense(100),\n",
    "    RegularizedDense(10, activation=\"softmax\")\n",
    "])\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
    "n_epochs = 2\n",
    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
    "                    validation_data=(X_valid_scaled, y_valid))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dropout"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    keras.layers.Dropout(rate=0.2),\n",
    "    keras.layers.Dense(300, activation=\"elu\", kernel_initializer=\"he_normal\"),\n",
    "    keras.layers.Dropout(rate=0.2),\n",
    "    keras.layers.Dense(100, activation=\"elu\", kernel_initializer=\"he_normal\"),\n",
    "    keras.layers.Dropout(rate=0.2),\n",
    "    keras.layers.Dense(10, activation=\"softmax\")\n",
    "])\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
    "n_epochs = 2\n",
    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
    "                    validation_data=(X_valid_scaled, y_valid))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Alpha Dropout"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    keras.layers.AlphaDropout(rate=0.2),\n",
    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.AlphaDropout(rate=0.2),\n",
    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
    "    keras.layers.AlphaDropout(rate=0.2),\n",
    "    keras.layers.Dense(10, activation=\"softmax\")\n",
    "])\n",
    "optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
    "n_epochs = 20\n",
    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
    "                    validation_data=(X_valid_scaled, y_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.evaluate(X_test_scaled, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.evaluate(X_train_scaled, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [],
   "source": [
    "history = model.fit(X_train_scaled, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MC Dropout"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_probas = np.stack([model(X_test_scaled, training=True)\n",
    "                     for sample in range(100)])\n",
    "y_proba = y_probas.mean(axis=0)\n",
    "y_std = y_probas.std(axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.round(model.predict(X_test_scaled[:1]), 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.round(y_probas[:, :1], 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.round(y_proba[:1], 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_std = y_probas.std(axis=0)\n",
    "np.round(y_std[:1], 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = np.argmax(y_proba, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [],
   "source": [
    "accuracy = np.sum(y_pred == y_test) / len(y_test)\n",
    "accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MCDropout(keras.layers.Dropout):\n",
    "    def call(self, inputs):\n",
    "        return super().call(inputs, training=True)\n",
    "\n",
    "class MCAlphaDropout(keras.layers.AlphaDropout):\n",
    "    def call(self, inputs):\n",
    "        return super().call(inputs, training=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
    "mc_model = keras.models.Sequential([\n",
    "    MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer\n",
    "    for layer in model.layers\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [],
   "source": [
    "mc_model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n",
    "mc_model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "mc_model.set_weights(model.get_weights())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we can use the model with MC Dropout:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.round(np.mean([mc_model.predict(X_test_scaled[:1]) for sample in range(100)], axis=0), 2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Max norm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "layer = keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
    "                           kernel_constraint=keras.constraints.max_norm(1.))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [],
   "source": [
    "MaxNormDense = partial(keras.layers.Dense,\n",
    "                       activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
    "                       kernel_constraint=keras.constraints.max_norm(1.))\n",
    "\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
    "    MaxNormDense(300),\n",
    "    MaxNormDense(100),\n",
    "    keras.layers.Dense(10, activation=\"softmax\")\n",
    "])\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
    "n_epochs = 2\n",
    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
    "                    validation_data=(X_valid_scaled, y_valid))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercises"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. to 7."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "See appendix A."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Deep Learning on CIFAR10"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### a.\n",
    "*Exercise: Build a DNN with 20 hidden layers of 100 neurons each (that's too many, but it's the point of this exercise). Use He initialization and the ELU activation function.*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
    "keras.backend.clear_session()\n",
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)\n",
    "\n",
    "model = keras.models.Sequential()\n",
    "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n",
    "for _ in range(20):\n",
    "    model.add(keras.layers.Dense(100,\n",
    "                                 activation=\"elu\",\n",
    "                                 kernel_initializer=\"he_normal\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### b.\n",
    "*Exercise: Using Nadam optimization and early stopping, train the network on the CIFAR10 dataset. You can load it with `keras.datasets.cifar10.load_data()`. The dataset is composed of 60,000 32 × 32–pixel color images (50,000 for training, 10,000 for testing) with 10 classes, so you'll need a softmax output layer with 10 neurons. Remember to search for the right learning rate each time you change the model's architecture or hyperparameters.*"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's add the output layer to the model:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.add(keras.layers.Dense(10, activation=\"softmax\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's use a Nadam optimizer with a learning rate of 5e-5. I tried learning rates 1e-5, 3e-5, 1e-4, 3e-4, 1e-3, 3e-3 and 1e-2, and I compared their learning curves for 10 epochs each (using the TensorBoard callback, below). The learning rates 3e-5 and 1e-4 were pretty good, so I tried 5e-5, which turned out to be slightly better."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = keras.optimizers.Nadam(lr=5e-5)\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
    "              optimizer=optimizer,\n",
    "              metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's load the CIFAR10 dataset. We also want to use early stopping, so we need a validation set. Let's use the first 5,000 images of the original training set as the validation set:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [],
   "source": [
    "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()\n",
    "\n",
    "X_train = X_train_full[5000:]\n",
    "y_train = y_train_full[5000:]\n",
    "X_valid = X_train_full[:5000]\n",
    "y_valid = y_train_full[:5000]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we can create the callbacks we need and train the model:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [],
   "source": [
    "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n",
    "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_model.h5\", save_best_only=True)\n",
    "run_index = 1 # increment every time you train the model\n",
    "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_{:03d}\".format(run_index))\n",
    "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n",
    "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [],
   "source": [
    "%tensorboard --logdir=./my_cifar10_logs --port=6006"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.fit(X_train, y_train, epochs=100,\n",
    "          validation_data=(X_valid, y_valid),\n",
    "          callbacks=callbacks)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = keras.models.load_model(\"my_cifar10_model.h5\")\n",
    "model.evaluate(X_valid, y_valid)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The model with the lowest validation loss gets about 47.6% accuracy on the validation set. It took 27 epochs to reach the lowest validation loss, with roughly 8 seconds per epoch on my laptop (without a GPU). Let's see if we can improve performance using Batch Normalization."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### c.\n",
    "*Exercise: Now try adding Batch Normalization and compare the learning curves: Is it converging faster than before? Does it produce a better model? How does it affect training speed?*"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The code below is very similar to the code above, with a few changes:\n",
    "\n",
    "* I added a BN layer after every Dense layer (before the activation function), except for the output layer. I also added a BN layer before the first hidden layer.\n",
    "* I changed the learning rate to 5e-4. I experimented with 1e-5, 3e-5, 5e-5, 1e-4, 3e-4, 5e-4, 1e-3 and 3e-3, and I chose the one with the best validation performance after 20 epochs.\n",
    "* I renamed the run directories to run_bn_* and the model file name to my_cifar10_bn_model.h5."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [],
   "source": [
    "keras.backend.clear_session()\n",
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)\n",
    "\n",
    "model = keras.models.Sequential()\n",
    "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n",
    "model.add(keras.layers.BatchNormalization())\n",
    "for _ in range(20):\n",
    "    model.add(keras.layers.Dense(100, kernel_initializer=\"he_normal\"))\n",
    "    model.add(keras.layers.BatchNormalization())\n",
    "    model.add(keras.layers.Activation(\"elu\"))\n",
    "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n",
    "\n",
    "optimizer = keras.optimizers.Nadam(lr=5e-4)\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
    "              optimizer=optimizer,\n",
    "              metrics=[\"accuracy\"])\n",
    "\n",
    "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n",
    "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_bn_model.h5\", save_best_only=True)\n",
    "run_index = 1 # increment every time you train the model\n",
    "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_bn_{:03d}\".format(run_index))\n",
    "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n",
    "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n",
    "\n",
    "model.fit(X_train, y_train, epochs=100,\n",
    "          validation_data=(X_valid, y_valid),\n",
    "          callbacks=callbacks)\n",
    "\n",
    "model = keras.models.load_model(\"my_cifar10_bn_model.h5\")\n",
    "model.evaluate(X_valid, y_valid)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* *Is the model converging faster than before?* Much faster! The previous model took 27 epochs to reach the lowest validation loss, while the new model achieved that same loss in just 5 epochs and continued to make progress until the 16th epoch. The BN layers stabilized training and allowed us to use a much larger learning rate, so convergence was faster.\n",
    "* *Does BN produce a better model?* Yes! The final model is also much better, with 54.0% accuracy instead of 47.6%. It's still not a very good model, but at least it's much better than before (a Convolutional Neural Network would do much better, but that's a different topic, see chapter 14).\n",
    "* *How does BN affect training speed?* Although the model converged much faster, each epoch took about 12s instead of 8s, because of the extra computations required by the BN layers. But overall the training time (wall time) was shortened significantly!"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### d.\n",
    "*Exercise: Try replacing Batch Normalization with SELU, and make the necessary adjustements to ensure the network self-normalizes (i.e., standardize the input features, use LeCun normal initialization, make sure the DNN contains only a sequence of dense layers, etc.).*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "keras.backend.clear_session()\n",
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)\n",
    "\n",
    "model = keras.models.Sequential()\n",
    "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n",
    "for _ in range(20):\n",
    "    model.add(keras.layers.Dense(100,\n",
    "                                 kernel_initializer=\"lecun_normal\",\n",
    "                                 activation=\"selu\"))\n",
    "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n",
    "\n",
    "optimizer = keras.optimizers.Nadam(lr=7e-4)\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
    "              optimizer=optimizer,\n",
    "              metrics=[\"accuracy\"])\n",
    "\n",
    "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n",
    "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_selu_model.h5\", save_best_only=True)\n",
    "run_index = 1 # increment every time you train the model\n",
    "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_selu_{:03d}\".format(run_index))\n",
    "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n",
    "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n",
    "\n",
    "X_means = X_train.mean(axis=0)\n",
    "X_stds = X_train.std(axis=0)\n",
    "X_train_scaled = (X_train - X_means) / X_stds\n",
    "X_valid_scaled = (X_valid - X_means) / X_stds\n",
    "X_test_scaled = (X_test - X_means) / X_stds\n",
    "\n",
    "model.fit(X_train_scaled, y_train, epochs=100,\n",
    "          validation_data=(X_valid_scaled, y_valid),\n",
    "          callbacks=callbacks)\n",
    "\n",
    "model = keras.models.load_model(\"my_cifar10_selu_model.h5\")\n",
    "model.evaluate(X_valid_scaled, y_valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = keras.models.load_model(\"my_cifar10_selu_model.h5\")\n",
    "model.evaluate(X_valid_scaled, y_valid)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We get 47.9% accuracy, which is not much better than the original model (47.6%), and not as good as the model using batch normalization (54.0%). However, convergence was almost as fast as with the BN model, plus each epoch took only 7 seconds. So it's by far the fastest model to train so far."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### e.\n",
    "*Exercise: Try regularizing the model with alpha dropout. Then, without retraining your model, see if you can achieve better accuracy using MC Dropout.*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [],
   "source": [
    "keras.backend.clear_session()\n",
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)\n",
    "\n",
    "model = keras.models.Sequential()\n",
    "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n",
    "for _ in range(20):\n",
    "    model.add(keras.layers.Dense(100,\n",
    "                                 kernel_initializer=\"lecun_normal\",\n",
    "                                 activation=\"selu\"))\n",
    "\n",
    "model.add(keras.layers.AlphaDropout(rate=0.1))\n",
    "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n",
    "\n",
    "optimizer = keras.optimizers.Nadam(lr=5e-4)\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
    "              optimizer=optimizer,\n",
    "              metrics=[\"accuracy\"])\n",
    "\n",
    "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n",
    "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_alpha_dropout_model.h5\", save_best_only=True)\n",
    "run_index = 1 # increment every time you train the model\n",
    "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_alpha_dropout_{:03d}\".format(run_index))\n",
    "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n",
    "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n",
    "\n",
    "X_means = X_train.mean(axis=0)\n",
    "X_stds = X_train.std(axis=0)\n",
    "X_train_scaled = (X_train - X_means) / X_stds\n",
    "X_valid_scaled = (X_valid - X_means) / X_stds\n",
    "X_test_scaled = (X_test - X_means) / X_stds\n",
    "\n",
    "model.fit(X_train_scaled, y_train, epochs=100,\n",
    "          validation_data=(X_valid_scaled, y_valid),\n",
    "          callbacks=callbacks)\n",
    "\n",
    "model = keras.models.load_model(\"my_cifar10_alpha_dropout_model.h5\")\n",
    "model.evaluate(X_valid_scaled, y_valid)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The model reaches 48.9% accuracy on the validation set. That's very slightly better than without dropout (47.6%). With an extensive hyperparameter search, it might be possible to do better (I tried dropout rates of 5%, 10%, 20% and 40%, and learning rates 1e-4, 3e-4, 5e-4, and 1e-3), but probably not much better in this case."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's use MC Dropout now. We will need the `MCAlphaDropout` class we used earlier, so let's just copy it here for convenience:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MCAlphaDropout(keras.layers.AlphaDropout):\n",
    "    def call(self, inputs):\n",
    "        return super().call(inputs, training=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now let's create a new model, identical to the one we just trained (with the same weights), but with `MCAlphaDropout` dropout layers instead of `AlphaDropout` layers:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [],
   "source": [
    "mc_model = keras.models.Sequential([\n",
    "    MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer\n",
    "    for layer in model.layers\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then let's add a couple utility functions. The first will run the model many times (10 by default) and it will return the mean predicted class probabilities. The second will use these mean probabilities to predict the most likely class for each instance:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [],
   "source": [
    "def mc_dropout_predict_probas(mc_model, X, n_samples=10):\n",
    "    Y_probas = [mc_model.predict(X) for sample in range(n_samples)]\n",
    "    return np.mean(Y_probas, axis=0)\n",
    "\n",
    "def mc_dropout_predict_classes(mc_model, X, n_samples=10):\n",
    "    Y_probas = mc_dropout_predict_probas(mc_model, X, n_samples)\n",
    "    return np.argmax(Y_probas, axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now let's make predictions for all the instances in the validation set, and compute the accuracy:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [],
   "source": [
    "keras.backend.clear_session()\n",
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)\n",
    "\n",
    "y_pred = mc_dropout_predict_classes(mc_model, X_valid_scaled)\n",
    "accuracy = np.mean(y_pred == y_valid[:, 0])\n",
    "accuracy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We get no accuracy improvement in this case (we're still at 48.9% accuracy).\n",
    "\n",
    "So the best model we got in this exercise is the Batch Normalization model."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### f.\n",
    "*Exercise: Retrain your model using 1cycle scheduling and see if it improves training speed and model accuracy.*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [],
   "source": [
    "keras.backend.clear_session()\n",
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)\n",
    "\n",
    "model = keras.models.Sequential()\n",
    "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n",
    "for _ in range(20):\n",
    "    model.add(keras.layers.Dense(100,\n",
    "                                 kernel_initializer=\"lecun_normal\",\n",
    "                                 activation=\"selu\"))\n",
    "\n",
    "model.add(keras.layers.AlphaDropout(rate=0.1))\n",
    "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n",
    "\n",
    "optimizer = keras.optimizers.SGD(lr=1e-3)\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
    "              optimizer=optimizer,\n",
    "              metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 128\n",
    "rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=batch_size)\n",
    "plot_lr_vs_loss(rates, losses)\n",
    "plt.axis([min(rates), max(rates), min(losses), (losses[0] + min(losses)) / 1.4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [],
   "source": [
    "keras.backend.clear_session()\n",
    "tf.random.set_seed(42)\n",
    "np.random.seed(42)\n",
    "\n",
    "model = keras.models.Sequential()\n",
    "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n",
    "for _ in range(20):\n",
    "    model.add(keras.layers.Dense(100,\n",
    "                                 kernel_initializer=\"lecun_normal\",\n",
    "                                 activation=\"selu\"))\n",
    "\n",
    "model.add(keras.layers.AlphaDropout(rate=0.1))\n",
    "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n",
    "\n",
    "optimizer = keras.optimizers.SGD(lr=1e-2)\n",
    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
    "              optimizer=optimizer,\n",
    "              metrics=[\"accuracy\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_epochs = 15\n",
    "onecycle = OneCycleScheduler(math.ceil(len(X_train_scaled) / batch_size) * n_epochs, max_rate=0.05)\n",
    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs, batch_size=batch_size,\n",
    "                    validation_data=(X_valid_scaled, y_valid),\n",
    "                    callbacks=[onecycle])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "One cycle allowed us to train the model in just 15 epochs, each taking only 2 seconds (thanks to the larger batch size). This is several times faster than the fastest model we trained so far. Moreover, we improved the model's performance (from 47.6% to 52.0%). The batch normalized model reaches a slightly better performance (54%), but it's much slower to train."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  },
  "nav_menu": {
   "height": "360px",
   "width": "416px"
  },
  "toc": {
   "navigate_menu": true,
   "number_sections": true,
   "sideBar": true,
   "threshold": 6,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								{
 								 "cells": [
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "source": [
-												Rename chapters 11 to 15 and split chapter 15 into 15 and 16

											
										
										
											2019-04-16 14:39:14 +02:00
+								    "**Chapter 11 – Training Deep Neural Networks**"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "source": [
-												Fix typos

											
										
										
											2017-08-19 17:01:55 +02:00
+								    "_This notebook contains all the sample code and solutions to the exercises in chapter 11._"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
-												Make notebooks 10 and 11 runnable in Colab without changes

											
										
										
											2019-11-06 04:38:13 +01:00
+								  {
 								   "cell_type": "markdown",
 								   "metadata": {},
 								   "source": [
 								    "<table align=\"left\">\n",
 								    "  <td>\n",
 								    "    <a target=\"_blank\" href=\"https://colab.research.google.com/github/ageron/handson-ml2/blob/master/11_training_deep_neural_networks.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n",
 								    "  </td>\n",
 								    "</table>"
 								   ]
 								  },
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "source": [
 								    "# Setup"
 								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "source": [
-												Make notebooks 10 and 11 runnable in Colab without changes

											
										
										
											2019-11-06 04:38:13 +01:00
+								    "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0."
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
 								   "execution_count": 1,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "# Python ≥3.5 is required\n",
 								    "import sys\n",
 								    "assert sys.version_info >= (3, 5)\n",
 								    "\n",
 								    "# Scikit-Learn ≥0.20 is required\n",
 								    "import sklearn\n",
 								    "assert sklearn.__version__ >= \"0.20\"\n",
 								    "\n",
-												Make notebooks 10 and 11 runnable in Colab without changes

											
										
										
											2019-11-06 04:38:13 +01:00
+								    "try:\n",
 								    "    # %tensorflow_version only exists in Colab.\n",
 								    "    %tensorflow_version 2.x\n",
 								    "except Exception:\n",
 								    "    pass\n",
 								    "\n",
 								    "# TensorFlow ≥2.0 is required\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "import tensorflow as tf\n",
 								    "from tensorflow import keras\n",
 								    "assert tf.__version__ >= \"2.0\"\n",
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								    "\n",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "%load_ext tensorboard\n",
 								    "\n",
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								    "# Common imports\n",
 								    "import numpy as np\n",
 								    "import os\n",
 								    "\n",
 								    "# to make this notebook's output stable across runs\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "np.random.seed(42)\n",
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								    "\n",
 								    "# To plot pretty figures\n",
 								    "%matplotlib inline\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "import matplotlib as mpl\n",
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								    "import matplotlib.pyplot as plt\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "mpl.rc('axes', labelsize=14)\n",
 								    "mpl.rc('xtick', labelsize=12)\n",
 								    "mpl.rc('ytick', labelsize=12)\n",
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								    "\n",
 								    "# Where to save the figures\n",
 								    "PROJECT_ROOT_DIR = \".\"\n",
 								    "CHAPTER_ID = \"deep\"\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
 								    "os.makedirs(IMAGES_PATH, exist_ok=True)\n",
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								    "\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n",
 								    "    path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n",
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								    "    print(\"Saving figure\", fig_id)\n",
 								    "    if tight_layout:\n",
 								    "        plt.tight_layout()\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "    plt.savefig(path, format=fig_extension, dpi=resolution)"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "source": [
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								    "# Vanishing/Exploding Gradients Problem"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
 								   "execution_count": 2,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
 								   "source": [
 								    "def logit(z):\n",
 								    "    return 1 / (1 + np.exp(-z))"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
 								   "execution_count": 3,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "source": [
 								    "z = np.linspace(-5, 5, 200)\n",
 								    "\n",
 								    "plt.plot([-5, 5], [0, 0], 'k-')\n",
 								    "plt.plot([-5, 5], [1, 1], 'k--')\n",
 								    "plt.plot([0, 0], [-0.2, 1.2], 'k-')\n",
 								    "plt.plot([-5, 5], [-3/4, 7/4], 'g--')\n",
 								    "plt.plot(z, logit(z), \"b-\", linewidth=2)\n",
 								    "props = dict(facecolor='black', shrink=0.1)\n",
 								    "plt.annotate('Saturating', xytext=(3.5, 0.7), xy=(5, 1), arrowprops=props, fontsize=14, ha=\"center\")\n",
 								    "plt.annotate('Saturating', xytext=(-3.5, 0.3), xy=(-5, 0), arrowprops=props, fontsize=14, ha=\"center\")\n",
 								    "plt.annotate('Linear', xytext=(2, 0.2), xy=(0, 0.5), arrowprops=props, fontsize=14, ha=\"center\")\n",
 								    "plt.grid(True)\n",
 								    "plt.title(\"Sigmoid activation function\", fontsize=14)\n",
 								    "plt.axis([-5, 5, -0.2, 1.2])\n",
 								    "\n",
 								    "save_fig(\"sigmoid_saturation_plot\")\n",
 								    "plt.show()"
 								   ]
 								  },
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
 								    "## Xavier and He Initialization"
 								   ]
 								  },
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								  {
 								   "cell_type": "code",
 								   "execution_count": 4,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "[name for name in dir(keras.initializers) if not name.startswith(\"_\")]"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
 								   "execution_count": 5,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "keras.layers.Dense(10, activation=\"relu\", kernel_initializer=\"he_normal\")"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
 								   "execution_count": 6,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "init = keras.initializers.VarianceScaling(scale=2., mode='fan_avg',\n",
 								    "                                          distribution='uniform')\n",
 								    "keras.layers.Dense(10, activation=\"relu\", kernel_initializer=init)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
 								    "## Nonsaturating Activation Functions"
 								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
 								    "### Leaky ReLU"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
 								   "execution_count": 7,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
 								    "def leaky_relu(z, alpha=0.01):\n",
 								    "    return np.maximum(alpha*z, z)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
 								   "execution_count": 8,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "source": [
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								    "plt.plot(z, leaky_relu(z, 0.05), \"b-\", linewidth=2)\n",
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								    "plt.plot([-5, 5], [0, 0], 'k-')\n",
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								    "plt.plot([0, 0], [-0.5, 4.2], 'k-')\n",
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								    "plt.grid(True)\n",
 								    "props = dict(facecolor='black', shrink=0.1)\n",
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								    "plt.annotate('Leak', xytext=(-3.5, 0.5), xy=(-5, -0.2), arrowprops=props, fontsize=14, ha=\"center\")\n",
 								    "plt.title(\"Leaky ReLU activation function\", fontsize=14)\n",
 								    "plt.axis([-5, 5, -0.5, 4.2])\n",
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								    "\n",
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								    "save_fig(\"leaky_relu_plot\")\n",
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								    "plt.show()"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
 								   "execution_count": 9,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "[m for m in dir(keras.activations) if not m.startswith(\"_\")]"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
 								   "execution_count": 10,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "[m for m in dir(keras.layers) if \"relu\" in m.lower()]"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								  {
 								   "cell_type": "markdown",
 								   "metadata": {},
 								   "source": [
 								    "Let's train a neural network on Fashion MNIST using the Leaky ReLU:"
 								   ]
 								  },
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								  {
 								   "cell_type": "code",
 								   "execution_count": 11,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
 								   "source": [
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								    "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()\n",
 								    "X_train_full = X_train_full / 255.0\n",
 								    "X_test = X_test / 255.0\n",
 								    "X_valid, X_train = X_train_full[:5000], X_train_full[5000:]\n",
 								    "y_valid, y_train = y_train_full[:5000], y_train_full[5000:]"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								  {
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "cell_type": "code",
 								   "execution_count": 12,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								   "source": [
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)\n",
 								    "\n",
 								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    keras.layers.Dense(300, kernel_initializer=\"he_normal\"),\n",
 								    "    keras.layers.LeakyReLU(),\n",
 								    "    keras.layers.Dense(100, kernel_initializer=\"he_normal\"),\n",
 								    "    keras.layers.LeakyReLU(),\n",
 								    "    keras.layers.Dense(10, activation=\"softmax\")\n",
 								    "])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "cell_type": "code",
 								   "execution_count": 13,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
 								    "              optimizer=keras.optimizers.SGD(lr=1e-3),\n",
 								    "              metrics=[\"accuracy\"])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 14,
 								   "metadata": {
 								    "scrolled": true
 								   },
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								    "history = model.fit(X_train, y_train, epochs=10,\n",
 								    "                    validation_data=(X_valid, y_valid))"
 								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
 								   "metadata": {},
 								   "source": [
 								    "Now let's try PReLU:"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 15,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
 								   "source": [
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)\n",
 								    "\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								    "    keras.layers.Dense(300, kernel_initializer=\"he_normal\"),\n",
 								    "    keras.layers.PReLU(),\n",
 								    "    keras.layers.Dense(100, kernel_initializer=\"he_normal\"),\n",
 								    "    keras.layers.PReLU(),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "    keras.layers.Dense(10, activation=\"softmax\")\n",
 								    "])"
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   ]
 								  },
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								  {
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 16,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								   "source": [
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
 								    "              optimizer=keras.optimizers.SGD(lr=1e-3),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "              metrics=[\"accuracy\"])"
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								   ]
 								  },
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 17,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "history = model.fit(X_train, y_train, epochs=10,\n",
 								    "                    validation_data=(X_valid, y_valid))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
 								    "### ELU"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 18,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
 								    "def elu(z, alpha=1):\n",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								    "    return np.where(z < 0, alpha * (np.exp(z) - 1), z)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 19,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
 								    "plt.plot(z, elu(z), \"b-\", linewidth=2)\n",
 								    "plt.plot([-5, 5], [0, 0], 'k-')\n",
 								    "plt.plot([-5, 5], [-1, -1], 'k--')\n",
 								    "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n",
 								    "plt.grid(True)\n",
 								    "plt.title(r\"ELU activation function ($\\alpha=1$)\", fontsize=14)\n",
 								    "plt.axis([-5, 5, -2.2, 3.2])\n",
 								    "\n",
 								    "save_fig(\"elu_plot\")\n",
 								    "plt.show()"
 								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
 								    "Implementing ELU in TensorFlow is trivial, just specify the activation function when building each layer:"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 20,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "keras.layers.Dense(10, activation=\"elu\")"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "markdown",
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "### SELU"
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
 								   "metadata": {},
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "This activation function was proposed in this [great paper](https://arxiv.org/pdf/1706.02515.pdf) by Günter Klambauer, Thomas Unterthiner and Andreas Mayr, published in June 2017. During training, a neural network composed exclusively of a stack of dense layers using the SELU activation function and LeCun initialization will self-normalize: the output of each layer will tend to preserve the same mean and variance during training, which solves the vanishing/exploding gradients problem. As a result, this activation function outperforms the other activation functions very significantly for such neural nets, so you should really try it out. Unfortunately, the self-normalizing property of the SELU activation function is easily broken: you cannot use ℓ<sub>1</sub> or ℓ<sub>2</sub> regularization, regular dropout, max-norm, skip connections or other non-sequential topologies (so recurrent neural networks won't self-normalize). However, in practice it works quite well with sequential CNNs. If you break self-normalization, SELU will not necessarily outperform other activation functions."
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 21,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "from scipy.special import erfc\n",
 								    "\n",
 								    "# alpha and scale to self normalize with mean 0 and standard deviation 1\n",
 								    "# (see equation 14 in the paper):\n",
 								    "alpha_0_1 = -np.sqrt(2 / np.pi) / (erfc(1/np.sqrt(2)) * np.exp(1/2) - 1)\n",
 								    "scale_0_1 = (1 - erfc(1 / np.sqrt(2)) * np.sqrt(np.e)) * np.sqrt(2 * np.pi) * (2 * erfc(np.sqrt(2))*np.e**2 + np.pi*erfc(1/np.sqrt(2))**2*np.e - 2*(2+np.pi)*erfc(1/np.sqrt(2))*np.sqrt(np.e)+np.pi+2)**(-1/2)"
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 22,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "def selu(z, scale=scale_0_1, alpha=alpha_0_1):\n",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								    "    return scale * elu(z, alpha)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 23,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "source": [
 								    "plt.plot(z, selu(z), \"b-\", linewidth=2)\n",
 								    "plt.plot([-5, 5], [0, 0], 'k-')\n",
 								    "plt.plot([-5, 5], [-1.758, -1.758], 'k--')\n",
 								    "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n",
 								    "plt.grid(True)\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "plt.title(\"SELU activation function\", fontsize=14)\n",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								    "plt.axis([-5, 5, -2.2, 3.2])\n",
 								    "\n",
 								    "save_fig(\"selu_plot\")\n",
 								    "plt.show()"
 								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
 								   "metadata": {},
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "By default, the SELU hyperparameters (`scale` and `alpha`) are tuned in such a way that the mean output of each neuron remains close to 0, and the standard deviation remains close to 1 (assuming the inputs are standardized with mean 0 and standard deviation 1 too). Using this activation function, even a 1,000 layer deep neural network preserves roughly mean 0 and standard deviation 1 across all layers, avoiding the exploding/vanishing gradients problem:"
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 24,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "source": [
 								    "np.random.seed(42)\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "Z = np.random.normal(size=(500, 100)) # standardized inputs\n",
 								    "for layer in range(1000):\n",
 								    "    W = np.random.normal(size=(100, 100), scale=np.sqrt(1 / 100)) # LeCun initialization\n",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								    "    Z = selu(np.dot(Z, W))\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "    means = np.mean(Z, axis=0).mean()\n",
 								    "    stds = np.std(Z, axis=0).mean()\n",
 								    "    if layer % 100 == 0:\n",
 								    "        print(\"Layer {}: mean {:.2f}, std deviation {:.2f}\".format(layer, means, stds))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "Using SELU is easy:"
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 25,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "keras.layers.Dense(10, activation=\"selu\",\n",
 								    "                   kernel_initializer=\"lecun_normal\")"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "Let's create a neural net for Fashion MNIST with 100 hidden layers, using the SELU activation function:"
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 26,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "np.random.seed(42)\n",
 								    "tf.random.set_seed(42)"
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 27,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model = keras.models.Sequential()\n",
 								    "model.add(keras.layers.Flatten(input_shape=[28, 28]))\n",
 								    "model.add(keras.layers.Dense(300, activation=\"selu\",\n",
 								    "                             kernel_initializer=\"lecun_normal\"))\n",
 								    "for layer in range(99):\n",
 								    "    model.add(keras.layers.Dense(100, activation=\"selu\",\n",
 								    "                                 kernel_initializer=\"lecun_normal\"))\n",
 								    "model.add(keras.layers.Dense(10, activation=\"softmax\"))"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 28,
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
-												SGD now defaults to lr=0.01 so use 1e-3 explicitely

											
										
										
											2019-06-10 04:48:00 +02:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
 								    "              optimizer=keras.optimizers.SGD(lr=1e-3),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "              metrics=[\"accuracy\"])"
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
 								   "metadata": {},
 								   "source": [
 								    "Now let's train it. Do not forget to scale the inputs to mean 0 and standard deviation 1:"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 29,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "pixel_means = X_train.mean(axis=0, keepdims=True)\n",
 								    "pixel_stds = X_train.std(axis=0, keepdims=True)\n",
 								    "X_train_scaled = (X_train - pixel_means) / pixel_stds\n",
 								    "X_valid_scaled = (X_valid - pixel_means) / pixel_stds\n",
 								    "X_test_scaled = (X_test - pixel_means) / pixel_stds"
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 30,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "history = model.fit(X_train_scaled, y_train, epochs=5,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid))"
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
 								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "Now look at what happens if we try to use the ReLU activation function instead:"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 31,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "np.random.seed(42)\n",
 								    "tf.random.set_seed(42)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 32,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model = keras.models.Sequential()\n",
 								    "model.add(keras.layers.Flatten(input_shape=[28, 28]))\n",
 								    "model.add(keras.layers.Dense(300, activation=\"relu\", kernel_initializer=\"he_normal\"))\n",
 								    "for layer in range(99):\n",
 								    "    model.add(keras.layers.Dense(100, activation=\"relu\", kernel_initializer=\"he_normal\"))\n",
 								    "model.add(keras.layers.Dense(10, activation=\"softmax\"))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 33,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												SGD now defaults to lr=0.01 so use 1e-3 explicitely

											
										
										
											2019-06-10 04:48:00 +02:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
 								    "              optimizer=keras.optimizers.SGD(lr=1e-3),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "              metrics=[\"accuracy\"])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 34,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "history = model.fit(X_train_scaled, y_train, epochs=5,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid))"
 								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
 								   "metadata": {},
 								   "source": [
 								    "Not great at all, we suffered from the vanishing/exploding gradients problem."
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "# Batch Normalization"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 35,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    keras.layers.BatchNormalization(),\n",
 								    "    keras.layers.Dense(300, activation=\"relu\"),\n",
 								    "    keras.layers.BatchNormalization(),\n",
 								    "    keras.layers.Dense(100, activation=\"relu\"),\n",
 								    "    keras.layers.BatchNormalization(),\n",
 								    "    keras.layers.Dense(10, activation=\"softmax\")\n",
 								    "])"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 36,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model.summary()"
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								   ]
 								  },
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 37,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "bn1 = model.layers[1]\n",
 								    "[(var.name, var.trainable) for var in bn1.variables]"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 38,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												layer.updates is deprecated, and model_B.summary() instead of model.summary(), fixes #380

											
										
										
											2021-02-18 20:26:32 +01:00
+								    "#bn1.updates #deprecated"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 39,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												SGD now defaults to lr=0.01 so use 1e-3 explicitely

											
										
										
											2019-06-10 04:48:00 +02:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
 								    "              optimizer=keras.optimizers.SGD(lr=1e-3),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "              metrics=[\"accuracy\"])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 40,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "history = model.fit(X_train, y_train, epochs=10,\n",
 								    "                    validation_data=(X_valid, y_valid))"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								   "source": [
-												No bias before BN layers

											
										
										
											2019-03-25 05:03:44 +01:00
+								    "Sometimes applying BN before the activation function works better (there's a debate on this topic). Moreover, the layer before a `BatchNormalization` layer does not need to have bias terms, since the `BatchNormalization` layer some as well, it would be a waste of parameters, so you can set `use_bias=False` when creating those layers:"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 41,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    keras.layers.BatchNormalization(),\n",
-												No bias before BN layers

											
										
										
											2019-03-25 05:03:44 +01:00
+								    "    keras.layers.Dense(300, use_bias=False),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "    keras.layers.BatchNormalization(),\n",
 								    "    keras.layers.Activation(\"relu\"),\n",
-												No bias before BN layers

											
										
										
											2019-03-25 05:03:44 +01:00
+								    "    keras.layers.Dense(100, use_bias=False),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "    keras.layers.BatchNormalization(),\n",
-												Update 11_training_deep_neural_networks.ipynb

Swapped the Activation and BatchNormalization lines in order to make the code consistent with the description and the book (p. 343), i.e. adding the BN layers BEFORE the activation function.
											
										
										
											2019-12-26 05:00:26 +01:00
+								    "    keras.layers.Activation(\"relu\"),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "    keras.layers.Dense(10, activation=\"softmax\")\n",
 								    "])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 42,
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
-												SGD now defaults to lr=0.01 so use 1e-3 explicitely

											
										
										
											2019-06-10 04:48:00 +02:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
 								    "              optimizer=keras.optimizers.SGD(lr=1e-3),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "              metrics=[\"accuracy\"])"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 43,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "history = model.fit(X_train, y_train, epochs=10,\n",
 								    "                    validation_data=(X_valid, y_valid))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
 								    "## Gradient Clipping"
 								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "All Keras optimizers accept `clipnorm` or `clipvalue` arguments:"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 44,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "optimizer = keras.optimizers.SGD(clipvalue=1.0)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 45,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "optimizer = keras.optimizers.SGD(clipnorm=1.0)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "## Reusing Pretrained Layers"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "markdown",
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "### Reusing a Keras model"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "Let's split the fashion MNIST training set in two:\n",
 								    "* `X_train_A`: all images of all items except for sandals and shirts (classes 5 and 6).\n",
 								    "* `X_train_B`: a much smaller training set of just the first 200 images of sandals or shirts.\n",
 								    "\n",
 								    "The validation set and the test set are also split this way, but without restricting the number of images.\n",
 								    "\n",
 								    "We will train a model on set A (classification task with 8 classes), and try to reuse it to tackle set B (binary classification). We hope to transfer a little bit of knowledge from task A to task B, since classes in set A (sneakers, ankle boots, coats, t-shirts, etc.) are somewhat similar to classes in set B (sandals and shirts). However, since we are using `Dense` layers, only patterns that occur at the same location can be reused (in contrast, convolutional layers will transfer much better, since learned patterns can be detected anywhere on the image, as we will see in the CNN chapter)."
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 46,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "def split_dataset(X, y):\n",
 								    "    y_5_or_6 = (y == 5) | (y == 6) # sandals or shirts\n",
 								    "    y_A = y[~y_5_or_6]\n",
 								    "    y_A[y_A > 6] -= 2 # class indices 7, 8, 9 should be moved to 5, 6, 7\n",
 								    "    y_B = (y[y_5_or_6] == 6).astype(np.float32) # binary classification task: is it a shirt (class 6)?\n",
 								    "    return ((X[~y_5_or_6], y_A),\n",
 								    "            (X[y_5_or_6], y_B))\n",
 								    "\n",
 								    "(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)\n",
 								    "(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)\n",
 								    "(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)\n",
 								    "X_train_B = X_train_B[:200]\n",
 								    "y_train_B = y_train_B[:200]"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 47,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "X_train_A.shape"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 48,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "X_train_B.shape"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 49,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "y_train_A[:30]"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 50,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "y_train_B[:30]"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 51,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 52,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model_A = keras.models.Sequential()\n",
 								    "model_A.add(keras.layers.Flatten(input_shape=[28, 28]))\n",
 								    "for n_hidden in (300, 100, 50, 50, 50):\n",
 								    "    model_A.add(keras.layers.Dense(n_hidden, activation=\"selu\"))\n",
 								    "model_A.add(keras.layers.Dense(8, activation=\"softmax\"))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 53,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												SGD now defaults to lr=0.01 so use 1e-3 explicitely

											
										
										
											2019-06-10 04:48:00 +02:00
+								    "model_A.compile(loss=\"sparse_categorical_crossentropy\",\n",
 								    "                optimizer=keras.optimizers.SGD(lr=1e-3),\n",
 								    "                metrics=[\"accuracy\"])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 54,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "history = model_A.fit(X_train_A, y_train_A, epochs=20,\n",
 								    "                    validation_data=(X_valid_A, y_valid_A))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 55,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model_A.save(\"my_model_A.h5\")"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 56,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model_B = keras.models.Sequential()\n",
 								    "model_B.add(keras.layers.Flatten(input_shape=[28, 28]))\n",
 								    "for n_hidden in (300, 100, 50, 50, 50):\n",
 								    "    model_B.add(keras.layers.Dense(n_hidden, activation=\"selu\"))\n",
 								    "model_B.add(keras.layers.Dense(1, activation=\"sigmoid\"))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 57,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												SGD now defaults to lr=0.01 so use 1e-3 explicitely

											
										
										
											2019-06-10 04:48:00 +02:00
+								    "model_B.compile(loss=\"binary_crossentropy\",\n",
 								    "                optimizer=keras.optimizers.SGD(lr=1e-3),\n",
 								    "                metrics=[\"accuracy\"])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 58,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "history = model_B.fit(X_train_B, y_train_B, epochs=20,\n",
 								    "                      validation_data=(X_valid_B, y_valid_B))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 59,
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												layer.updates is deprecated, and model_B.summary() instead of model.summary(), fixes #380

											
										
										
											2021-02-18 20:26:32 +01:00
+								    "model_B.summary()"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 60,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model_A = keras.models.load_model(\"my_model_A.h5\")\n",
 								    "model_B_on_A = keras.models.Sequential(model_A.layers[:-1])\n",
 								    "model_B_on_A.add(keras.layers.Dense(1, activation=\"sigmoid\"))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 61,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model_A_clone = keras.models.clone_model(model_A)\n",
 								    "model_A_clone.set_weights(model_A.get_weights())"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 62,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "for layer in model_B_on_A.layers[:-1]:\n",
 								    "    layer.trainable = False\n",
 								    "\n",
-												SGD now defaults to lr=0.01 so use 1e-3 explicitely

											
										
										
											2019-06-10 04:48:00 +02:00
+								    "model_B_on_A.compile(loss=\"binary_crossentropy\",\n",
 								    "                     optimizer=keras.optimizers.SGD(lr=1e-3),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "                     metrics=[\"accuracy\"])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 63,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "history = model_B_on_A.fit(X_train_B, y_train_B, epochs=4,\n",
 								    "                           validation_data=(X_valid_B, y_valid_B))\n",
 								    "\n",
 								    "for layer in model_B_on_A.layers[:-1]:\n",
 								    "    layer.trainable = True\n",
 								    "\n",
-												SGD now defaults to lr=0.01 so use 1e-3 explicitely

											
										
										
											2019-06-10 04:48:00 +02:00
+								    "model_B_on_A.compile(loss=\"binary_crossentropy\",\n",
 								    "                     optimizer=keras.optimizers.SGD(lr=1e-3),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "                     metrics=[\"accuracy\"])\n",
 								    "history = model_B_on_A.fit(X_train_B, y_train_B, epochs=16,\n",
 								    "                           validation_data=(X_valid_B, y_valid_B))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "So, what's the final verdict?"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 64,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model_B.evaluate(X_test_B, y_test_B)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 65,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model_B_on_A.evaluate(X_test_B, y_test_B)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update libraries to latest version, including TensorFlow 2.4.1 and Scikit-Learn 0.24.1

											
										
										
											2021-02-14 03:02:09 +01:00
+								    "Great! We got quite a bit of transfer: the error rate dropped by a factor of 4.5!"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 66,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update libraries to latest version, including TensorFlow 2.4.1 and Scikit-Learn 0.24.1

											
										
										
											2021-02-14 03:02:09 +01:00
+								    "(100 - 97.05) / (100 - 99.35)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "# Faster Optimizers"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "## Momentum optimization"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 67,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "## Nesterov Accelerated Gradient"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 68,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9, nesterov=True)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "## AdaGrad"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 69,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "optimizer = keras.optimizers.Adagrad(lr=0.001)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "## RMSProp"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 70,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "optimizer = keras.optimizers.RMSprop(lr=0.001, rho=0.9)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "## Adam Optimization"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 71,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "## Adamax Optimization"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 72,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "optimizer = keras.optimizers.Adamax(lr=0.001, beta_1=0.9, beta_2=0.999)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "markdown",
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "## Nadam Optimization"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 73,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "optimizer = keras.optimizers.Nadam(lr=0.001, beta_1=0.9, beta_2=0.999)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "## Learning Rate Scheduling"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "### Power Scheduling"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "```lr = lr0 / (1 + steps / s)**c```\n",
 								    "* Keras uses `c=1` and `s = 1 / decay`"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 74,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "optimizer = keras.optimizers.SGD(lr=0.01, decay=1e-4)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 75,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.Dense(10, activation=\"softmax\")\n",
 								    "])\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 76,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "n_epochs = 25\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 77,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Adjust computation of steps per epoch

The number of steps per epoch is ceil(len(X) / batch_size) rather than floor(len(X) / batch_size). This change also means we do not have to take max(rate, self.last_rate) in the last steps of the OneCycleScheduler.

											
										
										
											2020-09-25 14:25:17 +02:00
+								    "import math\n",
 								    "\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "learning_rate = 0.01\n",
 								    "decay = 1e-4\n",
 								    "batch_size = 32\n",
-												Adjust computation of steps per epoch

The number of steps per epoch is ceil(len(X) / batch_size) rather than floor(len(X) / batch_size). This change also means we do not have to take max(rate, self.last_rate) in the last steps of the OneCycleScheduler.

											
										
										
											2020-09-25 14:25:17 +02:00
+								    "n_steps_per_epoch = math.ceil(len(X_train) / batch_size)\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "epochs = np.arange(n_epochs)\n",
 								    "lrs = learning_rate / (1 + decay * epochs * n_steps_per_epoch)\n",
 								    "\n",
 								    "plt.plot(epochs, lrs,  \"o-\")\n",
 								    "plt.axis([0, n_epochs - 1, 0, 0.01])\n",
 								    "plt.xlabel(\"Epoch\")\n",
 								    "plt.ylabel(\"Learning Rate\")\n",
 								    "plt.title(\"Power Scheduling\", fontsize=14)\n",
 								    "plt.grid(True)\n",
 								    "plt.show()"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "### Exponential Scheduling"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "markdown",
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "```lr = lr0 * 0.1**(epoch / s)```"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 78,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "def exponential_decay_fn(epoch):\n",
 								    "    return 0.01 * 0.1**(epoch / 20)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 79,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "def exponential_decay(lr0, s):\n",
 								    "    def exponential_decay_fn(epoch):\n",
 								    "        return lr0 * 0.1**(epoch / s)\n",
 								    "    return exponential_decay_fn\n",
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								    "\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "exponential_decay_fn = exponential_decay(lr0=0.01, s=20)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 80,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.Dense(10, activation=\"softmax\")\n",
 								    "])\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "n_epochs = 25"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 81,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "                    callbacks=[lr_scheduler])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 82,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "plt.plot(history.epoch, history.history[\"lr\"], \"o-\")\n",
 								    "plt.axis([0, n_epochs - 1, 0, 0.011])\n",
 								    "plt.xlabel(\"Epoch\")\n",
 								    "plt.ylabel(\"Learning Rate\")\n",
 								    "plt.title(\"Exponential Scheduling\", fontsize=14)\n",
 								    "plt.grid(True)\n",
 								    "plt.show()"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "The schedule function can take the current learning rate as a second argument:"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 83,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "def exponential_decay_fn(epoch, lr):\n",
 								    "    return lr * 0.1**(1 / 20)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "If you want to update the learning rate at each iteration rather than at each epoch, you must write your own callback class:"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 84,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "K = keras.backend\n",
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								    "\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "class ExponentialDecay(keras.callbacks.Callback):\n",
 								    "    def __init__(self, s=40000):\n",
 								    "        super().__init__()\n",
 								    "        self.s = s\n",
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								    "\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "    def on_batch_begin(self, batch, logs=None):\n",
 								    "        # Note: the `batch` argument is reset at each epoch\n",
 								    "        lr = K.get_value(self.model.optimizer.lr)\n",
 								    "        K.set_value(self.model.optimizer.lr, lr * 0.1**(1 / s))\n",
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								    "\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "    def on_epoch_end(self, epoch, logs=None):\n",
 								    "        logs = logs or {}\n",
 								    "        logs['lr'] = K.get_value(self.model.optimizer.lr)\n",
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								    "\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.Dense(10, activation=\"softmax\")\n",
 								    "])\n",
 								    "lr0 = 0.01\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "optimizer = keras.optimizers.Nadam(lr=lr0)\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
 								    "n_epochs = 25\n",
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								    "\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "s = 20 * len(X_train) // 32 # number of steps in 20 epochs (batch size = 32)\n",
 								    "exp_decay = ExponentialDecay(s)\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "                    callbacks=[exp_decay])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 85,
-												Remove uncessary reuse_vars_dict in notebook 11

											
										
										
											2018-03-24 22:50:29 +01:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "n_steps = n_epochs * len(X_train) // 32\n",
 								    "steps = np.arange(n_steps)\n",
 								    "lrs = lr0 * 0.1**(steps / s)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 86,
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "metadata": {
 								    "scrolled": true
 								   },
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "plt.plot(steps, lrs, \"-\", linewidth=2)\n",
 								    "plt.axis([0, n_steps - 1, 0, lr0 * 1.1])\n",
 								    "plt.xlabel(\"Batch\")\n",
 								    "plt.ylabel(\"Learning Rate\")\n",
 								    "plt.title(\"Exponential Scheduling (per batch)\", fontsize=14)\n",
 								    "plt.grid(True)\n",
 								    "plt.show()"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "### Piecewise Constant Scheduling"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 87,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "def piecewise_constant_fn(epoch):\n",
 								    "    if epoch < 5:\n",
 								    "        return 0.01\n",
 								    "    elif epoch < 15:\n",
 								    "        return 0.005\n",
 								    "    else:\n",
 								    "        return 0.001"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 88,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "def piecewise_constant(boundaries, values):\n",
 								    "    boundaries = np.array([0] + boundaries)\n",
 								    "    values = np.array(values)\n",
 								    "    def piecewise_constant_fn(epoch):\n",
 								    "        return values[np.argmax(boundaries > epoch) - 1]\n",
 								    "    return piecewise_constant_fn\n",
 								    "\n",
 								    "piecewise_constant_fn = piecewise_constant([5, 15], [0.01, 0.005, 0.001])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 89,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "lr_scheduler = keras.callbacks.LearningRateScheduler(piecewise_constant_fn)\n",
 								    "\n",
 								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.Dense(10, activation=\"softmax\")\n",
 								    "])\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "n_epochs = 25\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "                    callbacks=[lr_scheduler])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 90,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "plt.plot(history.epoch, [piecewise_constant_fn(epoch) for epoch in history.epoch], \"o-\")\n",
 								    "plt.axis([0, n_epochs - 1, 0, 0.011])\n",
 								    "plt.xlabel(\"Epoch\")\n",
 								    "plt.ylabel(\"Learning Rate\")\n",
 								    "plt.title(\"Piecewise Constant Scheduling\", fontsize=14)\n",
 								    "plt.grid(True)\n",
 								    "plt.show()"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "### Performance Scheduling"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 91,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 92,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)\n",
 								    "\n",
 								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.Dense(10, activation=\"softmax\")\n",
 								    "])\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "optimizer = keras.optimizers.SGD(lr=0.02, momentum=0.9)\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
 								    "n_epochs = 25\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid),\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "                    callbacks=[lr_scheduler])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 93,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "plt.plot(history.epoch, history.history[\"lr\"], \"bo-\")\n",
 								    "plt.xlabel(\"Epoch\")\n",
 								    "plt.ylabel(\"Learning Rate\", color='b')\n",
 								    "plt.tick_params('y', colors='b')\n",
 								    "plt.gca().set_xlim(0, n_epochs - 1)\n",
 								    "plt.grid(True)\n",
 								    "\n",
 								    "ax2 = plt.gca().twinx()\n",
 								    "ax2.plot(history.epoch, history.history[\"val_loss\"], \"r^-\")\n",
 								    "ax2.set_ylabel('Validation Loss', color='r')\n",
 								    "ax2.tick_params('y', colors='r')\n",
 								    "\n",
 								    "plt.title(\"Reduce LR on Plateau\", fontsize=14)\n",
 								    "plt.show()"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "### tf.keras schedulers"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 94,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.Dense(10, activation=\"softmax\")\n",
 								    "])\n",
 								    "s = 20 * len(X_train) // 32 # number of steps in 20 epochs (batch size = 32)\n",
 								    "learning_rate = keras.optimizers.schedules.ExponentialDecay(0.01, s, 0.1)\n",
 								    "optimizer = keras.optimizers.SGD(learning_rate)\n",
 								    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
 								    "n_epochs = 25\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "For piecewise constant scheduling, try this:"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 95,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "learning_rate = keras.optimizers.schedules.PiecewiseConstantDecay(\n",
-												Add 1cycle scheduling

											
										
										
											2019-05-05 06:42:08 +02:00
+								    "    boundaries=[5. * n_steps_per_epoch, 15. * n_steps_per_epoch],\n",
 								    "    values=[0.01, 0.005, 0.001])"
 								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
 								   "metadata": {},
 								   "source": [
 								    "### 1Cycle scheduling"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 96,
-												Add 1cycle scheduling

											
										
										
											2019-05-05 06:42:08 +02:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "K = keras.backend\n",
 								    "\n",
 								    "class ExponentialLearningRate(keras.callbacks.Callback):\n",
 								    "    def __init__(self, factor):\n",
 								    "        self.factor = factor\n",
 								    "        self.rates = []\n",
 								    "        self.losses = []\n",
 								    "    def on_batch_end(self, batch, logs):\n",
 								    "        self.rates.append(K.get_value(self.model.optimizer.lr))\n",
 								    "        self.losses.append(logs[\"loss\"])\n",
 								    "        K.set_value(self.model.optimizer.lr, self.model.optimizer.lr * self.factor)\n",
 								    "\n",
 								    "def find_learning_rate(model, X, y, epochs=1, batch_size=32, min_rate=10**-5, max_rate=10):\n",
 								    "    init_weights = model.get_weights()\n",
-												Adjust computation of steps per epoch

The number of steps per epoch is ceil(len(X) / batch_size) rather than floor(len(X) / batch_size). This change also means we do not have to take max(rate, self.last_rate) in the last steps of the OneCycleScheduler.

											
										
										
											2020-09-25 14:25:17 +02:00
+								    "    iterations = math.ceil(len(X) / batch_size) * epochs\n",
-												Add 1cycle scheduling

											
										
										
											2019-05-05 06:42:08 +02:00
+								    "    factor = np.exp(np.log(max_rate / min_rate) / iterations)\n",
 								    "    init_lr = K.get_value(model.optimizer.lr)\n",
 								    "    K.set_value(model.optimizer.lr, min_rate)\n",
 								    "    exp_lr = ExponentialLearningRate(factor)\n",
 								    "    history = model.fit(X, y, epochs=epochs, batch_size=batch_size,\n",
 								    "                        callbacks=[exp_lr])\n",
 								    "    K.set_value(model.optimizer.lr, init_lr)\n",
 								    "    model.set_weights(init_weights)\n",
 								    "    return exp_lr.rates, exp_lr.losses\n",
 								    "\n",
 								    "def plot_lr_vs_loss(rates, losses):\n",
 								    "    plt.plot(rates, losses)\n",
 								    "    plt.gca().set_xscale('log')\n",
 								    "    plt.hlines(min(losses), min(rates), max(rates))\n",
 								    "    plt.axis([min(rates), max(rates), min(losses), (losses[0] + min(losses)) / 2])\n",
 								    "    plt.xlabel(\"Learning rate\")\n",
 								    "    plt.ylabel(\"Loss\")"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 97,
-												Add 1cycle scheduling

											
										
										
											2019-05-05 06:42:08 +02:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)\n",
 								    "\n",
 								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.Dense(10, activation=\"softmax\")\n",
 								    "])\n",
-												SGD now defaults to lr=0.01 so use 1e-3 explicitely

											
										
										
											2019-06-10 04:48:00 +02:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
 								    "              optimizer=keras.optimizers.SGD(lr=1e-3),\n",
 								    "              metrics=[\"accuracy\"])"
-												Add 1cycle scheduling

											
										
										
											2019-05-05 06:42:08 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 98,
-												Add 1cycle scheduling

											
										
										
											2019-05-05 06:42:08 +02:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "batch_size = 128\n",
 								    "rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=batch_size)\n",
 								    "plot_lr_vs_loss(rates, losses)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 99,
-												Add 1cycle scheduling

											
										
										
											2019-05-05 06:42:08 +02:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "class OneCycleScheduler(keras.callbacks.Callback):\n",
 								    "    def __init__(self, iterations, max_rate, start_rate=None,\n",
 								    "                 last_iterations=None, last_rate=None):\n",
 								    "        self.iterations = iterations\n",
 								    "        self.max_rate = max_rate\n",
 								    "        self.start_rate = start_rate or max_rate / 10\n",
 								    "        self.last_iterations = last_iterations or iterations // 10 + 1\n",
 								    "        self.half_iteration = (iterations - self.last_iterations) // 2\n",
 								    "        self.last_rate = last_rate or self.start_rate / 1000\n",
 								    "        self.iteration = 0\n",
 								    "    def _interpolate(self, iter1, iter2, rate1, rate2):\n",
-												Fix error in OneCycleSchedule._interpolate(), fixes #56

											
										
										
											2019-11-05 04:16:48 +01:00
+								    "        return ((rate2 - rate1) * (self.iteration - iter1)\n",
-												Add 1cycle scheduling

											
										
										
											2019-05-05 06:42:08 +02:00
+								    "                / (iter2 - iter1) + rate1)\n",
 								    "    def on_batch_begin(self, batch, logs):\n",
 								    "        if self.iteration < self.half_iteration:\n",
 								    "            rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)\n",
 								    "        elif self.iteration < 2 * self.half_iteration:\n",
 								    "            rate = self._interpolate(self.half_iteration, 2 * self.half_iteration,\n",
 								    "                                     self.max_rate, self.start_rate)\n",
 								    "        else:\n",
 								    "            rate = self._interpolate(2 * self.half_iteration, self.iterations,\n",
 								    "                                     self.start_rate, self.last_rate)\n",
 								    "        self.iteration += 1\n",
 								    "        K.set_value(self.model.optimizer.lr, rate)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 100,
-												Add 1cycle scheduling

											
										
										
											2019-05-05 06:42:08 +02:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "n_epochs = 25\n",
-												Adjust computation of steps per epoch

The number of steps per epoch is ceil(len(X) / batch_size) rather than floor(len(X) / batch_size). This change also means we do not have to take max(rate, self.last_rate) in the last steps of the OneCycleScheduler.

											
										
										
											2020-09-25 14:25:17 +02:00
+								    "onecycle = OneCycleScheduler(math.ceil(len(X_train) / batch_size) * n_epochs, max_rate=0.05)\n",
-												Add 1cycle scheduling

											
										
										
											2019-05-05 06:42:08 +02:00
+								    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs, batch_size=batch_size,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid),\n",
 								    "                    callbacks=[onecycle])"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "markdown",
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "# Avoiding Overfitting Through Regularization"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "markdown",
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "## $\\ell_1$ and $\\ell_2$ regularization"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 101,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "layer = keras.layers.Dense(100, activation=\"elu\",\n",
 								    "                           kernel_initializer=\"he_normal\",\n",
 								    "                           kernel_regularizer=keras.regularizers.l2(0.01))\n",
 								    "# or l1(0.1) for ℓ1 regularization with a factor or 0.1\n",
 								    "# or l1_l2(0.1, 0.01) for both ℓ1 and ℓ2 regularization, with factors 0.1 and 0.01 respectively"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 102,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    keras.layers.Dense(300, activation=\"elu\",\n",
 								    "                       kernel_initializer=\"he_normal\",\n",
 								    "                       kernel_regularizer=keras.regularizers.l2(0.01)),\n",
 								    "    keras.layers.Dense(100, activation=\"elu\",\n",
 								    "                       kernel_initializer=\"he_normal\",\n",
 								    "                       kernel_regularizer=keras.regularizers.l2(0.01)),\n",
 								    "    keras.layers.Dense(10, activation=\"softmax\",\n",
 								    "                       kernel_regularizer=keras.regularizers.l2(0.01))\n",
 								    "])\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "n_epochs = 2\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 103,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "from functools import partial\n",
 								    "\n",
 								    "RegularizedDense = partial(keras.layers.Dense,\n",
 								    "                           activation=\"elu\",\n",
 								    "                           kernel_initializer=\"he_normal\",\n",
 								    "                           kernel_regularizer=keras.regularizers.l2(0.01))\n",
 								    "\n",
 								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    RegularizedDense(300),\n",
 								    "    RegularizedDense(100),\n",
 								    "    RegularizedDense(10, activation=\"softmax\")\n",
 								    "])\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "n_epochs = 2\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "## Dropout"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 104,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    keras.layers.Dropout(rate=0.2),\n",
 								    "    keras.layers.Dense(300, activation=\"elu\", kernel_initializer=\"he_normal\"),\n",
 								    "    keras.layers.Dropout(rate=0.2),\n",
 								    "    keras.layers.Dense(100, activation=\"elu\", kernel_initializer=\"he_normal\"),\n",
 								    "    keras.layers.Dropout(rate=0.2),\n",
 								    "    keras.layers.Dense(10, activation=\"softmax\")\n",
 								    "])\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "n_epochs = 2\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "## Alpha Dropout"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 105,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "source": [
 								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 106,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    keras.layers.AlphaDropout(rate=0.2),\n",
 								    "    keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.AlphaDropout(rate=0.2),\n",
 								    "    keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
 								    "    keras.layers.AlphaDropout(rate=0.2),\n",
 								    "    keras.layers.Dense(10, activation=\"softmax\")\n",
 								    "])\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n",
 								    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n",
 								    "n_epochs = 20\n",
 								    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid))"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 107,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "model.evaluate(X_test_scaled, y_test)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 108,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "model.evaluate(X_train_scaled, y_train)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 109,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
-												Do not use learning_phase anymore, just set training=True/False

											
										
										
											2019-05-09 04:39:02 +02:00
+								    "history = model.fit(X_train_scaled, y_train)"
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
 								   "metadata": {},
 								   "source": [
 								    "## MC Dropout"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 110,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 111,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
-												Do not use learning_phase anymore, just set training=True/False

											
										
										
											2019-05-09 04:39:02 +02:00
+								    "y_probas = np.stack([model(X_test_scaled, training=True)\n",
 								    "                     for sample in range(100)])\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "y_proba = y_probas.mean(axis=0)\n",
 								    "y_std = y_probas.std(axis=0)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 112,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "np.round(model.predict(X_test_scaled[:1]), 2)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 113,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "np.round(y_probas[:, :1], 2)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 114,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "np.round(y_proba[:1], 2)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 115,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "y_std = y_probas.std(axis=0)\n",
 								    "np.round(y_std[:1], 2)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 116,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "y_pred = np.argmax(y_proba, axis=1)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 117,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "accuracy = np.sum(y_pred == y_test) / len(y_test)\n",
 								    "accuracy"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 118,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "class MCDropout(keras.layers.Dropout):\n",
 								    "    def call(self, inputs):\n",
 								    "        return super().call(inputs, training=True)\n",
 								    "\n",
 								    "class MCAlphaDropout(keras.layers.AlphaDropout):\n",
 								    "    def call(self, inputs):\n",
 								    "        return super().call(inputs, training=True)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 119,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 120,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "mc_model = keras.models.Sequential([\n",
 								    "    MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer\n",
 								    "    for layer in model.layers\n",
 								    "])"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 121,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "mc_model.summary()"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 122,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n",
 								    "mc_model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 123,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "mc_model.set_weights(model.get_weights())"
 								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
 								   "metadata": {},
 								   "source": [
 								    "Now we can use the model with MC Dropout:"
 								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 124,
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								   "metadata": {},
 								   "outputs": [],
 								   "source": [
 								    "np.round(np.mean([mc_model.predict(X_test_scaled[:1]) for sample in range(100)], axis=0), 2)"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "## Max norm"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 125,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "outputs": [],
 								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "layer = keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
 								    "                           kernel_constraint=keras.constraints.max_norm(1.))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Do not use a layer as an activation function, especially with weights

											
										
										
											2019-06-09 14:08:53 +02:00
+								   "execution_count": 126,
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "MaxNormDense = partial(keras.layers.Dense,\n",
 								    "                       activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
 								    "                       kernel_constraint=keras.constraints.max_norm(1.))\n",
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								    "\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "model = keras.models.Sequential([\n",
 								    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
 								    "    MaxNormDense(300),\n",
 								    "    MaxNormDense(100),\n",
 								    "    keras.layers.Dense(10, activation=\"softmax\")\n",
 								    "])\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "n_epochs = 2\n",
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid))"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Update notebooks to latest nbformat

											
										
										
											2020-04-06 09:13:12 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Add MC Dropout, finish chapter

											
										
										
											2019-02-28 12:48:06 +01:00
+								    "# Exercises"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "markdown",
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "## 1. to 7."
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								    "See appendix A."
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "markdown",
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "## 8. Deep Learning on CIFAR10"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "markdown",
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "### a.\n",
 								    "*Exercise: Build a DNN with 20 hidden layers of 100 neurons each (that's too many, but it's the point of this exercise). Use He initialization and the ELU activation function.*"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 127,
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "keras.backend.clear_session()\n",
 								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)\n",
 								    "\n",
 								    "model = keras.models.Sequential()\n",
 								    "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n",
 								    "for _ in range(20):\n",
 								    "    model.add(keras.layers.Dense(100,\n",
 								    "                                 activation=\"elu\",\n",
 								    "                                 kernel_initializer=\"he_normal\"))"
 								   ]
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "### b.\n",
 								    "*Exercise: Using Nadam optimization and early stopping, train the network on the CIFAR10 dataset. You can load it with `keras.datasets.cifar10.load_data()`. The dataset is composed of 60,000 32 × 32–pixel color images (50,000 for training, 10,000 for testing) with 10 classes, so you'll need a softmax output layer with 10 neurons. Remember to search for the right learning rate each time you change the model's architecture or hyperparameters.*"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "Let's add the output layer to the model:"
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 128,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
 								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "model.add(keras.layers.Dense(10, activation=\"softmax\"))"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "Let's use a Nadam optimizer with a learning rate of 5e-5. I tried learning rates 1e-5, 3e-5, 1e-4, 3e-4, 1e-3, 3e-3 and 1e-2, and I compared their learning curves for 10 epochs each (using the TensorBoard callback, below). The learning rates 3e-5 and 1e-4 were pretty good, so I tried 5e-5, which turned out to be slightly better."
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								   ]
 								  },
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 129,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
 								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "optimizer = keras.optimizers.Nadam(lr=5e-5)\n",
 								    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
 								    "              optimizer=optimizer,\n",
 								    "              metrics=[\"accuracy\"])"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "Let's load the CIFAR10 dataset. We also want to use early stopping, so we need a validation set. Let's use the first 5,000 images of the original training set as the validation set:"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 130,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()\n",
 								    "\n",
 								    "X_train = X_train_full[5000:]\n",
 								    "y_train = y_train_full[5000:]\n",
 								    "X_valid = X_train_full[:5000]\n",
 								    "y_valid = y_train_full[:5000]"
 								   ]
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								  },
 								  {
 								   "cell_type": "markdown",
 								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "Now we can create the callbacks we need and train the model:"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "cell_type": "code",
 								   "execution_count": 131,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "outputs": [],
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n",
 								    "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_model.h5\", save_best_only=True)\n",
 								    "run_index = 1 # increment every time you train the model\n",
 								    "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_{:03d}\".format(run_index))\n",
 								    "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n",
 								    "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 132,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "%tensorboard --logdir=./my_cifar10_logs --port=6006"
 								   ]
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 133,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "model.fit(X_train, y_train, epochs=100,\n",
 								    "          validation_data=(X_valid, y_valid),\n",
 								    "          callbacks=callbacks)"
 								   ]
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								  },
 								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 134,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "outputs": [],
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "model = keras.models.load_model(\"my_cifar10_model.h5\")\n",
 								    "model.evaluate(X_valid, y_valid)"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								  {
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								   "cell_type": "markdown",
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								   "source": [
-												Update libraries to latest version, including TensorFlow 2.4.1 and Scikit-Learn 0.24.1

											
										
										
											2021-02-14 03:02:09 +01:00
+								    "The model with the lowest validation loss gets about 47.6% accuracy on the validation set. It took 27 epochs to reach the lowest validation loss, with roughly 8 seconds per epoch on my laptop (without a GPU). Let's see if we can improve performance using Batch Normalization."
-												Use tf.layers instead of tf.contrib.layers

											
										
										
											2017-04-30 10:21:27 +02:00
+								   ]
 								  },
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								  {
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "### c.\n",
 								    "*Exercise: Now try adding Batch Normalization and compare the learning curves: Is it converging faster than before? Does it produce a better model? How does it affect training speed?*"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "cell_type": "markdown",
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "The code below is very similar to the code above, with a few changes:\n",
 								    "\n",
 								    "* I added a BN layer after every Dense layer (before the activation function), except for the output layer. I also added a BN layer before the first hidden layer.\n",
 								    "* I changed the learning rate to 5e-4. I experimented with 1e-5, 3e-5, 5e-5, 1e-4, 3e-4, 5e-4, 1e-3 and 3e-3, and I chose the one with the best validation performance after 20 epochs.\n",
 								    "* I renamed the run directories to run_bn_* and the model file name to my_cifar10_bn_model.h5."
 								   ]
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 135,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "keras.backend.clear_session()\n",
 								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)\n",
 								    "\n",
 								    "model = keras.models.Sequential()\n",
 								    "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n",
 								    "model.add(keras.layers.BatchNormalization())\n",
 								    "for _ in range(20):\n",
 								    "    model.add(keras.layers.Dense(100, kernel_initializer=\"he_normal\"))\n",
 								    "    model.add(keras.layers.BatchNormalization())\n",
 								    "    model.add(keras.layers.Activation(\"elu\"))\n",
 								    "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n",
 								    "\n",
 								    "optimizer = keras.optimizers.Nadam(lr=5e-4)\n",
 								    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
 								    "              optimizer=optimizer,\n",
 								    "              metrics=[\"accuracy\"])\n",
 								    "\n",
 								    "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n",
 								    "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_bn_model.h5\", save_best_only=True)\n",
 								    "run_index = 1 # increment every time you train the model\n",
 								    "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_bn_{:03d}\".format(run_index))\n",
 								    "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n",
 								    "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n",
 								    "\n",
 								    "model.fit(X_train, y_train, epochs=100,\n",
 								    "          validation_data=(X_valid, y_valid),\n",
 								    "          callbacks=callbacks)\n",
 								    "\n",
 								    "model = keras.models.load_model(\"my_cifar10_bn_model.h5\")\n",
 								    "model.evaluate(X_valid, y_valid)"
 								   ]
-												Update deep learning chapter to tf.keras, add SELU and more

											
										
										
											2019-02-17 13:31:28 +01:00
+								  },
 								  {
 								   "cell_type": "markdown",
 								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "source": [
-												Update libraries to latest version, including TensorFlow 2.4.1 and Scikit-Learn 0.24.1

											
										
										
											2021-02-14 03:02:09 +01:00
+								    "* *Is the model converging faster than before?* Much faster! The previous model took 27 epochs to reach the lowest validation loss, while the new model achieved that same loss in just 5 epochs and continued to make progress until the 16th epoch. The BN layers stabilized training and allowed us to use a much larger learning rate, so convergence was faster.\n",
 								    "* *Does BN produce a better model?* Yes! The final model is also much better, with 54.0% accuracy instead of 47.6%. It's still not a very good model, but at least it's much better than before (a Convolutional Neural Network would do much better, but that's a different topic, see chapter 14).\n",
 								    "* *How does BN affect training speed?* Although the model converged much faster, each epoch took about 12s instead of 8s, because of the extra computations required by the BN layers. But overall the training time (wall time) was shortened significantly!"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
-												Sync chapter 11 notebook with the code samples in that chapter

											
										
										
											2017-06-05 18:48:03 +02:00
+								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "### d.\n",
 								    "*Exercise: Try replacing Batch Normalization with SELU, and make the necessary adjustements to ensure the network self-normalizes (i.e., standardize the input features, use LeCun normal initialization, make sure the DNN contains only a sequence of dense layers, etc.).*"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 136,
 								   "metadata": {
 								    "scrolled": true
 								   },
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "keras.backend.clear_session()\n",
 								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)\n",
 								    "\n",
 								    "model = keras.models.Sequential()\n",
 								    "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n",
 								    "for _ in range(20):\n",
 								    "    model.add(keras.layers.Dense(100,\n",
 								    "                                 kernel_initializer=\"lecun_normal\",\n",
 								    "                                 activation=\"selu\"))\n",
 								    "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n",
 								    "\n",
 								    "optimizer = keras.optimizers.Nadam(lr=7e-4)\n",
 								    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
 								    "              optimizer=optimizer,\n",
 								    "              metrics=[\"accuracy\"])\n",
 								    "\n",
 								    "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n",
 								    "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_selu_model.h5\", save_best_only=True)\n",
 								    "run_index = 1 # increment every time you train the model\n",
 								    "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_selu_{:03d}\".format(run_index))\n",
 								    "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n",
 								    "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n",
 								    "\n",
 								    "X_means = X_train.mean(axis=0)\n",
 								    "X_stds = X_train.std(axis=0)\n",
 								    "X_train_scaled = (X_train - X_means) / X_stds\n",
 								    "X_valid_scaled = (X_valid - X_means) / X_stds\n",
 								    "X_test_scaled = (X_test - X_means) / X_stds\n",
 								    "\n",
 								    "model.fit(X_train_scaled, y_train, epochs=100,\n",
 								    "          validation_data=(X_valid_scaled, y_valid),\n",
 								    "          callbacks=callbacks)\n",
 								    "\n",
 								    "model = keras.models.load_model(\"my_cifar10_selu_model.h5\")\n",
 								    "model.evaluate(X_valid_scaled, y_valid)"
 								   ]
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 137,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "outputs": [],
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "model = keras.models.load_model(\"my_cifar10_selu_model.h5\")\n",
 								    "model.evaluate(X_valid_scaled, y_valid)"
 								   ]
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "source": [
-												Update libraries to latest version, including TensorFlow 2.4.1 and Scikit-Learn 0.24.1

											
										
										
											2021-02-14 03:02:09 +01:00
+								    "We get 47.9% accuracy, which is not much better than the original model (47.6%), and not as good as the model using batch normalization (54.0%). However, convergence was almost as fast as with the BN model, plus each epoch took only 7 seconds. So it's by far the fastest model to train so far."
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "### e.\n",
 								    "*Exercise: Try regularizing the model with alpha dropout. Then, without retraining your model, see if you can achieve better accuracy using MC Dropout.*"
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 138,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "outputs": [],
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "keras.backend.clear_session()\n",
 								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)\n",
 								    "\n",
 								    "model = keras.models.Sequential()\n",
 								    "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n",
 								    "for _ in range(20):\n",
 								    "    model.add(keras.layers.Dense(100,\n",
 								    "                                 kernel_initializer=\"lecun_normal\",\n",
 								    "                                 activation=\"selu\"))\n",
 								    "\n",
 								    "model.add(keras.layers.AlphaDropout(rate=0.1))\n",
 								    "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n",
 								    "\n",
 								    "optimizer = keras.optimizers.Nadam(lr=5e-4)\n",
 								    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
 								    "              optimizer=optimizer,\n",
 								    "              metrics=[\"accuracy\"])\n",
 								    "\n",
 								    "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n",
 								    "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_alpha_dropout_model.h5\", save_best_only=True)\n",
 								    "run_index = 1 # increment every time you train the model\n",
 								    "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_alpha_dropout_{:03d}\".format(run_index))\n",
 								    "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n",
 								    "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n",
 								    "\n",
 								    "X_means = X_train.mean(axis=0)\n",
 								    "X_stds = X_train.std(axis=0)\n",
 								    "X_train_scaled = (X_train - X_means) / X_stds\n",
 								    "X_valid_scaled = (X_valid - X_means) / X_stds\n",
 								    "X_test_scaled = (X_test - X_means) / X_stds\n",
 								    "\n",
 								    "model.fit(X_train_scaled, y_train, epochs=100,\n",
 								    "          validation_data=(X_valid_scaled, y_valid),\n",
 								    "          callbacks=callbacks)\n",
 								    "\n",
 								    "model = keras.models.load_model(\"my_cifar10_alpha_dropout_model.h5\")\n",
 								    "model.evaluate(X_valid_scaled, y_valid)"
 								   ]
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "source": [
-												Update libraries to latest version, including TensorFlow 2.4.1 and Scikit-Learn 0.24.1

											
										
										
											2021-02-14 03:02:09 +01:00
+								    "The model reaches 48.9% accuracy on the validation set. That's very slightly better than without dropout (47.6%). With an extensive hyperparameter search, it might be possible to do better (I tried dropout rates of 5%, 10%, 20% and 40%, and learning rates 1e-4, 3e-4, 5e-4, and 1e-3), but probably not much better in this case."
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "Let's use MC Dropout now. We will need the `MCAlphaDropout` class we used earlier, so let's just copy it here for convenience:"
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 139,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "outputs": [],
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "class MCAlphaDropout(keras.layers.AlphaDropout):\n",
 								    "    def call(self, inputs):\n",
 								    "        return super().call(inputs, training=True)"
 								   ]
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								  },
 								  {
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "cell_type": "markdown",
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "Now let's create a new model, identical to the one we just trained (with the same weights), but with `MCAlphaDropout` dropout layers instead of `AlphaDropout` layers:"
 								   ]
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 140,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "outputs": [],
 								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "mc_model = keras.models.Sequential([\n",
 								    "    MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer\n",
 								    "    for layer in model.layers\n",
 								    "])"
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "Then let's add a couple utility functions. The first will run the model many times (10 by default) and it will return the mean predicted class probabilities. The second will use these mean probabilities to predict the most likely class for each instance:"
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 141,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "outputs": [],
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "def mc_dropout_predict_probas(mc_model, X, n_samples=10):\n",
 								    "    Y_probas = [mc_model.predict(X) for sample in range(n_samples)]\n",
 								    "    return np.mean(Y_probas, axis=0)\n",
 								    "\n",
 								    "def mc_dropout_predict_classes(mc_model, X, n_samples=10):\n",
 								    "    Y_probas = mc_dropout_predict_probas(mc_model, X, n_samples)\n",
 								    "    return np.argmax(Y_probas, axis=1)"
 								   ]
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								  },
 								  {
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "Now let's make predictions for all the instances in the validation set, and compute the accuracy:"
 								   ]
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 142,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "outputs": [],
 								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "keras.backend.clear_session()\n",
 								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)\n",
 								    "\n",
 								    "y_pred = mc_dropout_predict_classes(mc_model, X_valid_scaled)\n",
 								    "accuracy = np.mean(y_pred == y_valid[:, 0])\n",
 								    "accuracy"
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "source": [
-												Update libraries to latest version, including TensorFlow 2.4.1 and Scikit-Learn 0.24.1

											
										
										
											2021-02-14 03:02:09 +01:00
+								    "We get no accuracy improvement in this case (we're still at 48.9% accuracy).\n",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "\n",
 								    "So the best model we got in this exercise is the Batch Normalization model."
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "### f.\n",
 								    "*Exercise: Retrain your model using 1cycle scheduling and see if it improves training speed and model accuracy.*"
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 143,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "outputs": [],
 								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "keras.backend.clear_session()\n",
 								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)\n",
 								    "\n",
 								    "model = keras.models.Sequential()\n",
 								    "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n",
 								    "for _ in range(20):\n",
 								    "    model.add(keras.layers.Dense(100,\n",
 								    "                                 kernel_initializer=\"lecun_normal\",\n",
 								    "                                 activation=\"selu\"))\n",
 								    "\n",
 								    "model.add(keras.layers.AlphaDropout(rate=0.1))\n",
 								    "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n",
 								    "\n",
 								    "optimizer = keras.optimizers.SGD(lr=1e-3)\n",
 								    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
 								    "              optimizer=optimizer,\n",
 								    "              metrics=[\"accuracy\"])"
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 144,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "source": [
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "batch_size = 128\n",
 								    "rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=batch_size)\n",
 								    "plot_lr_vs_loss(rates, losses)\n",
 								    "plt.axis([min(rates), max(rates), min(losses), (losses[0] + min(losses)) / 1.4])"
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   ]
 								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 145,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "keras.backend.clear_session()\n",
 								    "tf.random.set_seed(42)\n",
 								    "np.random.seed(42)\n",
 								    "\n",
 								    "model = keras.models.Sequential()\n",
 								    "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n",
 								    "for _ in range(20):\n",
 								    "    model.add(keras.layers.Dense(100,\n",
 								    "                                 kernel_initializer=\"lecun_normal\",\n",
 								    "                                 activation=\"selu\"))\n",
 								    "\n",
 								    "model.add(keras.layers.AlphaDropout(rate=0.1))\n",
 								    "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n",
 								    "\n",
 								    "optimizer = keras.optimizers.SGD(lr=1e-2)\n",
 								    "model.compile(loss=\"sparse_categorical_crossentropy\",\n",
 								    "              optimizer=optimizer,\n",
 								    "              metrics=[\"accuracy\"])"
 								   ]
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								  },
 								  {
 								   "cell_type": "code",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "execution_count": 146,
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Fix JSON formatting in new comment in the ch 11 notebook

											
										
										
											2017-11-03 13:43:56 +01:00
+								   "outputs": [],
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								   "source": [
 								    "n_epochs = 15\n",
-												Adjust computation of steps per epoch

The number of steps per epoch is ceil(len(X) / batch_size) rather than floor(len(X) / batch_size). This change also means we do not have to take max(rate, self.last_rate) in the last steps of the OneCycleScheduler.

											
										
										
											2020-09-25 14:25:17 +02:00
+								    "onecycle = OneCycleScheduler(math.ceil(len(X_train_scaled) / batch_size) * n_epochs, max_rate=0.05)\n",
-												Add solution to the chapter 11's coding exercises, fixes #102 and fixes #120

											
										
										
											2020-03-10 21:55:45 +01:00
+								    "history = model.fit(X_train_scaled, y_train, epochs=n_epochs, batch_size=batch_size,\n",
 								    "                    validation_data=(X_valid_scaled, y_valid),\n",
 								    "                    callbacks=[onecycle])"
 								   ]
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								  },
 								  {
 								   "cell_type": "markdown",
-												Add SELU activation function example and snip out repetitive outputs

											
										
										
											2017-06-21 15:35:47 +02:00
+								   "metadata": {},
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   "source": [
-												Update libraries to latest version, including TensorFlow 2.4.1 and Scikit-Learn 0.24.1

											
										
										
											2021-02-14 03:02:09 +01:00
+								    "One cycle allowed us to train the model in just 15 epochs, each taking only 2 seconds (thanks to the larger batch size). This is several times faster than the fastest model we trained so far. Moreover, we improved the model's performance (from 47.6% to 52.0%). The batch normalized model reaches a slightly better performance (54%), but it's much slower to train."
-												Add exercise solutions for chapter 11

											
										
										
											2017-06-14 09:09:23 +02:00
+								   ]
 								  },
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								  {
 								   "cell_type": "code",
 								   "execution_count": null,
-												Updated to latest version of TensorFlow

											
										
										
											2018-05-08 20:21:23 +02:00
+								   "metadata": {},
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								   "outputs": [],
 								   "source": []
 								  }
 								 ],
 								 "metadata": {
 								  "kernelspec": {
 								   "display_name": "Python 3",
 								   "language": "python",
 								   "name": "python3"
 								  },
 								  "language_info": {
 								   "codemirror_mode": {
 								    "name": "ipython",
 								    "version": 3
 								   },
 								   "file_extension": ".py",
 								   "mimetype": "text/x-python",
 								   "name": "python",
 								   "nbconvert_exporter": "python",
 								   "pygments_lexer": "ipython3",
-												Update libraries to latest version, including TensorFlow 2.4.1 and Scikit-Learn 0.24.1

											
										
										
											2021-02-14 03:02:09 +01:00
+								   "version": "3.7.9"
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								  },
 								  "nav_menu": {
 								   "height": "360px",
 								   "width": "416px"
 								  },
 								  "toc": {
 								   "navigate_menu": true,
 								   "number_sections": true,
 								   "sideBar": true,
 								   "threshold": 6,
 								   "toc_cell": false,
 								   "toc_section_display": "block",
 								   "toc_window_display": false
 								  }
 								 },
 								 "nbformat": 4,
-												Update notebooks to latest nbformat

											
										
										
											2020-04-06 09:13:12 +02:00
+								 "nbformat_minor": 4
-												Add notebooks for chapters 5 to 14

											
										
										
											2016-09-27 23:31:21 +02:00
+								}