{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Chapter 12 – Custom Models and Training with TensorFlow**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"_This notebook contains all the sample code in chapter 12._"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0-preview."
|
||
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Python ≥3.5 is required\n",
"import sys\n",
"assert sys.version_info >= (3, 5)\n",
"\n",
"# Scikit-Learn ≥0.20 is required\n",
"import sklearn\n",
"assert sklearn.__version__ >= \"0.20\"\n",
"\n",
"# TensorFlow ≥2.0-preview is required\n",
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"assert tf.__version__ >= \"2.0\"\n",
"\n",
"# Common imports\n",
"import numpy as np\n",
"import os\n",
"\n",
"# to make this notebook's output stable across runs\n",
"np.random.seed(42)\n",
"\n",
"# To plot pretty figures\n",
"%matplotlib inline\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"mpl.rc('axes', labelsize=14)\n",
"mpl.rc('xtick', labelsize=12)\n",
"mpl.rc('ytick', labelsize=12)\n",
"\n",
"# Where to save the figures\n",
"PROJECT_ROOT_DIR = \".\"\n",
"CHAPTER_ID = \"deep\"\n",
"IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
"os.makedirs(IMAGES_PATH, exist_ok=True)\n",
"\n",
"def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n",
"    path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n",
"    print(\"Saving figure\", fig_id)\n",
"    if tight_layout:\n",
"        plt.tight_layout()\n",
"    plt.savefig(path, format=fig_extension, dpi=resolution)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tensors and operations"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Tensors"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"tf.constant([[1., 2., 3.], [4., 5., 6.]]) # matrix"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"tf.constant(42) # scalar"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"t = tf.constant([[1., 2., 3.], [4., 5., 6.]])\n",
"t"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"t.shape"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"t.dtype"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Indexing"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"t[:, 1:]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"t[..., 1, tf.newaxis]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Ops"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"t + 10"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"tf.square(t)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"t @ tf.transpose(t)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Using `keras.backend`"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"from tensorflow import keras\n",
"K = keras.backend\n",
"K.square(K.transpose(t)) + 10"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### From/To NumPy"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"a = np.array([2., 4., 5.])\n",
"tf.constant(a)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"t.numpy()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"np.array(t)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"tf.square(a)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"np.square(t)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Conflicting Types"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
"    tf.constant(2.0) + tf.constant(40)\n",
"except tf.errors.InvalidArgumentError as ex:\n",
"    print(ex)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
"    tf.constant(2.0) + tf.constant(40., dtype=tf.float64)\n",
"except tf.errors.InvalidArgumentError as ex:\n",
"    print(ex)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"t2 = tf.constant(40., dtype=tf.float64)\n",
"tf.constant(2.0) + tf.cast(t2, tf.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Strings"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"tf.constant(b\"hello world\")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"tf.constant(\"café\")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"u = tf.constant([ord(c) for c in \"café\"])\n",
"u"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"b = tf.strings.unicode_encode(u, \"UTF-8\")\n",
"tf.strings.length(b, unit=\"UTF8_CHAR\")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"tf.strings.unicode_decode(b, \"UTF-8\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### String arrays"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"p = tf.constant([\"Café\", \"Coffee\", \"caffè\", \"咖啡\"])"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"tf.strings.length(p, unit=\"UTF8_CHAR\")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"r = tf.strings.unicode_decode(p, \"UTF8\")\n",
"r"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"print(r)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Ragged tensors"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"print(r[1])"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"print(r[1:3])"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"r2 = tf.ragged.constant([[65, 66], [], [67]])\n",
"print(tf.concat([r, r2], axis=0))"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"r3 = tf.ragged.constant([[68, 69, 70], [71], [], [72, 73]])\n",
"print(tf.concat([r, r3], axis=1))"
]
},
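{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that to concatenate ragged tensors along `axis=1`, all the tensors must have the same number of rows (here, 4): each row of `r3` is appended to the corresponding row of `r`."
]
},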
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"tf.strings.unicode_encode(r3, \"UTF-8\")"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"r.to_tensor()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sparse tensors"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"s = tf.SparseTensor(indices=[[0, 1], [1, 0], [2, 3]],\n",
"                    values=[1., 2., 3.],\n",
"                    dense_shape=[3, 4])"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"print(s)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"tf.sparse.to_dense(s)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"s2 = s * 2.0"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
"    s3 = s + 1.\n",
"except TypeError as ex:\n",
"    print(ex)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"s4 = tf.constant([[10., 20.], [30., 40.], [50., 60.], [70., 80.]])\n",
"tf.sparse.sparse_dense_matmul(s, s4)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"s5 = tf.SparseTensor(indices=[[0, 2], [0, 1]],\n",
"                     values=[1., 2.],\n",
"                     dense_shape=[3, 4])\n",
"print(s5)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
"    tf.sparse.to_dense(s5)\n",
"except tf.errors.InvalidArgumentError as ex:\n",
"    print(ex)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"s6 = tf.sparse.reorder(s5)\n",
"tf.sparse.to_dense(s6)"
]
},
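{
"cell_type": "markdown",
"metadata": {},
"source": [
"The indices of a `SparseTensor` must be sorted in row-major order: that is why `tf.sparse.to_dense(s5)` failed above, and why `tf.sparse.reorder()` (which sorts the indices) fixes the problem."
]
},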
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sets"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"set1 = tf.constant([[2, 3, 5, 7], [7, 9, 0, 0]])\n",
"set2 = tf.constant([[4, 5, 6], [9, 10, 0]])\n",
"tf.sparse.to_dense(tf.sets.union(set1, set2))"
]
},
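{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `tf.sets` operations interpret each row of the input tensors as a set, and they return the result as a `SparseTensor`, which is why we convert it with `tf.sparse.to_dense()` to display it."
]
},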
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"tf.sparse.to_dense(tf.sets.difference(set1, set2))"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"tf.sparse.to_dense(tf.sets.intersection(set1, set2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Variables"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"v.assign(2 * v)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"v[0, 1].assign(42)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"v[:, 2].assign([0., 1.])"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
"    v[1] = [7., 8., 9.]\n",
"except TypeError as ex:\n",
"    print(ex)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"v.scatter_nd_update(indices=[[0, 0], [1, 2]],\n",
"                    updates=[100., 200.])"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"sparse_delta = tf.IndexedSlices(values=[[1., 2., 3.], [4., 5., 6.]],\n",
"                                indices=[1, 0])\n",
"v.scatter_update(sparse_delta)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Tensor Arrays"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"array = tf.TensorArray(dtype=tf.float32, size=3)\n",
"array = array.write(0, tf.constant([1., 2.]))\n",
"array = array.write(1, tf.constant([3., 10.]))\n",
"array = array.write(2, tf.constant([5., 7.]))"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"array.read(1)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"array.stack()"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"mean, variance = tf.nn.moments(array.stack(), axes=0)\n",
"mean"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"variance"
]
},
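{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick check: the stacked array is $[[1, 2], [3, 10], [5, 7]]$, so the column means are $[(1+3+5)/3, (2+10+7)/3] = [3, 6.33\\ldots]$, and the (population) variances are $[((1-3)^2 + (3-3)^2 + (5-3)^2)/3, \\ldots] \\approx [2.67, 10.89]$, matching the values above."
]
},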
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Custom loss function"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's start by loading and preparing the California housing dataset. We first load it, then split it into a training set, a validation set and a test set, and finally we scale it:"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import fetch_california_housing\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"housing = fetch_california_housing()\n",
"X_train_full, X_test, y_train_full, y_test = train_test_split(\n",
"    housing.data, housing.target.reshape(-1, 1), random_state=42)\n",
"X_train, X_valid, y_train, y_valid = train_test_split(\n",
"    X_train_full, y_train_full, random_state=42)\n",
"\n",
"scaler = StandardScaler()\n",
"X_train_scaled = scaler.fit_transform(X_train)\n",
"X_valid_scaled = scaler.transform(X_valid)\n",
"X_test_scaled = scaler.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"def huber_fn(y_true, y_pred):\n",
"    error = y_true - y_pred\n",
"    is_small_error = tf.abs(error) < 1\n",
"    squared_loss = tf.square(error) / 2\n",
"    linear_loss = tf.abs(error) - 0.5\n",
"    return tf.where(is_small_error, squared_loss, linear_loss)"
]
},
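{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, this implements the Huber loss (with threshold $\\delta = 1$) as a function of the error $e = y_{true} - y_{pred}$:\n",
"\n",
"$$L_\\delta(e) = \\begin{cases} \\frac{1}{2} e^2 & \\text{if } |e| < \\delta \\\\\\\\ \\delta |e| - \\frac{1}{2} \\delta^2 & \\text{otherwise} \\end{cases}$$"
]
},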
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(8, 3.5))\n",
"z = np.linspace(-4, 4, 200)\n",
"plt.plot(z, huber_fn(0, z), \"b-\", linewidth=2, label=\"huber($z$)\")\n",
"plt.plot(z, z**2 / 2, \"b:\", linewidth=1, label=r\"$\\frac{1}{2}z^2$\")\n",
"plt.plot([-1, -1], [0, huber_fn(0., -1.)], \"r--\")\n",
"plt.plot([1, 1], [0, huber_fn(0., 1.)], \"r--\")\n",
"plt.gca().axhline(y=0, color='k')\n",
"plt.gca().axvline(x=0, color='k')\n",
"plt.axis([-4, 4, 0, 4])\n",
"plt.grid(True)\n",
"plt.xlabel(\"$z$\")\n",
"plt.legend(fontsize=14)\n",
"plt.title(\"Huber loss\", fontsize=14)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"input_shape = X_train.shape[1:]\n",
"\n",
"model = keras.models.Sequential([\n",
"    keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
"                       input_shape=input_shape),\n",
"    keras.layers.Dense(1),\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"model.compile(loss=huber_fn, optimizer=\"nadam\", metrics=[\"mae\"])"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"model.fit(X_train_scaled, y_train, epochs=2,\n",
"          validation_data=(X_valid_scaled, y_valid))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Saving/Loading Models with Custom Objects"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"model.save(\"my_model_with_a_custom_loss.h5\")"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.load_model(\"my_model_with_a_custom_loss.h5\",\n",
"                                custom_objects={\"huber_fn\": huber_fn})"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"model.fit(X_train_scaled, y_train, epochs=2,\n",
"          validation_data=(X_valid_scaled, y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"def create_huber(threshold=1.0):\n",
"    def huber_fn(y_true, y_pred):\n",
"        error = y_true - y_pred\n",
"        is_small_error = tf.abs(error) < threshold\n",
"        squared_loss = tf.square(error) / 2\n",
"        linear_loss = threshold * tf.abs(error) - threshold**2 / 2\n",
"        return tf.where(is_small_error, squared_loss, linear_loss)\n",
"    return huber_fn"
]
},
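{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note why the linear part is $\\delta |e| - \\delta^2/2$ rather than just $\\delta |e|$: at $|e| = \\delta$, the quadratic part equals $\\delta^2/2$ and the linear part equals $\\delta^2 - \\delta^2/2 = \\delta^2/2$, so the two branches meet at the threshold and the loss remains continuous."
]
},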
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"model.compile(loss=create_huber(2.0), optimizer=\"nadam\", metrics=[\"mae\"])"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"model.fit(X_train_scaled, y_train, epochs=2,\n",
"          validation_data=(X_valid_scaled, y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
"model.save(\"my_model_with_a_custom_loss_threshold_2.h5\")"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.load_model(\"my_model_with_a_custom_loss_threshold_2.h5\",\n",
"                                custom_objects={\"huber_fn\": create_huber(2.0)})"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
"model.fit(X_train_scaled, y_train, epochs=2,\n",
"          validation_data=(X_valid_scaled, y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"class HuberLoss(keras.losses.Loss):\n",
"    def __init__(self, threshold=1.0, **kwargs):\n",
"        self.threshold = threshold\n",
"        super().__init__(**kwargs)\n",
"    def call(self, y_true, y_pred):\n",
"        error = y_true - y_pred\n",
"        is_small_error = tf.abs(error) < self.threshold\n",
"        squared_loss = tf.square(error) / 2\n",
"        linear_loss = self.threshold * tf.abs(error) - self.threshold**2 / 2\n",
"        return tf.where(is_small_error, squared_loss, linear_loss)\n",
"    def get_config(self):\n",
"        base_config = super().get_config()\n",
"        return {**base_config, \"threshold\": self.threshold}"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential([\n",
"    keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
"                       input_shape=input_shape),\n",
"    keras.layers.Dense(1),\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"model.compile(loss=HuberLoss(2.), optimizer=\"nadam\", metrics=[\"mae\"])"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"model.fit(X_train_scaled, y_train, epochs=2,\n",
"          validation_data=(X_valid_scaled, y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"model.save(\"my_model_with_a_custom_loss_class.h5\")"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [],
"source": [
"#model = keras.models.load_model(\"my_model_with_a_custom_loss_class.h5\", # TODO: check PR #25956\n",
"#                                custom_objects={\"HuberLoss\": HuberLoss})"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
"model.fit(X_train_scaled, y_train, epochs=2,\n",
"          validation_data=(X_valid_scaled, y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [],
"source": [
"#model = keras.models.load_model(\"my_model_with_a_custom_loss_class.h5\", # TODO: check PR #25956\n",
"#                                custom_objects={\"HuberLoss\": HuberLoss})"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
"model.loss.threshold"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Other Custom Functions"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"def my_softplus(z): # return value is just tf.nn.softplus(z)\n",
"    return tf.math.log(tf.exp(z) + 1.0)\n",
"\n",
"def my_glorot_initializer(shape, dtype=tf.float32):\n",
"    stddev = tf.sqrt(2. / (shape[0] + shape[1]))\n",
"    return tf.random.normal(shape, stddev=stddev, dtype=dtype)\n",
"\n",
"def my_l1_regularizer(weights):\n",
"    return tf.reduce_sum(tf.abs(0.01 * weights))\n",
"\n",
"def my_positive_weights(weights): # return value is just tf.nn.relu(weights)\n",
"    return tf.where(weights < 0., tf.zeros_like(weights), weights)"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [],
"source": [
"layer = keras.layers.Dense(1, activation=my_softplus,\n",
"                           kernel_initializer=my_glorot_initializer,\n",
"                           kernel_regularizer=my_l1_regularizer,\n",
"                           kernel_constraint=my_positive_weights)"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential([\n",
"    keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
"                       input_shape=input_shape),\n",
"    keras.layers.Dense(1, activation=my_softplus,\n",
"                       kernel_regularizer=my_l1_regularizer,\n",
"                       kernel_constraint=my_positive_weights,\n",
"                       kernel_initializer=my_glorot_initializer),\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [],
"source": [
"model.compile(loss=\"mse\", optimizer=\"nadam\", metrics=[\"mae\"])"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"model.fit(X_train_scaled, y_train, epochs=2,\n",
"          validation_data=(X_valid_scaled, y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
"model.save(\"my_model_with_many_custom_parts.h5\")"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"# TODO: \n",
"\"\"\"\n",
"model = keras.models.load_model(\n",
"    \"my_model_with_many_custom_parts.h5\",\n",
"    custom_objects={\n",
" \"my_l1_regularizer\": my_l1_regularizer(0.01),\n",
|
||
" \"my_positive_weights\": my_positive_weights,\n",
|
||
" \"my_glorot_initializer\": my_glorot_initializer,\n",
|
||
" \"my_softplus\": my_softplus,\n",
|
||
" })\n",
|
||
"\"\"\""
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 91,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class MyL1Regularizer(keras.regularizers.Regularizer):\n",
|
||
" def __init__(self, factor):\n",
|
||
" self.factor = factor\n",
|
||
" def __call__(self, weights):\n",
|
||
" return tf.reduce_sum(tf.abs(self.factor * weights))\n",
|
||
" def get_config(self):\n",
|
||
" return {\"factor\": self.factor}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 92,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = keras.models.Sequential([\n",
|
||
" keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
|
||
" input_shape=input_shape),\n",
|
||
" keras.layers.Dense(1, activation=my_softplus,\n",
|
||
" kernel_regularizer=MyL1Regularizer(0.01),\n",
|
||
" kernel_constraint=my_positive_weights,\n",
|
||
" kernel_initializer=my_glorot_initializer),\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 93,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"mse\", optimizer=\"nadam\", metrics=[\"mae\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 94,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled, y_train, epochs=2,\n",
|
||
" validation_data=(X_valid_scaled, y_valid))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 95,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.save(\"my_model_with_many_custom_parts.h5\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 96,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# TODO: check https://github.com/tensorflow/tensorflow/issues/26061\n",
|
||
"\"\"\"\n",
|
||
"model = keras.models.load_model(\n",
|
||
" \"my_model_with_many_custom_parts.h5\",\n",
|
||
" custom_objects={\n",
|
||
" \"MyL1Regularizer\": MyL1Regularizer,\n",
|
||
" \"my_positive_weights\": my_positive_weights,\n",
|
||
" \"my_glorot_initializer\": my_glorot_initializer,\n",
|
||
" \"my_softplus\": my_softplus,\n",
|
||
" })\n",
|
||
"\"\"\""
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Custom Metrics"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 97,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = keras.models.Sequential([\n",
|
||
" keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
|
||
" input_shape=input_shape),\n",
|
||
" keras.layers.Dense(1),\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 98,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"mse\", optimizer=\"nadam\", metrics=[create_huber(2.0)])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 99,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled, y_train, epochs=2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"**Warning**: if you use the same function as the loss and a metric, you may be surprised to see different results. This is generally just due to floating point precision errors: even though the mathematical equations are equivalent, the operations are not run in the same order, which can lead to small differences. Moreover, when using sample weights, there's more than just precision errors:\n",
|
||
"* the loss since the start of the epoch is the mean of all batch losses seen so far. Each batch loss is the sum of the weighted instance losses divided by the _batch size_ (not the sum of weights, so the batch loss is _not_ the weighted mean of the losses).\n",
|
||
"* the metric since the start of the epoch is equal to the sum of weighted instance losses divided by sum of all weights seen so far. In other words, it is the weighted mean of all the instance losses. Not the same thing.\n",
|
||
"\n",
|
||
"If you do the math, you will find that metric = loss * mean of sample weights (plus some floating point precision error)."
|
||
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [],
"source": [
"model.compile(loss=create_huber(2.0), optimizer=\"nadam\", metrics=[create_huber(2.0)])"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [],
"source": [
"sample_weight = np.random.rand(len(y_train))\n",
"history = model.fit(X_train_scaled, y_train, epochs=2, sample_weight=sample_weight)"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"history.history[\"loss\"][0], history.history[\"huber_fn\"][0] * sample_weight.mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Streaming metrics"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"precision = keras.metrics.Precision()\n",
"precision([0, 1, 1, 1, 0, 1, 0, 1], [1, 1, 0, 1, 0, 1, 0, 1])"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [],
"source": [
"precision([0, 1, 0, 0, 1, 0, 1, 1], [1, 0, 1, 1, 0, 0, 0, 0])"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"precision.result()"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [],
"source": [
"precision.variables"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [],
"source": [
"precision.reset_states()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating a streaming metric:"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"class HuberMetric(keras.metrics.Metric):\n",
"    def __init__(self, threshold=1.0, **kwargs):\n",
"        super().__init__(**kwargs) # handles base args (e.g., dtype)\n",
"        self.threshold = threshold\n",
"        self.huber_fn = create_huber(threshold)\n",
"        self.total = self.add_weight(\"total\", initializer=\"zeros\")\n",
"        self.count = self.add_weight(\"count\", initializer=\"zeros\")\n",
"    def update_state(self, y_true, y_pred, sample_weight=None):\n",
"        metric = self.huber_fn(y_true, y_pred)\n",
"        self.total.assign_add(tf.reduce_sum(metric))\n",
"        self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))\n",
"    def result(self):\n",
"        return self.total / self.count\n",
"    def get_config(self):\n",
"        base_config = super().get_config()\n",
"        return {**base_config, \"threshold\": self.threshold}"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
"m = HuberMetric(2.)\n",
"\n",
"# total = 2 * |10 - 2| - 2²/2 = 14\n",
"# count = 1\n",
"# result = 14 / 1 = 14\n",
"m(tf.constant([[2.]]), tf.constant([[10.]]))"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
"# total = total + (|1 - 0|² / 2) + (2 * |9.25 - 5| - 2² / 2) = 14 + 7 = 21\n",
"# count = count + 2 = 3\n",
"# result = total / count = 21 / 3 = 7\n",
"m(tf.constant([[0.], [5.]]), tf.constant([[1.], [9.25]]))\n",
"\n",
"m.result()"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
"m.variables"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [],
"source": [
"m.reset_states()\n",
"m.variables"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's check that the `HuberMetric` class works well:"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential([\n",
"    keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
"                       input_shape=input_shape),\n",
"    keras.layers.Dense(1),\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [],
"source": [
"model.compile(loss=create_huber(2.0), optimizer=\"nadam\", metrics=[HuberMetric(2.0)])"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
"model.fit(X_train_scaled, y_train, epochs=2)"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
"model.save(\"my_model_with_a_custom_metric.h5\")"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [],
"source": [
"#model = keras.models.load_model(\"my_model_with_a_custom_metric.h5\", # TODO: check PR #25956\n",
"#                                custom_objects={\"huber_fn\": create_huber(2.0),\n",
"#                                                \"HuberMetric\": HuberMetric})"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [],
"source": [
"model.fit(X_train_scaled, y_train, epochs=2)"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [],
"source": [
"model.metrics[0].threshold"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Looks like it works fine! More simply, we could have created the class like this:"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [],
"source": [
"class HuberMetric(keras.metrics.Mean):\n",
"    def __init__(self, threshold=1.0, name='HuberMetric', dtype=None):\n",
"        self.threshold = threshold\n",
"        self.huber_fn = create_huber(threshold)\n",
"        super().__init__(name=name, dtype=dtype)\n",
"    def update_state(self, y_true, y_pred, sample_weight=None):\n",
"        metric = self.huber_fn(y_true, y_pred)\n",
"        super(HuberMetric, self).update_state(metric, sample_weight)\n",
"    def get_config(self):\n",
"        base_config = super().get_config()\n",
"        return {**base_config, \"threshold\": self.threshold}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This class handles shapes better, and it also supports sample weights."
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential([\n",
"    keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
"                       input_shape=input_shape),\n",
"    keras.layers.Dense(1),\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [],
"source": [
"model.compile(loss=keras.losses.Huber(2.0), optimizer=\"nadam\", weighted_metrics=[HuberMetric(2.0)])"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"sample_weight = np.random.rand(len(y_train))\n",
"history = model.fit(X_train_scaled, y_train, epochs=2, sample_weight=sample_weight)"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [],
"source": [
"history.history[\"loss\"][0], history.history[\"HuberMetric\"][0] * sample_weight.mean()"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [],
"source": [
"model.save(\"my_model_with_a_custom_metric_v2.h5\")"
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {},
"outputs": [],
"source": [
"#model = keras.models.load_model(\"my_model_with_a_custom_metric_v2.h5\", # TODO: check PR #25956\n",
"#                                custom_objects={\"HuberMetric\": HuberMetric})"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {},
"outputs": [],
"source": [
"model.fit(X_train_scaled, y_train, epochs=2)"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"model.metrics[0].threshold"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Custom Layers"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {},
"outputs": [],
"source": [
"exponential_layer = keras.layers.Lambda(lambda x: tf.exp(x))"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [],
"source": [
"exponential_layer([-1., 0., 1.])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Adding an exponential layer at the output of a regression model can be useful if the values to predict are positive and with very different scales (e.g., 0.001, 10., 10000):"
|
||
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential([\n",
"    keras.layers.Dense(30, activation=\"relu\", input_shape=input_shape),\n",
"    keras.layers.Dense(1),\n",
"    exponential_layer\n",
"])\n",
"model.compile(loss=\"mse\", optimizer=\"nadam\")\n",
"model.fit(X_train_scaled, y_train, epochs=5,\n",
"          validation_data=(X_valid_scaled, y_valid))\n",
"model.evaluate(X_test_scaled, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {},
"outputs": [],
"source": [
"class MyDense(keras.layers.Layer):\n",
"    def __init__(self, units, activation=None, **kwargs):\n",
"        super().__init__(**kwargs)\n",
"        self.units = units\n",
"        self.activation = keras.activations.get(activation)\n",
"\n",
"    def build(self, batch_input_shape):\n",
"        self.kernel = self.add_weight(\n",
"            name=\"kernel\", shape=[batch_input_shape[-1], self.units],\n",
"            initializer=\"glorot_normal\")\n",
"        self.bias = self.add_weight(\n",
"            name=\"bias\", shape=[self.units], initializer=\"zeros\")\n",
"        super().build(batch_input_shape) # must be at the end\n",
"\n",
"    def call(self, X):\n",
"        return self.activation(X @ self.kernel + self.bias)\n",
"\n",
"    def compute_output_shape(self, batch_input_shape):\n",
"        return tf.TensorShape(batch_input_shape.as_list()[:-1] + [self.units])\n",
"\n",
"    def get_config(self):\n",
"        base_config = super().get_config()\n",
"        return {**base_config, \"units\": self.units,\n",
"                \"activation\": keras.activations.serialize(self.activation)}"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential([\n",
"    MyDense(30, activation=\"relu\", input_shape=input_shape),\n",
"    MyDense(1)\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"model.compile(loss=\"mse\", optimizer=\"nadam\")\n",
"model.fit(X_train_scaled, y_train, epochs=2,\n",
"          validation_data=(X_valid_scaled, y_valid))\n",
"model.evaluate(X_test_scaled, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 135,
"metadata": {},
"outputs": [],
"source": [
"model.save(\"my_model_with_a_custom_layer.h5\")"
]
},
{
"cell_type": "code",
"execution_count": 136,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.load_model(\"my_model_with_a_custom_layer.h5\",\n",
"                                custom_objects={\"MyDense\": MyDense})"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {},
"outputs": [],
"source": [
"class MyMultiLayer(keras.layers.Layer):\n",
"    def call(self, X):\n",
"        X1, X2 = X\n",
"        return X1 + X2, X1 * X2\n",
"\n",
"    def compute_output_shape(self, batch_input_shape):\n",
"        batch_input_shape1, batch_input_shape2 = batch_input_shape\n",
"        return [batch_input_shape1, batch_input_shape2]"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {},
"outputs": [],
"source": [
"inputs1 = keras.layers.Input(shape=[2])\n",
"inputs2 = keras.layers.Input(shape=[2])\n",
"outputs1, outputs2 = MyMultiLayer()((inputs1, inputs2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's create a layer with a different behavior during training and testing:"
]
},
{
"cell_type": "code",
"execution_count": 139,
"metadata": {},
"outputs": [],
"source": [
"class AddGaussianNoise(keras.layers.Layer):\n",
"    def __init__(self, stddev, **kwargs):\n",
"        super().__init__(**kwargs)\n",
"        self.stddev = stddev\n",
"\n",
"    def call(self, X, training=None):\n",
"        if training:\n",
"            noise = tf.random.normal(tf.shape(X), stddev=self.stddev)\n",
"            return X + noise\n",
"        else:\n",
"            return X\n",
"\n",
"    def compute_output_shape(self, batch_input_shape):\n",
"        return batch_input_shape"
]
},
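{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here is a minimal sketch of how this layer could be used (the `noisy_model` name, the `stddev=1.0` value, and the architecture below are arbitrary illustration choices, not from the book). The noise is only added when `training=True`, so `fit()` sees noisy inputs while `evaluate()` and `predict()` do not:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative only: stddev=1.0 and the architecture are arbitrary choices\n",
"noisy_model = keras.models.Sequential([\n",
"    AddGaussianNoise(stddev=1.0, input_shape=input_shape),\n",
"    keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n",
"    keras.layers.Dense(1)\n",
"])\n",
"noisy_model.compile(loss=\"mse\", optimizer=\"nadam\")\n",
"noisy_model.fit(X_train_scaled, y_train, epochs=2,\n",
"                validation_data=(X_valid_scaled, y_valid))\n",
"noisy_model.evaluate(X_test_scaled, y_test)"
]
},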
{
"cell_type": "code",
"execution_count": 140,
"metadata": {},
"outputs": [],
"source": [
"model.compile(loss=\"mse\", optimizer=\"nadam\")\n",
"model.fit(X_train_scaled, y_train, epochs=2,\n",
"          validation_data=(X_valid_scaled, y_valid))\n",
"model.evaluate(X_test_scaled, y_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Custom Models"
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {},
"outputs": [],
"source": [
"X_new_scaled = X_test_scaled"
]
},
{
"cell_type": "code",
"execution_count": 142,
"metadata": {},
"outputs": [],
"source": [
"class ResidualBlock(keras.layers.Layer):\n",
"    def __init__(self, n_layers, n_neurons, **kwargs):\n",
"        super().__init__(**kwargs)\n",
"        self.n_layers = n_layers # not shown in the book\n",
"        self.n_neurons = n_neurons # not shown\n",
"        self.hidden = [keras.layers.Dense(n_neurons, activation=\"elu\",\n",
"                                          kernel_initializer=\"he_normal\")\n",
"                       for _ in range(n_layers)]\n",
"\n",
"    def call(self, inputs):\n",
"        Z = inputs\n",
"        for layer in self.hidden:\n",
"            Z = layer(Z)\n",
"        return inputs + Z\n",
"\n",
"    def get_config(self): # not shown\n",
"        base_config = super().get_config() # not shown\n",
"        return {**base_config, # not shown\n",
" \"n_layers\": self.n_layers, \"n_neurons\": n_neurons} # not shown"
|
||
]
},
{
"cell_type": "code",
"execution_count": 143,
"metadata": {},
"outputs": [],
"source": [
"class ResidualRegressor(keras.models.Model):\n",
"    def __init__(self, output_dim, **kwargs):\n",
"        super().__init__(**kwargs)\n",
"        self.output_dim = output_dim # not shown in the book\n",
"        self.hidden1 = keras.layers.Dense(30, activation=\"elu\",\n",
"                                          kernel_initializer=\"he_normal\")\n",
"        self.block1 = ResidualBlock(2, 30)\n",
"        self.block2 = ResidualBlock(2, 30)\n",
"        self.out = keras.layers.Dense(output_dim)\n",
"\n",
"    def call(self, inputs):\n",
"        Z = self.hidden1(inputs)\n",
"        for _ in range(1 + 3):\n",
"            Z = self.block1(Z)\n",
"        Z = self.block2(Z)\n",
"        return self.out(Z)\n",
"\n",
"    def get_config(self): # not shown\n",
"        base_config = super().get_config() # not shown\n",
"        return {**base_config, # not shown\n",
"                \"output_dim\": self.output_dim} # not shown"
]
},
{
"cell_type": "code",
"execution_count": 144,
"metadata": {},
"outputs": [],
"source": [
"model = ResidualRegressor(1)\n",
"model.compile(loss=\"mse\", optimizer=\"nadam\")\n",
"history = model.fit(X_train_scaled, y_train, epochs=5)\n",
"score = model.evaluate(X_test_scaled, y_test)\n",
"y_pred = model.predict(X_new_scaled)"
]
},
{
"cell_type": "code",
"execution_count": 145,
"metadata": {},
"outputs": [],
"source": [
"#TODO: check that persistence ends up working in TF2\n",
"#model.save(\"my_custom_model.h5\")\n",
"#model = keras.models.load_model(\"my_custom_model.h5\",\n",
"#                                custom_objects={\n",
"#                                    \"ResidualBlock\": ResidualBlock,\n",
"#                                    \"ResidualRegressor\": ResidualRegressor\n",
"#                                })"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We could have defined the model using the sequential API instead:"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [],
"source": [
"block1 = ResidualBlock(2, 30)\n",
"model = keras.models.Sequential([\n",
"    keras.layers.Dense(30, activation=\"elu\", kernel_initializer=\"he_normal\"),\n",
"    block1, block1, block1, block1,\n",
"    ResidualBlock(2, 30),\n",
"    keras.layers.Dense(1)\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 147,
"metadata": {},
"outputs": [],
"source": [
"model.compile(loss=\"mse\", optimizer=\"nadam\")\n",
"history = model.fit(X_train_scaled, y_train, epochs=5)\n",
"score = model.evaluate(X_test_scaled, y_test)\n",
"y_pred = model.predict(X_new_scaled)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Losses and Metrics Based on Model Internals"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"TODO: check https://github.com/tensorflow/tensorflow/issues/26260\n",
"```python\n",
"class ReconstructingRegressor(keras.models.Model):\n",
"    def __init__(self, output_dim, **kwargs):\n",
"        super().__init__(**kwargs)\n",
"        self.hidden = [keras.layers.Dense(30, activation=\"selu\",\n",
"                                          kernel_initializer=\"lecun_normal\")\n",
"                       for _ in range(5)]\n",
"        self.out = keras.layers.Dense(output_dim)\n",
"        self.reconstruction_mean = keras.metrics.Mean(name=\"reconstruction_error\")\n",
"\n",
"    def build(self, batch_input_shape):\n",
"        n_inputs = batch_input_shape[-1]\n",
"        self.reconstruct = keras.layers.Dense(n_inputs)\n",
"        super().build(batch_input_shape)\n",
"\n",
"    @tf.function\n",
"    def call(self, inputs, training=None):\n",
"        Z = inputs\n",
"        for layer in self.hidden:\n",
"            Z = layer(Z)\n",
"        reconstruction = self.reconstruct(Z)\n",
"        recon_loss = tf.reduce_mean(tf.square(reconstruction - inputs))\n",
" self.add_loss(0.05 * reconstruction_loss)\n",
|
||
" if training:\n",
|
||
" result = self.reconstruction_mean(recon_loss)\n",
|
||
" self.add_metric(result)\n",
|
||
" return self.out(Z)\n",
|
||
"\n",
|
||
"model = ReconstructingRegressor(1)\n",
|
||
"model.build(tf.TensorShape([None, 8])) # <= Fails if this line is removed\n",
|
||
"model.compile(loss=\"mse\", optimizer=\"nadam\")\n",
|
||
"history = model.fit(X, y, epochs=2)\n",
|
||
"```"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 148,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class ReconstructingRegressor(keras.models.Model):\n",
|
||
" def __init__(self, output_dim, **kwargs):\n",
|
||
" super().__init__(**kwargs)\n",
|
||
" self.hidden = [keras.layers.Dense(30, activation=\"selu\",\n",
|
||
" kernel_initializer=\"lecun_normal\")\n",
|
||
" for _ in range(5)]\n",
|
||
" self.out = keras.layers.Dense(output_dim)\n",
|
||
"\n",
|
||
" def build(self, batch_input_shape):\n",
|
||
" n_inputs = batch_input_shape[-1]\n",
|
||
" self.reconstruct = keras.layers.Dense(n_inputs)\n",
|
||
" super().build(batch_input_shape)\n",
|
||
"\n",
|
||
" def call(self, inputs):\n",
|
||
" Z = inputs\n",
|
||
" for layer in self.hidden:\n",
|
||
" Z = layer(Z)\n",
|
||
" reconstruction = self.reconstruct(Z)\n",
|
||
" recon_loss = tf.reduce_mean(tf.square(reconstruction - inputs))\n",
|
||
" self.add_loss(0.05 * recon_loss)\n",
|
||
" return self.out(Z)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 149,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = ReconstructingRegressor(1)\n",
|
||
"model.build(tf.TensorShape([None, 8])) # TODO: check https://github.com/tensorflow/tensorflow/issues/26274\n",
|
||
"model.compile(loss=\"mse\", optimizer=\"nadam\")\n",
|
||
"history = model.fit(X_train_scaled, y_train, epochs=2)\n",
|
||
"y_pred = model.predict(X_test_scaled)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Computing Gradients Using Autodiff"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 150,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def f(w1, w2):\n",
|
||
" return 3 * w1 ** 2 + 2 * w1 * w2"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 151,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"w1, w2 = 5, 3\n",
|
||
"eps = 1e-6\n",
|
||
"(f(w1 + eps, w2) - f(w1, w2)) / eps"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 152,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"(f(w1, w2 + eps) - f(w1, w2)) / eps"
|
||
]
|
||
},
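{
"cell_type": "markdown",
"metadata": {},
"source": [
"Since $f(w_1, w_2) = 3w_1^2 + 2w_1 w_2$, the analytic partial derivatives are $\\partial f/\\partial w_1 = 6w_1 + 2w_2$ and $\\partial f/\\partial w_2 = 2w_1$, i.e., 36 and 10 at $(w_1, w_2) = (5, 3)$. The finite-difference estimates above should match these values up to numerical error, as should the autodiff results below."
]
},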
{
"cell_type": "code",
"execution_count": 153,
"metadata": {},
"outputs": [],
"source": [
"w1, w2 = tf.Variable(5.), tf.Variable(3.)\n",
"with tf.GradientTape() as tape:\n",
"    z = f(w1, w2)\n",
"\n",
"gradients = tape.gradient(z, [w1, w2])"
]
},
{
"cell_type": "code",
"execution_count": 154,
"metadata": {},
"outputs": [],
"source": [
"gradients"
]
},
{
"cell_type": "code",
"execution_count": 155,
"metadata": {},
"outputs": [],
"source": [
"with tf.GradientTape() as tape:\n",
"    z = f(w1, w2)\n",
"\n",
"dz_dw1 = tape.gradient(z, w1)\n",
"try:\n",
"    dz_dw2 = tape.gradient(z, w2)\n",
"except RuntimeError as ex:\n",
"    print(ex)"
]
},
{
"cell_type": "code",
"execution_count": 156,
"metadata": {},
"outputs": [],
"source": [
"with tf.GradientTape(persistent=True) as tape:\n",
"    z = f(w1, w2)\n",
"\n",
"dz_dw1 = tape.gradient(z, w1)\n",
"dz_dw2 = tape.gradient(z, w2) # works now!\n",
"del tape"
]
},
{
"cell_type": "code",
"execution_count": 157,
"metadata": {},
"outputs": [],
"source": [
"dz_dw1, dz_dw2"
]
},
{
"cell_type": "code",
"execution_count": 158,
"metadata": {},
"outputs": [],
"source": [
"c1, c2 = tf.constant(5.), tf.constant(3.)\n",
"with tf.GradientTape() as tape:\n",
"    z = f(c1, c2)\n",
"\n",
"gradients = tape.gradient(z, [c1, c2])"
]
},
{
"cell_type": "code",
"execution_count": 159,
"metadata": {},
"outputs": [],
"source": [
"gradients"
]
},
{
"cell_type": "code",
"execution_count": 160,
"metadata": {},
"outputs": [],
"source": [
"with tf.GradientTape() as tape:\n",
"    tape.watch(c1)\n",
"    tape.watch(c2)\n",
"    z = f(c1, c2)\n",
"\n",
"gradients = tape.gradient(z, [c1, c2])"
]
},
{
"cell_type": "code",
"execution_count": 161,
"metadata": {},
"outputs": [],
"source": [
"gradients"
]
},
{
"cell_type": "code",
"execution_count": 162,
"metadata": {},
"outputs": [],
"source": [
"with tf.GradientTape() as tape:\n",
"    z1 = f(w1, w2 + 2.)\n",
"    z2 = f(w1, w2 + 5.)\n",
"    z3 = f(w1, w2 + 7.)\n",
"\n",
"tape.gradient([z1, z2, z3], [w1, w2])"
]
},
{
"cell_type": "code",
"execution_count": 163,
"metadata": {},
"outputs": [],
"source": [
"with tf.GradientTape(persistent=True) as tape:\n",
"    z1 = f(w1, w2 + 2.)\n",
"    z2 = f(w1, w2 + 5.)\n",
"    z3 = f(w1, w2 + 7.)\n",
"\n",
"tf.reduce_sum(tf.stack([tape.gradient(z, [w1, w2]) for z in (z1, z2, z3)]), axis=0)\n",
"del tape"
]
},
{
"cell_type": "code",
"execution_count": 164,
"metadata": {},
"outputs": [],
"source": [
"with tf.GradientTape(persistent=True) as hessian_tape:\n",
"    with tf.GradientTape() as jacobian_tape:\n",
"        z = f(w1, w2)\n",
"    jacobians = jacobian_tape.gradient(z, [w1, w2])\n",
"hessians = [hessian_tape.gradient(jacobian, [w1, w2])\n",
"            for jacobian in jacobians]\n",
"del hessian_tape"
]
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 165,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"jacobians"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 166,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"hessians"
|
||
]
|
||
},
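  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can check these results by hand, since `f(w1, w2)` computes $3w_1^2 + 2w_1w_2$. At $(w_1, w_2) = (5, 3)$ the first-order partial derivatives are $\\partial z/\\partial w_1 = 6w_1 + 2w_2 = 36$ and $\\partial z/\\partial w_2 = 2w_1 = 10$, and the Hessian is $\\begin{bmatrix}6 & 2 \\\\ 2 & 0\\end{bmatrix}$. The `None` in the output corresponds to $\\partial^2 z/\\partial w_2^2$: since $\\partial z/\\partial w_2 = 2w_1$ does not depend on `w2` at all, the tape returns `None` rather than a zero tensor."
   ]
  },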
  {
   "cell_type": "code",
   "execution_count": 167,
   "metadata": {},
   "outputs": [],
   "source": [
    "def f(w1, w2):\n",
    "    return 3 * w1 ** 2 + tf.stop_gradient(2 * w1 * w2)\n",
    "\n",
    "with tf.GradientTape() as tape:\n",
    "    z = f(w1, w2)\n",
    "\n",
    "tape.gradient(z, [w1, w2])"
   ]
  },
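  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The forward pass still computes the full expression, but during backpropagation the `tf.stop_gradient()` part is treated as a constant: the gradient with respect to `w1` is just $6w_1 = 30$ (instead of 36), and since `w2` only appears inside the stopped expression, its gradient is `None`."
   ]
  },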
  {
   "cell_type": "code",
   "execution_count": 168,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = tf.Variable(100.)\n",
    "with tf.GradientTape() as tape:\n",
    "    z = my_softplus(x)\n",
    "\n",
    "tape.gradient(z, [x])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf.math.log(tf.exp(tf.constant(30., dtype=tf.float32)) + 1.)"
   ]
  },
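  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This explains the bad result above: in float32, $e^{100}$ overflows to infinity, so the forward pass returns `inf` and the gradient is `nan`. But as this cell shows, by $z = 30$ we already have $\\log(e^z + 1) \\approx z$ to within float32 precision, so for large inputs we can simply return $z$ itself. That is the trick used in the implementations below."
   ]
  },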
  {
   "cell_type": "code",
   "execution_count": 170,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = tf.Variable([100.])\n",
    "with tf.GradientTape() as tape:\n",
    "    z = my_softplus(x)\n",
    "\n",
    "tape.gradient(z, [x])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tf.custom_gradient\n",
    "def my_better_softplus(z):\n",
    "    exp = tf.exp(z)\n",
    "    def my_softplus_gradients(grad):\n",
    "        return grad / (1 + 1 / exp)\n",
    "    return tf.math.log(exp + 1), my_softplus_gradients"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {},
   "outputs": [],
   "source": [
    "def my_better_softplus(z):\n",
    "    return tf.where(z > 30., z, tf.math.log(tf.exp(z) + 1.))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = tf.Variable([1000.])\n",
    "with tf.GradientTape() as tape:\n",
    "    z = my_better_softplus(x)\n",
    "\n",
    "z, tape.gradient(z, [x])"
   ]
  },
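  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The value is now correct (1000.0), but the gradient is still `nan`: `tf.where()` evaluates and differentiates *both* branches, so the overflowing `tf.exp(z)` still poisons the backward pass. Here is a minimal sketch combining both tricks (the name `my_stable_softplus` is just for illustration): it clips the argument of `exp()` in the branch that is not selected, and relies on the fact that the derivative of softplus is the sigmoid function, which TensorFlow computes without overflow:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tf.custom_gradient\n",
    "def my_stable_softplus(z):\n",
    "    def my_softplus_gradients(grad):\n",
    "        return grad * tf.sigmoid(z)  # softplus'(z) = sigmoid(z), no overflow\n",
    "    # clip the argument of exp() so the unselected branch cannot overflow\n",
    "    result = tf.where(z > 30., z, tf.math.log(tf.exp(tf.minimum(z, 30.)) + 1.))\n",
    "    return result, my_softplus_gradients\n",
    "\n",
    "x = tf.Variable([1000.])\n",
    "with tf.GradientTape() as tape:\n",
    "    z = my_stable_softplus(x)\n",
    "\n",
    "z, tape.gradient(z, [x])"
   ]
  },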
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Custom Training Loops"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {},
   "outputs": [],
   "source": [
    "l2_reg = keras.regularizers.l2(0.05)\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.Dense(30, activation=\"elu\", kernel_initializer=\"he_normal\",\n",
    "                       kernel_regularizer=l2_reg),\n",
    "    keras.layers.Dense(1, kernel_regularizer=l2_reg)\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "metadata": {},
   "outputs": [],
   "source": [
    "def random_batch(X, y, batch_size=32):\n",
    "    idx = np.random.randint(len(X), size=batch_size)\n",
    "    return X[idx], y[idx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_status_bar(iteration, total, loss, metrics=None):\n",
    "    metrics = \" - \".join([\"{}: {:.4f}\".format(m.name, m.result())\n",
    "                          for m in [loss] + (metrics or [])])\n",
    "    end = \"\" if iteration < total else \"\\n\"\n",
    "    print(\"\\r{}/{} - \".format(iteration, total) + metrics,\n",
    "          end=end)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "mean_loss = keras.metrics.Mean(name=\"loss\")\n",
    "mean_square = keras.metrics.Mean(name=\"mean_square\")\n",
    "for i in range(1, 50 + 1):\n",
    "    loss = 1 / i\n",
    "    mean_loss(loss)\n",
    "    mean_square(i ** 2)\n",
    "    print_status_bar(i, 50, mean_loss, [mean_square])\n",
    "    time.sleep(0.05)"
   ]
  },
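  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note that `keras.metrics.Mean` is a *streaming* metric: each call records a new value, and `result()` returns the mean of everything recorded since the last `reset_states()`. A quick sanity check:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "m = keras.metrics.Mean()\n",
    "m(1.)\n",
    "m(9.)\n",
    "m.result()  # the running mean of all recorded values: 5.0"
   ]
  },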
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A fancier version with a progress bar:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "metadata": {},
   "outputs": [],
   "source": [
    "def progress_bar(iteration, total, size=30):\n",
    "    running = iteration < total\n",
    "    c = \">\" if running else \"=\"\n",
    "    p = (size - 1) * iteration // total\n",
    "    fmt = \"{{:-{}d}}/{{}} [{{}}]\".format(len(str(total)))\n",
    "    params = [iteration, total, \"=\" * p + c + \".\" * (size - p - 1)]\n",
    "    return fmt.format(*params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 179,
   "metadata": {},
   "outputs": [],
   "source": [
    "progress_bar(3500, 10000, size=6)"
   ]
  },
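  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Working through this call: `p = (6 - 1) * 3500 // 10000 = 1`, so the bar is one `=`, a `>` head and four dots, and the iteration count is right-aligned to the width of `total` (5 characters), giving `' 3500/10000 [=>....]'`."
   ]
  },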
  {
   "cell_type": "code",
   "execution_count": 180,
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_status_bar(iteration, total, loss, metrics=None, size=30):\n",
    "    metrics = \" - \".join([\"{}: {:.4f}\".format(m.name, m.result())\n",
    "                          for m in [loss] + (metrics or [])])\n",
    "    end = \"\" if iteration < total else \"\\n\"\n",
    "    print(\"\\r{} - {}\".format(progress_bar(iteration, total, size), metrics), end=end)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 181,
   "metadata": {},
   "outputs": [],
   "source": [
    "mean_loss = keras.metrics.Mean(name=\"loss\")\n",
    "mean_square = keras.metrics.Mean(name=\"mean_square\")\n",
    "for i in range(1, 50 + 1):\n",
    "    loss = 1 / i\n",
    "    mean_loss(loss)\n",
    "    mean_square(i ** 2)\n",
    "    print_status_bar(i, 50, mean_loss, [mean_square])\n",
    "    time.sleep(0.05)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 182,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_epochs = 5\n",
    "batch_size = 32\n",
    "n_steps = len(X_train) // batch_size\n",
    "optimizer = keras.optimizers.Nadam(learning_rate=0.01)\n",
    "loss_fn = keras.losses.mean_squared_error\n",
    "mean_loss = keras.metrics.Mean()\n",
    "metrics = [keras.metrics.MeanAbsoluteError()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 183,
   "metadata": {},
   "outputs": [],
   "source": [
    "for epoch in range(1, n_epochs + 1):\n",
    "    print(\"Epoch {}/{}\".format(epoch, n_epochs))\n",
    "    for step in range(1, n_steps + 1):\n",
    "        X_batch, y_batch = random_batch(X_train_scaled, y_train)\n",
    "        with tf.GradientTape() as tape:\n",
    "            y_pred = model(X_batch)\n",
    "            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))\n",
    "            loss = tf.add_n([main_loss] + model.losses)\n",
    "        gradients = tape.gradient(loss, model.trainable_variables)\n",
    "        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n",
    "        for variable in model.variables:\n",
    "            if variable.constraint is not None:\n",
    "                variable.assign(variable.constraint(variable))\n",
    "        mean_loss(loss)\n",
    "        for metric in metrics:\n",
    "            metric(y_batch, y_pred)\n",
    "        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)\n",
    "    print_status_bar(len(y_train), len(y_train), mean_loss, metrics)\n",
    "    for metric in [mean_loss] + metrics:\n",
    "        metric.reset_states()"
   ]
  },
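  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This loop runs eagerly, which is flexible but slow. A common speed-up is to move the per-batch work into a function decorated with `tf.function` (covered below), so that it runs as a TensorFlow graph. A minimal sketch of that refactoring (the name `train_step` is just for illustration):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tf.function\n",
    "def train_step(X_batch, y_batch):\n",
    "    with tf.GradientTape() as tape:\n",
    "        y_pred = model(X_batch)\n",
    "        main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))\n",
    "        loss = tf.add_n([main_loss] + model.losses)\n",
    "    gradients = tape.gradient(loss, model.trainable_variables)\n",
    "    optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n",
    "    return loss"
   ]
  },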
  {
   "cell_type": "code",
   "execution_count": 184,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    from tqdm import tnrange\n",
    "    from collections import OrderedDict\n",
    "    with tnrange(1, n_epochs + 1, desc=\"All epochs\") as epochs:\n",
    "        for epoch in epochs:\n",
    "            with tnrange(1, n_steps + 1, desc=\"Epoch {}/{}\".format(epoch, n_epochs)) as steps:\n",
    "                for step in steps:\n",
    "                    X_batch, y_batch = random_batch(X_train_scaled, y_train)\n",
    "                    with tf.GradientTape() as tape:\n",
    "                        y_pred = model(X_batch)\n",
    "                        main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))\n",
    "                        loss = tf.add_n([main_loss] + model.losses)\n",
    "                    gradients = tape.gradient(loss, model.trainable_variables)\n",
    "                    optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n",
    "                    for variable in model.variables:\n",
    "                        if variable.constraint is not None:\n",
    "                            variable.assign(variable.constraint(variable))\n",
    "                    status = OrderedDict()\n",
    "                    mean_loss(loss)\n",
    "                    status[\"loss\"] = mean_loss.result().numpy()\n",
    "                    for metric in metrics:\n",
    "                        metric(y_batch, y_pred)\n",
    "                        status[metric.name] = metric.result().numpy()\n",
    "                    steps.set_postfix(status)\n",
    "            for metric in [mean_loss] + metrics:\n",
    "                metric.reset_states()\n",
    "except ImportError as ex:\n",
    "    print(\"To run this cell, please install tqdm, ipywidgets and restart Jupyter\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TensorFlow Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 185,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cube(x):\n",
    "    return x ** 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "metadata": {},
   "outputs": [],
   "source": [
    "cube(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "metadata": {},
   "outputs": [],
   "source": [
    "cube(tf.constant(2.0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 188,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf_cube = tf.function(cube)\n",
    "tf_cube"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf_cube(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf_cube(tf.constant(2.0))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### TF Functions and Concrete Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 191,
   "metadata": {},
   "outputs": [],
   "source": [
    "concrete_function = tf_cube.get_concrete_function(tf.constant(2.0))\n",
    "concrete_function.graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 192,
   "metadata": {},
   "outputs": [],
   "source": [
    "concrete_function(tf.constant(2.0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "metadata": {},
   "outputs": [],
   "source": [
    "concrete_function is tf_cube.get_concrete_function(tf.constant(2.0))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exploring Function Definitions and Graphs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "metadata": {},
   "outputs": [],
   "source": [
    "concrete_function.graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "metadata": {},
   "outputs": [],
   "source": [
    "ops = concrete_function.graph.get_operations()\n",
    "ops"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 196,
   "metadata": {},
   "outputs": [],
   "source": [
    "pow_op = ops[2]\n",
    "list(pow_op.inputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 197,
   "metadata": {},
   "outputs": [],
   "source": [
    "pow_op.outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "metadata": {},
   "outputs": [],
   "source": [
    "concrete_function.graph.get_operation_by_name('x')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 199,
   "metadata": {},
   "outputs": [],
   "source": [
    "concrete_function.graph.get_tensor_by_name('Identity:0')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 200,
   "metadata": {},
   "outputs": [],
   "source": [
    "concrete_function.function_def.signature"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How TF Functions Trace Python Functions to Extract Their Computation Graphs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tf.function\n",
    "def tf_cube(x):\n",
    "    print(\"print:\", x)\n",
    "    return x ** 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 202,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = tf_cube(tf.constant(2.0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 203,
   "metadata": {},
   "outputs": [],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 204,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = tf_cube(2)  # New Python value: trace!\n",
    "result = tf_cube(3)  # New Python value: trace!\n",
    "result = tf_cube(tf.constant([[1., 2.]]))  # New shape: trace!\n",
    "result = tf_cube(tf.constant([[3., 4.], [5., 6.]]))  # New shape: trace!\n",
    "result = tf_cube(tf.constant([[7., 8.], [9., 10.], [11., 12.]]))  # no trace"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It is also possible to specify a particular input signature:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 205,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tf.function(input_signature=[tf.TensorSpec([None, 28, 28], tf.float32)])\n",
    "def shrink(images):\n",
    "    print(\"Tracing\", images)\n",
    "    return images[:, ::2, ::2]  # drop half the rows and columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 206,
   "metadata": {},
   "outputs": [],
   "source": [
    "img_batch_1 = tf.random.uniform(shape=[100, 28, 28])\n",
    "img_batch_2 = tf.random.uniform(shape=[50, 28, 28])\n",
    "preprocessed_images = shrink(img_batch_1)  # Traces the function.\n",
    "preprocessed_images = shrink(img_batch_2)  # Reuses the same concrete function."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 207,
   "metadata": {},
   "outputs": [],
   "source": [
    "img_batch_3 = tf.random.uniform(shape=[2, 2, 2])\n",
    "try:\n",
    "    preprocessed_images = shrink(img_batch_3)  # rejects unexpected types or shapes\n",
    "except ValueError as ex:\n",
    "    print(ex)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using Autograph To Capture Control Flow"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A \"static\" `for` loop using `range()`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 208,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tf.function\n",
    "def add_10(x):\n",
    "    for i in range(10):\n",
    "        x += 1\n",
    "    return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 209,
   "metadata": {},
   "outputs": [],
   "source": [
    "add_10(tf.constant(5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 210,
   "metadata": {},
   "outputs": [],
   "source": [
    "add_10.get_concrete_function(tf.constant(5)).graph.get_operations()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A \"dynamic\" loop using `tf.while_loop()`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 211,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tf.function\n",
    "def add_10(x):\n",
    "    condition = lambda i, x: tf.less(i, 10)\n",
    "    body = lambda i, x: (tf.add(i, 1), tf.add(x, 1))\n",
    "    final_i, final_x = tf.while_loop(condition, body, [tf.constant(0), x])\n",
    "    return final_x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 212,
   "metadata": {},
   "outputs": [],
   "source": [
    "add_10(tf.constant(5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 213,
   "metadata": {},
   "outputs": [],
   "source": [
    "add_10.get_concrete_function(tf.constant(5)).graph.get_operations()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A \"dynamic\" `for` loop using `tf.range()` (captured by autograph):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 214,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tf.function\n",
    "def add_10(x):\n",
    "    for i in tf.range(10):\n",
    "        x = x + 1\n",
    "    return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 215,
   "metadata": {},
   "outputs": [],
   "source": [
    "add_10.get_concrete_function(tf.constant(0)).graph.get_operations()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Handling Variables and Other Resources in TF Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 216,
   "metadata": {},
   "outputs": [],
   "source": [
    "counter = tf.Variable(0)\n",
    "\n",
    "@tf.function\n",
    "def increment(counter, c=1):\n",
    "    return counter.assign_add(c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 217,
   "metadata": {},
   "outputs": [],
   "source": [
    "increment(counter)\n",
    "increment(counter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 218,
   "metadata": {},
   "outputs": [],
   "source": [
    "function_def = increment.get_concrete_function(counter).function_def\n",
    "function_def.signature.input_arg[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 219,
   "metadata": {},
   "outputs": [],
   "source": [
    "counter = tf.Variable(0)\n",
    "\n",
    "@tf.function\n",
    "def increment(c=1):\n",
    "    return counter.assign_add(c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 220,
   "metadata": {},
   "outputs": [],
   "source": [
    "increment()\n",
    "increment()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 221,
   "metadata": {},
   "outputs": [],
   "source": [
    "function_def = increment.get_concrete_function().function_def\n",
    "function_def.signature.input_arg[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 222,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Counter:\n",
    "    def __init__(self):\n",
    "        self.counter = tf.Variable(0)\n",
    "\n",
    "    @tf.function\n",
    "    def increment(self, c=1):\n",
    "        return self.counter.assign_add(c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 223,
   "metadata": {},
   "outputs": [],
   "source": [
    "c = Counter()\n",
    "c.increment()\n",
    "c.increment()"
   ]
  },
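  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "So you can pass variables in as arguments, capture them from an outer scope, or make them attributes of an object, as above. What you cannot do is create a fresh `tf.Variable` on every call of a TF Function, since a graph cannot re-run that creation step. A quick check (assuming TF 2's tracing behavior, which raises a `ValueError` here):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tf.function\n",
    "def bad_increment(c=1):\n",
    "    v = tf.Variable(0)  # created anew at each call: not allowed in a TF Function\n",
    "    return v.assign_add(c)\n",
    "\n",
    "try:\n",
    "    bad_increment()\n",
    "except ValueError as ex:\n",
    "    print(ex)"
   ]
  },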
  {
   "cell_type": "code",
   "execution_count": 224,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "@tf.function\n",
    "def add_10(x):\n",
    "    for i in tf.range(10):\n",
    "        x += 1\n",
    "    return x\n",
    "\n",
    "tf.autograph.to_code(add_10.python_function, experimental_optional_features=None)\n",
    "# TODO: experimental_optional_features is needed to have the same behavior as @tf.function,\n",
    "# check that this is not needed when TF2 is released"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 225,
   "metadata": {},
   "outputs": [],
   "source": [
    "def display_tf_code(func, experimental_optional_features=None):\n",
    "    from IPython.display import display, Markdown\n",
    "    if hasattr(func, \"python_function\"):\n",
    "        func = func.python_function\n",
    "    code = tf.autograph.to_code(func, experimental_optional_features=experimental_optional_features)\n",
    "    display(Markdown('```python\\n{}\\n```'.format(code)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 226,
   "metadata": {},
   "outputs": [],
   "source": [
    "display_tf_code(add_10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using TF Functions with tf.keras (or Not)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "By default, tf.keras will automatically convert your custom code into TF Functions, so there is no need to use\n",
    "`tf.function()`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 227,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Custom loss function\n",
    "def my_mse(y_true, y_pred):\n",
    "    print(\"Tracing loss my_mse()\")\n",
    "    return tf.reduce_mean(tf.square(y_pred - y_true))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 228,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Custom metric function\n",
    "def my_mae(y_true, y_pred):\n",
    "    print(\"Tracing metric my_mae()\")\n",
    "    return tf.reduce_mean(tf.abs(y_pred - y_true))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 229,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Custom layer\n",
    "class MyDense(keras.layers.Layer):\n",
    "    def __init__(self, units, activation=None, **kwargs):\n",
    "        super().__init__(**kwargs)\n",
    "        self.units = units\n",
    "        self.activation = keras.activations.get(activation)\n",
    "\n",
    "    def build(self, input_shape):\n",
    "        self.kernel = self.add_weight(name='kernel',\n",
    "                                      shape=(input_shape[1], self.units),\n",
    "                                      initializer='uniform',\n",
    "                                      trainable=True)\n",
    "        self.biases = self.add_weight(name='bias',\n",
    "                                      shape=(self.units,),\n",
    "                                      initializer='zeros',\n",
    "                                      trainable=True)\n",
    "        super().build(input_shape)\n",
    "\n",
    "    def call(self, X):\n",
    "        print(\"Tracing MyDense.call()\")\n",
    "        return self.activation(X @ self.kernel + self.biases)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 230,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Custom model\n",
    "class MyModel(keras.models.Model):\n",
    "    def __init__(self, **kwargs):\n",
    "        super().__init__(**kwargs)\n",
    "        self.hidden1 = MyDense(30, activation=\"relu\")\n",
    "        self.hidden2 = MyDense(30, activation=\"relu\")\n",
    "        self.output_ = MyDense(1)\n",
    "\n",
    "    def call(self, input):\n",
    "        print(\"Tracing MyModel.call()\")\n",
    "        hidden1 = self.hidden1(input)\n",
    "        hidden2 = self.hidden2(hidden1)\n",
    "        concat = keras.layers.concatenate([input, hidden2])\n",
    "        output = self.output_(concat)\n",
    "        return output\n",
    "\n",
    "model = MyModel()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 231,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(loss=my_mse, optimizer=\"nadam\", metrics=[my_mae])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 232,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.fit(X_train_scaled, y_train, epochs=2,\n",
    "          validation_data=(X_valid_scaled, y_valid))\n",
    "model.evaluate(X_test_scaled, y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can turn this off by creating the model with `dynamic=True` (or calling `super().__init__(dynamic=True, **kwargs)` in the model's constructor):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 233,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = MyModel(dynamic=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 234,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(loss=my_mse, optimizer=\"nadam\", metrics=[my_mae])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now the custom code will be called at each iteration. Let's fit, validate and evaluate with tiny datasets to avoid getting too much output:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 235,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.fit(X_train_scaled[:64], y_train[:64], epochs=1,\n",
    "          validation_data=(X_valid_scaled[:64], y_valid[:64]), verbose=0)\n",
    "model.evaluate(X_test_scaled[:64], y_test[:64], verbose=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Alternatively, you can compile a model with `run_eagerly=True`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 236,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = MyModel()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 237,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(loss=my_mse, optimizer=\"nadam\", metrics=[my_mae], run_eagerly=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 238,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.fit(X_train_scaled[:64], y_train[:64], epochs=1,\n",
    "          validation_data=(X_valid_scaled[:64], y_valid[:64]), verbose=0)\n",
    "model.evaluate(X_test_scaled[:64], y_test[:64], verbose=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Custom Optimizers"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Defining custom optimizers is not very common, but in case you are one of the happy few who get to write one, here is an example:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 239,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MyMomentumOptimizer(keras.optimizers.Optimizer):\n",
    "    def __init__(self, learning_rate=0.001, momentum=0.9, name=\"MyMomentumOptimizer\", **kwargs):\n",
    "        \"\"\"Call super().__init__() and use _set_hyper() to store hyperparameters\"\"\"\n",
    "        super().__init__(name, **kwargs)\n",
    "        self._set_hyper(\"learning_rate\", kwargs.get(\"lr\", learning_rate))  # handle lr=learning_rate\n",
    "        self._set_hyper(\"decay\", self._initial_decay)\n",
    "        self._set_hyper(\"momentum\", momentum)\n",
    "\n",
    "    def _create_slots(self, var_list):\n",
    "        \"\"\"For each model variable, create the optimizer variable associated with it.\n",
    "        TensorFlow calls these optimizer variables \"slots\".\n",
    "        For momentum optimization, we need one momentum slot per model variable.\n",
    "        \"\"\"\n",
    "        for var in var_list:\n",
    "            self.add_slot(var, \"momentum\")\n",
    "\n",
    "    @tf.function\n",
    "    def _resource_apply_dense(self, grad, var):\n",
    "        \"\"\"Update the slots and perform one optimization step for one model variable\"\"\"\n",
    "        var_dtype = var.dtype.base_dtype\n",
    "        lr_t = self._decayed_lr(var_dtype)  # handle learning rate decay\n",
    "        momentum_var = self.get_slot(var, \"momentum\")\n",
    "        momentum_hyper = self._get_hyper(\"momentum\", var_dtype)\n",
    "        momentum_var.assign(momentum_var * momentum_hyper - (1. - momentum_hyper) * grad)\n",
    "        var.assign_add(momentum_var * lr_t)\n",
    "\n",
    "    def _resource_apply_sparse(self, grad, var):\n",
    "        raise NotImplementedError\n",
    "\n",
    "    def get_config(self):\n",
    "        base_config = super().get_config()\n",
    "        return {\n",
    "            **base_config,\n",
    "            \"learning_rate\": self._serialize_hyperparameter(\"learning_rate\"),\n",
    "            \"decay\": self._serialize_hyperparameter(\"decay\"),\n",
    "            \"momentum\": self._serialize_hyperparameter(\"momentum\"),\n",
    "        }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 240,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "model = keras.models.Sequential([keras.layers.Dense(1, input_shape=[8])])\n",
    "model.compile(loss=\"mse\", optimizer=MyMomentumOptimizer())\n",
    "model.fit(X_train_scaled, y_train, epochs=5)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}