{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "**Chapter 12 – Custom Models and Training with TensorFlow**" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "_This notebook contains all the sample code in chapter 12._" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Setup" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0-preview." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Python ≥3.5 is required\n", "import sys\n", "assert sys.version_info >= (3, 5)\n", "\n", "# Scikit-Learn ≥0.20 is required\n", "import sklearn\n", "assert sklearn.__version__ >= \"0.20\"\n", "\n", "# TensorFlow ≥2.0-preview is required\n", "import tensorflow as tf\n", "from tensorflow import keras\n", "assert tf.__version__ >= \"2.0\"\n", "\n", "# Common imports\n", "import numpy as np\n", "import os\n", "\n", "# to make this notebook's output stable across runs\n", "np.random.seed(42)\n", "\n", "# To plot pretty figures\n", "%matplotlib inline\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "mpl.rc('axes', labelsize=14)\n", "mpl.rc('xtick', labelsize=12)\n", "mpl.rc('ytick', labelsize=12)\n", "\n", "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", "CHAPTER_ID = \"deep\"\n", "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n", "os.makedirs(IMAGES_PATH, exist_ok=True)\n", "\n", "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n", " path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n", " print(\"Saving figure\", fig_id)\n", " if tight_layout:\n", " 
plt.tight_layout()\n", " plt.savefig(path, format=fig_extension, dpi=resolution)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tensors and operations" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Tensors" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "tf.constant([[1., 2., 3.], [4., 5., 6.]]) # matrix" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "tf.constant(42) # scalar" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "t = tf.constant([[1., 2., 3.], [4., 5., 6.]])\n", "t" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "t.shape" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "t.dtype" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Indexing" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "t[:, 1:]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "t[..., 1, tf.newaxis]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Ops" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "t + 10" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "tf.square(t)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "t @ tf.transpose(t)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using `keras.backend`" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "from tensorflow import keras\n", "K = keras.backend\n", "K.square(K.transpose(t)) + 10" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### From/To NumPy" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": 
[ "a = np.array([2., 4., 5.])\n", "tf.constant(a)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "t.numpy()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "np.array(t)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "tf.square(a)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "np.square(t)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Conflicting Types" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "try:\n", " tf.constant(2.0) + tf.constant(40)\n", "except tf.errors.InvalidArgumentError as ex:\n", " print(ex)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "try:\n", " tf.constant(2.0) + tf.constant(40., dtype=tf.float64)\n", "except tf.errors.InvalidArgumentError as ex:\n", " print(ex)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "t2 = tf.constant(40., dtype=tf.float64)\n", "tf.constant(2.0) + tf.cast(t2, tf.float32)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Strings" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "tf.constant(b\"hello world\")" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "tf.constant(\"café\")" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "u = tf.constant([ord(c) for c in \"café\"])\n", "u" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "b = tf.strings.unicode_encode(u, \"UTF-8\")\n", "tf.strings.length(b, unit=\"UTF8_CHAR\")" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "tf.strings.unicode_decode(b, \"UTF-8\")" ] }, { "cell_type": 
"markdown", "metadata": {}, "source": [ "### String arrays" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "p = tf.constant([\"Café\", \"Coffee\", \"caffè\", \"咖啡\"])" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "tf.strings.length(p, unit=\"UTF8_CHAR\")" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "r = tf.strings.unicode_decode(p, \"UTF8\")\n", "r" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "print(r)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Ragged tensors" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "print(r[1])" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "print(r[1:3])" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "r2 = tf.ragged.constant([[65, 66], [], [67]])\n", "print(tf.concat([r, r2], axis=0))" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "r3 = tf.ragged.constant([[68, 69, 70], [71], [], [72, 73]])\n", "print(tf.concat([r, r3], axis=1))" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "tf.strings.unicode_encode(r3, \"UTF-8\")" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "r.to_tensor()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Sparse tensors" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "s = tf.SparseTensor(indices=[[0, 1], [1, 0], [2, 3]],\n", " values=[1., 2., 3.],\n", " dense_shape=[3, 4])" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "print(s)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], 
"source": [ "tf.sparse.to_dense(s)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "s2 = s * 2.0" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "try:\n", " s3 = s + 1.\n", "except TypeError as ex:\n", " print(ex)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "s4 = tf.constant([[10., 20.], [30., 40.], [50., 60.], [70., 80.]])\n", "tf.sparse.sparse_dense_matmul(s, s4)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "s5 = tf.SparseTensor(indices=[[0, 2], [0, 1]],\n", " values=[1., 2.],\n", " dense_shape=[3, 4])\n", "print(s5)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "try:\n", " tf.sparse.to_dense(s5)\n", "except tf.errors.InvalidArgumentError as ex:\n", " print(ex)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "s6 = tf.sparse.reorder(s5)\n", "tf.sparse.to_dense(s6)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Sets" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "set1 = tf.constant([[2, 3, 5, 7], [7, 9, 0, 0]])\n", "set2 = tf.constant([[4, 5, 6], [9, 10, 0]])\n", "tf.sparse.to_dense(tf.sets.union(set1, set2))" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "tf.sparse.to_dense(tf.sets.difference(set1, set2))" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "tf.sparse.to_dense(tf.sets.intersection(set1, set2))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Variables" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ 
"v.assign(2 * v)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "v[0, 1].assign(42)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "v[:, 2].assign([0., 1.])" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "try:\n", " v[1] = [7., 8., 9.]\n", "except TypeError as ex:\n", " print(ex)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "v.scatter_nd_update(indices=[[0, 0], [1, 2]],\n", " updates=[100., 200.])" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "sparse_delta = tf.IndexedSlices(values=[[1., 2., 3.], [4., 5., 6.]],\n", " indices=[1, 0])\n", "v.scatter_update(sparse_delta)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Tensor Arrays" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "array = tf.TensorArray(dtype=tf.float32, size=3)\n", "array = array.write(0, tf.constant([1., 2.]))\n", "array = array.write(1, tf.constant([3., 10.]))\n", "array = array.write(2, tf.constant([5., 7.]))" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "array.read(1)" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "array.stack()" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "mean, variance = tf.nn.moments(array.stack(), axes=0)\n", "mean" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [ "variance" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Custom loss function" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's start by loading and preparing the California housing dataset. 
def huber_fn(y_true, y_pred):
    """Element-wise Huber loss with a fixed threshold of 1.

    Residuals with magnitude below 1 get the quadratic penalty
    ``error**2 / 2``; all others get the linear penalty ``|error| - 0.5``.
    No reduction is applied here.
    """
    residual = y_true - y_pred
    magnitude = tf.abs(residual)
    quadratic = tf.square(residual) / 2
    linear = magnitude - 0.5
    return tf.where(magnitude < 1, quadratic, linear)
def create_huber(threshold=1.0):
    """Build an element-wise Huber loss function for the given threshold.

    Residuals with magnitude below ``threshold`` are penalised quadratically
    (``error**2 / 2``); larger ones linearly
    (``threshold * |error| - threshold**2 / 2``).
    """
    # The closure must keep the name `huber_fn`: the notebook looks it up by
    # that name (custom_objects={"huber_fn": ...}, history.history["huber_fn"]).
    def huber_fn(y_true, y_pred):
        residual = y_true - y_pred
        magnitude = tf.abs(residual)
        quadratic = tf.square(residual) / 2
        linear = threshold * magnitude - threshold**2 / 2
        return tf.where(magnitude < threshold, quadratic, linear)

    return huber_fn
class HuberLoss(keras.losses.Loss):
    """Huber loss with a configurable threshold, as a ``keras.losses.Loss``.

    ``call`` returns the element-wise loss: quadratic (``error**2 / 2``) where
    ``|error| < threshold``, linear
    (``threshold * |error| - threshold**2 / 2``) elsewhere.
    The threshold is included in ``get_config()``.
    """

    def __init__(self, threshold=1.0, **kwargs):
        # Remaining keyword arguments are forwarded to the base Loss.
        super().__init__(**kwargs)
        self.threshold = threshold

    def call(self, y_true, y_pred):
        limit = self.threshold
        residual = y_true - y_pred
        magnitude = tf.abs(residual)
        quadratic = tf.square(residual) / 2
        linear = limit * magnitude - limit**2 / 2
        return tf.where(magnitude < limit, quadratic, linear)

    def get_config(self):
        # Base config plus our own hyperparameter, as a new dict.
        return dict(super().get_config(), threshold=self.threshold)
def my_softplus(z):
    """Softplus activation, ``log(1 + exp(z))``, applied element-wise.

    Delegates to ``tf.nn.softplus`` instead of spelling out
    ``tf.math.log(tf.exp(z) + 1.0)``: the literal formula returns ``inf`` as
    soon as ``exp(z)`` overflows (z of roughly 89 and above in float32),
    although softplus(z) is then simply ~z.
    """
    return tf.nn.softplus(z)

def my_glorot_initializer(shape, dtype=tf.float32):
    """Glorot-style normal initializer.

    Samples ``shape`` values from a normal distribution with
    ``stddev = sqrt(2 / (shape[0] + shape[1]))``; only the first two entries
    of ``shape`` are used to compute the scale.
    """
    # Keep stddev a plain Python float. tf.sqrt on a Python float produces a
    # float32 tensor, which tf.random.normal cannot combine with any other
    # requested `dtype`.
    stddev = (2. / (shape[0] + shape[1])) ** 0.5
    return tf.random.normal(shape, stddev=stddev, dtype=dtype)

def my_l1_regularizer(weights):
    """L1 penalty with a fixed factor of 0.01: ``sum(|0.01 * weights|)``."""
    return tf.reduce_sum(tf.abs(0.01 * weights))

def my_positive_weights(weights): # return value is just tf.nn.relu(weights)
    """Weight constraint: replace negative entries with zero, keep the rest."""
    return tf.where(weights < 0., tf.zeros_like(weights), weights)
class MyL1Regularizer(keras.regularizers.Regularizer):
    """L1 weight penalty with a configurable factor.

    Calling the instance returns ``sum(|factor * weights|)``; ``factor`` is
    the only entry of ``get_config()``.
    """

    def __init__(self, factor):
        self.factor = factor

    def __call__(self, weights):
        scaled = self.factor * weights
        return tf.reduce_sum(tf.abs(scaled))

    def get_config(self):
        return dict(factor=self.factor)
"metadata": {}, "outputs": [], "source": [ "model = keras.models.Sequential([\n", " keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n", " input_shape=input_shape),\n", " keras.layers.Dense(1),\n", "])" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [], "source": [ "model.compile(loss=\"mse\", optimizer=\"nadam\", metrics=[create_huber(2.0)])" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [], "source": [ "model.fit(X_train_scaled, y_train, epochs=2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Warning**: if you use the same function as the loss and a metric, you may be surprised to see different results. This is generally just due to floating point precision errors: even though the mathematical equations are equivalent, the operations are not run in the same order, which can lead to small differences. Moreover, when using sample weights, there's more than just precision errors:\n", "* the loss since the start of the epoch is the mean of all batch losses seen so far. Each batch loss is the sum of the weighted instance losses divided by the _batch size_ (not the sum of weights, so the batch loss is _not_ the weighted mean of the losses).\n", "* the metric since the start of the epoch is equal to the sum of weighted instance losses divided by the sum of all weights seen so far. In other words, it is the weighted mean of all the instance losses. Not the same thing.\n", "\n", "If you do the math, you will find that loss = metric * mean of sample weights (plus some floating point precision error)."
class HuberMetric(keras.metrics.Metric):
    """Streaming mean of the element-wise Huber loss.

    Keeps a running ``total`` of the Huber losses and a running ``count`` of
    the elements of ``y_true`` seen so far; ``result()`` is ``total / count``.

    Args:
        threshold: Huber threshold passed to ``create_huber``.
        **kwargs: forwarded to ``keras.metrics.Metric`` (e.g. name, dtype).
    """

    def __init__(self, threshold=1.0, **kwargs):
        super().__init__(**kwargs) # handles base args (e.g., dtype)
        self.threshold = threshold
        self.huber_fn = create_huber(threshold)
        # `name` is passed by keyword so the call does not depend on the
        # positional order of add_weight's parameters.
        self.total = self.add_weight(name="total", initializer="zeros")
        self.count = self.add_weight(name="count", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate one batch.

        ``sample_weight`` is accepted for API compatibility but is ignored:
        every element counts with weight 1.
        """
        metric = self.huber_fn(y_true, y_pred)
        # Cast to the variables' own dtype so a non-float32 metric dtype
        # does not make assign_add fail.
        self.total.assign_add(tf.cast(tf.reduce_sum(metric), self.total.dtype))
        self.count.assign_add(tf.cast(tf.size(y_true), self.count.dtype))

    def result(self):
        """Return the mean so far, or 0 if nothing was accumulated yet."""
        # Avoids 0/0 = NaN before the first update and after reset_states().
        return tf.math.divide_no_nan(self.total, self.count)

    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "threshold": self.threshold}
class HuberMetric(keras.metrics.Mean):
    """Mean Huber loss, built on ``keras.metrics.Mean``.

    The element-wise Huber loss is computed here and handed, together with
    ``sample_weight``, to ``Mean.update_state``, which does the accumulation.
    The threshold is included in ``get_config()``.
    """

    def __init__(self, threshold=1.0, name='HuberMetric', dtype=None):
        super().__init__(name=name, dtype=dtype)
        self.threshold = threshold
        self.huber_fn = create_huber(threshold)

    def update_state(self, y_true, y_pred, sample_weight=None):
        per_element_loss = self.huber_fn(y_true, y_pred)
        # Two-argument super() kept exactly as in the original.
        # NOTE(review): presumably deliberate, so the call still resolves when
        # this method is traced/converted by TensorFlow -- confirm before
        # changing it to zero-argument super().
        super(HuberMetric, self).update_state(per_element_loss, sample_weight)

    def get_config(self):
        return dict(super().get_config(), threshold=self.threshold)
] }, { "cell_type": "code", "execution_count": 121, "metadata": {}, "outputs": [], "source": [ "model = keras.models.Sequential([\n", " keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n", " input_shape=input_shape),\n", " keras.layers.Dense(1),\n", "])" ] }, { "cell_type": "code", "execution_count": 122, "metadata": {}, "outputs": [], "source": [ "model.compile(loss=keras.losses.Huber(2.0), optimizer=\"nadam\", weighted_metrics=[HuberMetric(2.0)])" ] }, { "cell_type": "code", "execution_count": 123, "metadata": { "scrolled": true }, "outputs": [], "source": [ "sample_weight = np.random.rand(len(y_train))\n", "history = model.fit(X_train_scaled, y_train, epochs=2, sample_weight=sample_weight)" ] }, { "cell_type": "code", "execution_count": 124, "metadata": {}, "outputs": [], "source": [ "history.history[\"loss\"][0], history.history[\"HuberMetric\"][0] * sample_weight.mean()" ] }, { "cell_type": "code", "execution_count": 125, "metadata": {}, "outputs": [], "source": [ "model.save(\"my_model_with_a_custom_metric_v2.h5\")" ] }, { "cell_type": "code", "execution_count": 126, "metadata": {}, "outputs": [], "source": [ "#model = keras.models.load_model(\"my_model_with_a_custom_metric_v2.h5\", # TODO: check PR #25956\n", "# custom_objects={\"HuberMetric\": HuberMetric})" ] }, { "cell_type": "code", "execution_count": 127, "metadata": {}, "outputs": [], "source": [ "model.fit(X_train_scaled, y_train, epochs=2)" ] }, { "cell_type": "code", "execution_count": 128, "metadata": { "scrolled": true }, "outputs": [], "source": [ "model.metrics[0].threshold" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Custom Layers" ] }, { "cell_type": "code", "execution_count": 129, "metadata": {}, "outputs": [], "source": [ "exponential_layer = keras.layers.Lambda(lambda x: tf.exp(x))" ] }, { "cell_type": "code", "execution_count": 130, "metadata": {}, "outputs": [], "source": [ "exponential_layer([-1., 0., 1.])" ] }, { "cell_type": 
class MyDense(keras.layers.Layer):
    """Minimal fully connected layer: `activation(X @ kernel + bias)`.

    Args:
        units: number of output units (size of the last output dimension).
        activation: anything accepted by `keras.activations.get()`;
            `None` is passed through to that function unchanged.
        **kwargs: forwarded to `keras.layers.Layer`.
    """

    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.activations.get(activation)

    def build(self, batch_input_shape):
        """Create the kernel and bias once the input's last dimension is known."""
        self.kernel = self.add_weight(
            name="kernel", shape=[batch_input_shape[-1], self.units],
            initializer="glorot_normal")
        self.bias = self.add_weight(
            name="bias", shape=[self.units], initializer="zeros")
        super().build(batch_input_shape) # must be at the end

    def call(self, X):
        """Apply the affine transform followed by the activation."""
        return self.activation(X @ self.kernel + self.bias)

    def compute_output_shape(self, batch_input_shape):
        """Return the input shape with its last dimension replaced by `units`.

        The argument is normalised through `tf.TensorShape` first, so a plain
        tuple or list works as well as a `TensorShape` (the previous code
        called `.as_list()` directly and failed on tuples/lists).
        """
        input_dims = tf.TensorShape(batch_input_shape).as_list()
        return tf.TensorShape(input_dims[:-1] + [self.units])

    def get_config(self):
        """Return the base config plus `units` and the serialized activation."""
        base_config = super().get_config()
        return {**base_config, "units": self.units,
                "activation": keras.activations.serialize(self.activation)}
class MyMultiLayer(keras.layers.Layer):
    """Layer that takes a pair of inputs and returns two outputs.

    `call()` expects exactly two inputs `(X1, X2)` and returns the tuple
    `(X1 + X2, X1 * X2)`.
    """

    def call(self, X):
        first, second = X
        total = first + second
        product = first * second
        return total, product

    def compute_output_shape(self, batch_input_shape):
        """Return the two input shapes, in order, as a two-element list."""
        first_shape, second_shape = batch_input_shape
        return [first_shape, second_shape]
class ResidualBlock(keras.layers.Layer):
    """Stack of Dense layers whose output is added back to the block's input.

    Args:
        n_layers: number of Dense layers in the block.
        n_neurons: number of units in each Dense layer.
        **kwargs: forwarded to `keras.layers.Layer`.
    """

    def __init__(self, n_layers, n_neurons, **kwargs):
        super().__init__(**kwargs)
        self.n_layers = n_layers    # kept so get_config() can report it
        self.n_neurons = n_neurons  # kept so get_config() can report it
        self.hidden = [keras.layers.Dense(n_neurons, activation="elu",
                                          kernel_initializer="he_normal")
                       for _ in range(n_layers)]

    def call(self, inputs):
        """Run `inputs` through every hidden layer, then add `inputs` to the result."""
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        # NOTE(review): the sum presumably requires the inputs' last dimension
        # to match n_neurons — confirm against the callers.
        return inputs + Z

    def get_config(self):
        """Return the base config plus `n_layers` and `n_neurons`."""
        base_config = super().get_config()
        # Fixed: this used the bare name `n_neurons`, which is not defined in
        # this scope and raised NameError; the value lives on the instance.
        return {**base_config,
                "n_layers": self.n_layers, "n_neurons": self.n_neurons}
TODO: check https://github.com/tensorflow/tensorflow/issues/26260
```python
class ReconstructingRegressor(keras.models.Model):
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden = [keras.layers.Dense(30, activation="selu",
                                          kernel_initializer="lecun_normal")
                       for _ in range(5)]
        self.out = keras.layers.Dense(output_dim)
        self.reconstruction_mean = keras.metrics.Mean(name="reconstruction_error")

    def build(self, batch_input_shape):
        n_inputs = batch_input_shape[-1]
        self.reconstruct = keras.layers.Dense(n_inputs)
        super().build(batch_input_shape)

    @tf.function
    def call(self, inputs, training=None):
        if training is None:
            training = keras.backend.learning_phase()
        Z = inputs
        for layer in self.hidden:
            Z = layer(Z)
        reconstruction = self.reconstruct(Z)
        recon_loss = tf.reduce_mean(tf.square(reconstruction - inputs))
        self.add_loss(0.05 * recon_loss)
        if training:
            result = self.reconstruction_mean(recon_loss)
            self.add_metric(result)
        return self.out(Z)

model = ReconstructingRegressor(1)
model.build(tf.TensorShape([None, 8])) # <= Fails if this line is removed
model.compile(loss="mse", optimizer="nadam")
history = model.fit(X, y, epochs=2)
```
def f(w1, w2):
    """Return `3 * w1 ** 2 + 2 * w1 * w2`.

    Small two-variable function used by the following cells to compare
    finite-difference estimates with gradients from `tf.GradientTape`.
    """
    quadratic_term = 3 * w1 ** 2
    cross_term = 2 * w1 * w2
    return quadratic_term + cross_term
w2) # works now!\n", "del tape" ] }, { "cell_type": "code", "execution_count": 157, "metadata": {}, "outputs": [], "source": [ "dz_dw1, dz_dw2" ] }, { "cell_type": "code", "execution_count": 158, "metadata": {}, "outputs": [], "source": [ "c1, c2 = tf.constant(5.), tf.constant(3.)\n", "with tf.GradientTape() as tape:\n", " z = f(c1, c2)\n", "\n", "gradients = tape.gradient(z, [c1, c2])" ] }, { "cell_type": "code", "execution_count": 159, "metadata": {}, "outputs": [], "source": [ "gradients" ] }, { "cell_type": "code", "execution_count": 160, "metadata": {}, "outputs": [], "source": [ "with tf.GradientTape() as tape:\n", " tape.watch(c1)\n", " tape.watch(c2)\n", " z = f(c1, c2)\n", "\n", "gradients = tape.gradient(z, [c1, c2])" ] }, { "cell_type": "code", "execution_count": 161, "metadata": {}, "outputs": [], "source": [ "gradients" ] }, { "cell_type": "code", "execution_count": 162, "metadata": {}, "outputs": [], "source": [ "with tf.GradientTape() as tape:\n", " z1 = f(w1, w2 + 2.)\n", " z2 = f(w1, w2 + 5.)\n", " z3 = f(w1, w2 + 7.)\n", "\n", "tape.gradient([z1, z2, z3], [w1, w2])" ] }, { "cell_type": "code", "execution_count": 163, "metadata": {}, "outputs": [], "source": [ "with tf.GradientTape(persistent=True) as tape:\n", " z1 = f(w1, w2 + 2.)\n", " z2 = f(w1, w2 + 5.)\n", " z3 = f(w1, w2 + 7.)\n", "\n", "tf.reduce_sum(tf.stack([tape.gradient(z, [w1, w2]) for z in (z1, z2, z3)]), axis=0)\n", "del tape" ] }, { "cell_type": "code", "execution_count": 164, "metadata": {}, "outputs": [], "source": [ "with tf.GradientTape(persistent=True) as hessian_tape:\n", " with tf.GradientTape() as jacobian_tape:\n", " z = f(w1, w2)\n", " jacobians = jacobian_tape.gradient(z, [w1, w2])\n", "hessians = [hessian_tape.gradient(jacobian, [w1, w2])\n", " for jacobian in jacobians]\n", "del hessian_tape" ] }, { "cell_type": "code", "execution_count": 165, "metadata": {}, "outputs": [], "source": [ "jacobians" ] }, { "cell_type": "code", "execution_count": 166, "metadata": {}, 
@tf.custom_gradient
def my_better_softplus(z):
    """Softplus, `log(exp(z) + 1)`, with a finite value and gradient for large `z`.

    This merges the two earlier definitions of this function (the later one
    silently replaced the earlier one):

    * the `tf.where` version returned the right value for large `z`, but its
      autodiff gradient was NaN, because the unselected branch contributes
      `0 * exp(z)` with `exp(z)` overflowing to `inf`;
    * the `tf.custom_gradient` version had a finite gradient, but its output
      was `inf` once `exp(z)` overflowed.

    Args:
        z: input tensor (or variable).

    Returns:
        The pair `(value, gradient_fn)` expected by `tf.custom_gradient`.
    """
    exp = tf.exp(z)

    def my_softplus_gradients(grad):
        # d/dz log(exp(z) + 1) written as 1 / (1 + 1 / exp(z)): when exp(z)
        # overflows to inf this evaluates to 1 rather than inf / inf.
        return grad / (1 + 1 / exp)

    # Above the threshold, return z itself instead of log(exp(z) + 1), which
    # would be inf once exp(z) overflows.
    value = tf.where(z > 30., z, tf.math.log(exp + 1.))
    return value, my_softplus_gradients
def progress_bar(iteration, total, size=30):
    """Build a text progress bar such as ``" 5/10 [====>.....]"``.

    Args:
        iteration: current position.
        total: final position; also fixes the width of the printed counter.
        size: number of characters between the square brackets.

    Returns:
        The formatted bar. The head character is ``>`` while
        `iteration < total` and ``=`` otherwise.
    """
    running = iteration < total
    c = ">" if running else "="
    p = (size - 1) * iteration // total
    # Right-align the counter to the number of digits in `total`.
    fmt = "{{:-{}d}}/{{}} [{{}}]".format(len(str(total)))
    params = [iteration, total, "=" * p + c + "." * (size - p - 1)]
    return fmt.format(*params)


def print_status_bar(iteration, total, loss, metrics=None, size=30):
    """Print a one-line status: progress bar followed by the loss and metrics.

    Args:
        iteration: current position.
        total: final position.
        loss: object exposing `.name` and `.result()`; shown first.
        metrics: optional list of objects with the same interface.
        size: width of the progress bar; forwarded to `progress_bar()`
            (previously accepted but ignored, so the bar was always 30 wide).

    The line starts with a carriage return so that successive calls overwrite
    each other; a newline is only emitted once `iteration` reaches `total`.
    """
    metrics = " - ".join(["{}: {:.4f}".format(m.name, m.result())
                         for m in [loss] + (metrics or [])])
    end = "" if iteration < total else "\n"
    print("\r{} - {}".format(progress_bar(iteration, total, size), metrics),
          end=end)
def cube(x):
    """Return `x` raised to the third power.

    Works on whatever supports `**`; the cells below call it with a Python
    int and with a `tf.constant`, and wrap it with `tf.function(cube)`.
    """
    # Keep this as a single `x ** 3` expression and keep the parameter named
    # `x`: later cells inspect the traced graph of tf.function(cube)
    # (`pow_op = ops[2]`, `get_operation_by_name('x')`).
    return x ** 3
{ "cell_type": "code", "execution_count": 190, "metadata": {}, "outputs": [], "source": [ "tf_cube(tf.constant(2.0))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### TF Functions and Concrete Functions" ] }, { "cell_type": "code", "execution_count": 191, "metadata": {}, "outputs": [], "source": [ "concrete_function = tf_cube.get_concrete_function(tf.constant(2.0))\n", "concrete_function.graph" ] }, { "cell_type": "code", "execution_count": 192, "metadata": {}, "outputs": [], "source": [ "concrete_function(tf.constant(2.0))" ] }, { "cell_type": "code", "execution_count": 193, "metadata": {}, "outputs": [], "source": [ "concrete_function is tf_cube.get_concrete_function(tf.constant(2.0))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Exploring Function Definitions and Graphs" ] }, { "cell_type": "code", "execution_count": 194, "metadata": {}, "outputs": [], "source": [ "concrete_function.graph" ] }, { "cell_type": "code", "execution_count": 195, "metadata": {}, "outputs": [], "source": [ "ops = concrete_function.graph.get_operations()\n", "ops" ] }, { "cell_type": "code", "execution_count": 196, "metadata": {}, "outputs": [], "source": [ "pow_op = ops[2]\n", "list(pow_op.inputs)" ] }, { "cell_type": "code", "execution_count": 197, "metadata": {}, "outputs": [], "source": [ "pow_op.outputs" ] }, { "cell_type": "code", "execution_count": 198, "metadata": {}, "outputs": [], "source": [ "concrete_function.graph.get_operation_by_name('x')" ] }, { "cell_type": "code", "execution_count": 199, "metadata": {}, "outputs": [], "source": [ "concrete_function.graph.get_tensor_by_name('Identity:0')" ] }, { "cell_type": "code", "execution_count": 200, "metadata": {}, "outputs": [], "source": [ "concrete_function.function_def.signature" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### How TF Functions Trace Python Functions to Extract Their Computation Graphs" ] }, { "cell_type": "code", "execution_count": 201, "metadata": {}, 
@tf.function(input_signature=[tf.TensorSpec([None, 28, 28], tf.float32)])
def shrink(images):
    """Keep every second row and every second column of each image.

    The `input_signature` restricts the function to float32 tensors of shape
    `[None, 28, 28]`. The `print()` call is plain Python, so it shows when the
    function body is being traced.
    """
    print("Tracing", images)
    downsampled = images[:, ::2, ::2]  # drop half the rows and columns
    return downsampled
@tf.function
def add_10(x):
    """Add 10 to `x` with an explicit `tf.while_loop`.

    The loop state is the pair `(i, x)`: it starts at `(0, x)`, runs while
    `i < 10`, and adds 1 to both elements on every iteration.
    """
    def keep_going(i, x):
        return tf.less(i, 10)

    def step(i, x):
        return (tf.add(i, 1), tf.add(x, 1))

    initial_state = [tf.constant(0), x]
    final_i, final_x = tf.while_loop(keep_going, step, initial_state)
    return final_x
class Counter:
    """Holds a `tf.Variable` counter that a TF Function method increments."""

    def __init__(self):
        # The variable is created once, with initial value 0.
        self.counter = tf.Variable(0)

    @tf.function
    def increment(self, c=1):
        """Add `c` to the counter and return the result of `assign_add()`."""
        updated = self.counter.assign_add(c)
        return updated
def display_tf_code(func, experimental_optional_features=None):
    """Render the autograph-generated source of `func` as a Markdown code block.

    Args:
        func: a Python function, or an object exposing `python_function`
            (in which case that attribute is what gets converted).
        experimental_optional_features: forwarded unchanged to
            `tf.autograph.to_code()`.
    """
    from IPython.display import display, Markdown

    # Unwrap to the underlying Python function when one is exposed.
    target = getattr(func, "python_function", func)
    generated = tf.autograph.to_code(
        target, experimental_optional_features=experimental_optional_features)
    display(Markdown('```python\n{}\n```'.format(generated)))
class MyModel(keras.models.Model):
    """Small model built from three `MyDense` layers (defined earlier in the notebook).

    Two hidden layers of 30 units with ReLU activation feed a single-unit
    output layer; the output layer receives the model input concatenated with
    the second hidden layer's output.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.hidden1 = MyDense(30, activation="relu")
        self.hidden2 = MyDense(30, activation="relu")
        self.output_ = MyDense(1)

    def call(self, input):
        # Plain Python print: shows each time this method body actually runs.
        print("Tracing MyModel.call()")
        deep = self.hidden2(self.hidden1(input))
        merged = keras.layers.concatenate([input, deep])
        return self.output_(merged)
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Now the custom code will be called at each iteration. Let's fit, validate and evaluate with tiny datasets to avoid getting too much output:" ] },
{ "cell_type": "code", "execution_count": 235, "metadata": {}, "outputs": [], "source": [ "model.fit(X_train_scaled[:64], y_train[:64], epochs=1,\n", " validation_data=(X_valid_scaled[:64], y_valid[:64]), verbose=0)\n", "model.evaluate(X_test_scaled[:64], y_test[:64], verbose=0)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Alternatively, you can compile a model with `run_eagerly=True`:" ] },
{ "cell_type": "code", "execution_count": 236, "metadata": {}, "outputs": [], "source": [ "model = MyModel()" ] },
{ "cell_type": "code", "execution_count": 237, "metadata": {}, "outputs": [], "source": [ "model.compile(loss=my_mse, optimizer=\"nadam\", metrics=[my_mae], run_eagerly=True)" ] },
{ "cell_type": "code", "execution_count": 238, "metadata": {}, "outputs": [], "source": [ "model.fit(X_train_scaled[:64], y_train[:64], epochs=1,\n", " validation_data=(X_valid_scaled[:64], y_valid[:64]), verbose=0)\n", "model.evaluate(X_test_scaled[:64], y_test[:64], verbose=0)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Custom Optimizers" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Defining custom optimizers is not very common, but in case you are one of the happy few who gets to write one, here is an example:" ] },
{ "cell_type": "code", "execution_count": 239, "metadata": {}, "outputs": [], "source": [
 "class MyMomentumOptimizer(keras.optimizers.Optimizer):\n",
 "    \"\"\"Example custom optimizer that keeps one \"momentum\" slot per model variable.\"\"\"\n",
 "\n",
 "    def __init__(self, learning_rate=0.001, momentum=0.9, name=\"MyMomentumOptimizer\", **kwargs):\n",
 "        \"\"\"Initialise the base optimizer, then register the hyperparameters with _set_hyper().\"\"\"\n",
 "        super().__init__(name, **kwargs)\n",
 "        # An lr=... keyword argument, when present, takes precedence over learning_rate\n",
 "        initial_learning_rate = kwargs.get(\"lr\", learning_rate)\n",
 "        self._set_hyper(\"learning_rate\", initial_learning_rate)\n",
 "        # _initial_decay is not assigned in this class; presumably the base constructor sets it (TODO confirm)\n",
 "        self._set_hyper(\"decay\", self._initial_decay)\n",
 "        self._set_hyper(\"momentum\", momentum)\n",
 "\n",
 "    def _create_slots(self, var_list):\n",
 "        \"\"\"Create the optimizer variables (\"slots\" in TensorFlow terms) for the model variables.\n",
 "\n",
 "        Momentum optimization needs one slot, named \"momentum\", per variable in var_list.\n",
 "        \"\"\"\n",
 "        for model_var in var_list:\n",
 "            self.add_slot(model_var, \"momentum\")\n",
 "\n",
 "    @tf.function\n",
 "    def _resource_apply_dense(self, grad, var):\n",
 "        \"\"\"Update the momentum slot of `var` from `grad`, then apply one optimization step to `var`.\"\"\"\n",
 "        dtype = var.dtype.base_dtype\n",
 "        step_size = self._decayed_lr(dtype)  # learning rate with decay applied\n",
 "        velocity = self.get_slot(var, \"momentum\")\n",
 "        beta = self._get_hyper(\"momentum\", dtype)\n",
 "        # velocity <- velocity * beta - (1 - beta) * grad\n",
 "        carried_over = velocity * beta\n",
 "        gradient_part = (1. - beta) * grad\n",
 "        velocity.assign(carried_over - gradient_part)\n",
 "        # var <- var + velocity * step_size\n",
 "        var.assign_add(velocity * step_size)\n",
 "\n",
 "    def _resource_apply_sparse(self, grad, var):\n",
 "        \"\"\"Sparse updates are not implemented by this example optimizer.\"\"\"\n",
 "        raise NotImplementedError\n",
 "\n",
 "    def get_config(self):\n",
 "        \"\"\"Return the base class config extended with learning_rate, decay and momentum.\"\"\"\n",
 "        config = dict(super().get_config())\n",
 "        for hyper_name in (\"learning_rate\", \"decay\", \"momentum\"):\n",
 "            config[hyper_name] = self._serialize_hyperparameter(hyper_name)\n",
 "        return config" ] },
{ "cell_type": "code", "execution_count": 240, "metadata": { "scrolled": false }, "outputs": [], "source": [ "model = keras.models.Sequential([keras.layers.Dense(1, input_shape=[8])])\n", "model.compile(loss=\"mse\", optimizer=MyMomentumOptimizer())\n", "model.fit(X_train_scaled, y_train, epochs=5)" ] } ],
"metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version":
"3.6.8" } }, "nbformat": 4, "nbformat_minor": 2 }