3984 lines
100 KiB
Plaintext
3984 lines
100 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"**Chapter 11 – Custom Models and Training with TensorFlow**"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"_This notebook contains all the sample code and solutions to the exercises in chapter 11._"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"<table align=\"left\">\n",
|
||
" <td>\n",
|
||
" <a href=\"https://colab.research.google.com/github/ageron/handson-ml2/blob/master/12_custom_models_and_training_with_tensorflow.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
|
||
" </td>\n",
|
||
" <td>\n",
|
||
" <a target=\"_blank\" href=\"https://kaggle.com/kernels/welcome?src=https://github.com/ageron/handson-ml2/blob/add-kaggle-badge/12_custom_models_and_training_with_tensorflow.ipynb\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" /></a>\n",
|
||
" </td>\n",
|
||
"</table>"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Setup"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"First, let's import a few common modules, check the library versions, seed the random number generators, and make sure Matplotlib plots figures inline with readable label sizes."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Python ≥3.8 is required\n",
|
||
"import sys\n",
|
||
"assert sys.version_info >= (3, 8)\n",
|
||
"\n",
|
||
"# Common imports\n",
|
||
"import numpy as np\n",
|
||
"from pathlib import Path\n",
|
||
"\n",
|
||
"# Scikit-Learn ≥1.0 is required\n",
|
||
"import sklearn\n",
|
||
"assert sklearn.__version__ >= \"1.0\"\n",
|
||
"\n",
|
||
"# TensorFlow ≥2.6 is required\n",
|
||
"import tensorflow as tf\n",
|
||
"assert tf.__version__ >= \"2.6\"\n",
|
||
"\n",
|
||
"# to make this notebook's output stable across runs\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)\n",
|
||
"\n",
|
||
"# To plot pretty figures\n",
|
||
"%matplotlib inline\n",
|
||
"import matplotlib as mpl\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"mpl.rc('axes', labelsize=14)\n",
|
||
"mpl.rc('xtick', labelsize=12)\n",
|
||
"mpl.rc('ytick', labelsize=12)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Tensors and operations"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Tensors"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.constant([[1., 2., 3.], [4., 5., 6.]]) # matrix"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.constant(42) # scalar"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"t = tf.constant([[1., 2., 3.], [4., 5., 6.]])\n",
|
||
"t"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"t.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"t.dtype"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Indexing"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"t[:, 1:]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"t[..., 1, tf.newaxis]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Ops"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"t + 10"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.square(t)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"t @ tf.transpose(t)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Using `tf.keras.backend`"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from tensorflow import keras\n",
|
||
"K = tf.keras.backend\n",
|
||
"K.square(K.transpose(t)) + 10"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### From/To NumPy"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"a = np.array([2., 4., 5.])\n",
|
||
"tf.constant(a)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"t.numpy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.array(t)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.square(a)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.square(t)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Conflicting Types"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"try:\n",
|
||
" tf.constant(2.0) + tf.constant(40)\n",
|
||
"except tf.errors.InvalidArgumentError as ex:\n",
|
||
" print(ex)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"try:\n",
|
||
" tf.constant(2.0) + tf.constant(40., dtype=tf.float64)\n",
|
||
"except tf.errors.InvalidArgumentError as ex:\n",
|
||
" print(ex)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"t2 = tf.constant(40., dtype=tf.float64)\n",
|
||
"tf.constant(2.0) + tf.cast(t2, tf.float32)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Strings"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.constant(b\"hello world\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.constant(\"café\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"u = tf.constant([ord(c) for c in \"café\"])\n",
|
||
"u"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"b = tf.strings.unicode_encode(u, \"UTF-8\")\n",
|
||
"tf.strings.length(b, unit=\"UTF8_CHAR\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.strings.unicode_decode(b, \"UTF-8\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### String arrays"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"p = tf.constant([\"Café\", \"Coffee\", \"caffè\", \"咖啡\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.strings.length(p, unit=\"UTF8_CHAR\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"r = tf.strings.unicode_decode(p, \"UTF8\")\n",
|
||
"r"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(r)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Ragged tensors"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(r[1])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(r[1:3])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"r2 = tf.ragged.constant([[65, 66], [], [67]])\n",
|
||
"print(tf.concat([r, r2], axis=0))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"r3 = tf.ragged.constant([[68, 69, 70], [71], [], [72, 73]])\n",
|
||
"print(tf.concat([r, r3], axis=1))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.strings.unicode_encode(r3, \"UTF-8\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"r.to_tensor()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Sparse tensors"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"s = tf.SparseTensor(indices=[[0, 1], [1, 0], [2, 3]],\n",
|
||
" values=[1., 2., 3.],\n",
|
||
" dense_shape=[3, 4])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(s)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.sparse.to_dense(s)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"s2 = s * 2.0"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"try:\n",
|
||
" s3 = s + 1.\n",
|
||
"except TypeError as ex:\n",
|
||
" print(ex)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"s4 = tf.constant([[10., 20.], [30., 40.], [50., 60.], [70., 80.]])\n",
|
||
"tf.sparse.sparse_dense_matmul(s, s4)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"s5 = tf.SparseTensor(indices=[[0, 2], [0, 1]],\n",
|
||
" values=[1., 2.],\n",
|
||
" dense_shape=[3, 4])\n",
|
||
"print(s5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"try:\n",
|
||
" tf.sparse.to_dense(s5)\n",
|
||
"except tf.errors.InvalidArgumentError as ex:\n",
|
||
" print(ex)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"s6 = tf.sparse.reorder(s5)\n",
|
||
"tf.sparse.to_dense(s6)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Sets"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"set1 = tf.constant([[2, 3, 5, 7], [7, 9, 0, 0]])\n",
|
||
"set2 = tf.constant([[4, 5, 6], [9, 10, 0]])\n",
|
||
"tf.sparse.to_dense(tf.sets.union(set1, set2))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.sparse.to_dense(tf.sets.difference(set1, set2))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 47,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.sparse.to_dense(tf.sets.intersection(set1, set2))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Variables"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 49,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"v.assign(2 * v)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 50,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"v[0, 1].assign(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 51,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"v[:, 2].assign([0., 1.])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 52,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"try:\n",
|
||
" v[1] = [7., 8., 9.]\n",
|
||
"except TypeError as ex:\n",
|
||
" print(ex)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 53,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"v.scatter_nd_update(indices=[[0, 0], [1, 2]],\n",
|
||
" updates=[100., 200.])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 54,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"sparse_delta = tf.IndexedSlices(values=[[1., 2., 3.], [4., 5., 6.]],\n",
|
||
" indices=[1, 0])\n",
|
||
"v.scatter_update(sparse_delta)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Tensor Arrays"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 55,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"array = tf.TensorArray(dtype=tf.float32, size=3)\n",
|
||
"array = array.write(0, tf.constant([1., 2.]))\n",
|
||
"array = array.write(1, tf.constant([3., 10.]))\n",
|
||
"array = array.write(2, tf.constant([5., 7.]))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 56,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"array.read(1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"array.stack()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 58,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"mean, variance = tf.nn.moments(array.stack(), axes=0)\n",
|
||
"mean"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 59,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"variance"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Custom loss function"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Let's start by loading and preparing the California housing dataset. We first load it, then split it into a training set, a validation set and a test set, and finally we scale it:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 60,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.datasets import fetch_california_housing\n",
|
||
"from sklearn.model_selection import train_test_split\n",
|
||
"from sklearn.preprocessing import StandardScaler\n",
|
||
"\n",
|
||
"housing = fetch_california_housing()\n",
|
||
"X_train_full, X_test, y_train_full, y_test = train_test_split(\n",
|
||
" housing.data, housing.target.reshape(-1, 1), random_state=42)\n",
|
||
"X_train, X_valid, y_train, y_valid = train_test_split(\n",
|
||
" X_train_full, y_train_full, random_state=42)\n",
|
||
"\n",
|
||
"scaler = StandardScaler()\n",
|
||
"X_train_scaled = scaler.fit_transform(X_train)\n",
|
||
"X_valid_scaled = scaler.transform(X_valid)\n",
|
||
"X_test_scaled = scaler.transform(X_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 61,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def huber_fn(y_true, y_pred):\n",
|
||
" error = y_true - y_pred\n",
|
||
" is_small_error = tf.abs(error) < 1\n",
|
||
" squared_loss = tf.square(error) / 2\n",
|
||
" linear_loss = tf.abs(error) - 0.5\n",
|
||
" return tf.where(is_small_error, squared_loss, linear_loss)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 62,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"plt.figure(figsize=(8, 3.5))\n",
|
||
"z = np.linspace(-4, 4, 200)\n",
|
||
"plt.plot(z, huber_fn(0, z), \"b-\", linewidth=2, label=\"huber($z$)\")\n",
|
||
"plt.plot(z, z**2 / 2, \"b:\", linewidth=1, label=r\"$\\frac{1}{2}z^2$\")\n",
|
||
"plt.plot([-1, -1], [0, huber_fn(0., -1.)], \"r--\")\n",
|
||
"plt.plot([1, 1], [0, huber_fn(0., 1.)], \"r--\")\n",
|
||
"plt.gca().axhline(y=0, color='k')\n",
|
||
"plt.gca().axvline(x=0, color='k')\n",
|
||
"plt.axis([-4, 4, 0, 4])\n",
|
||
"plt.grid(True)\n",
|
||
"plt.xlabel(\"$z$\")\n",
|
||
"plt.legend(fontsize=14)\n",
|
||
"plt.title(\"Huber loss\", fontsize=14)\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 63,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"input_shape = X_train.shape[1:]\n",
|
||
"\n",
|
||
"model = tf.keras.Sequential([\n",
|
||
" tf.keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
|
||
" input_shape=input_shape),\n",
|
||
" tf.keras.layers.Dense(1),\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 64,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=huber_fn, optimizer=\"nadam\", metrics=[\"mae\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 65,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled, y_train, epochs=2,\n",
|
||
" validation_data=(X_valid_scaled, y_valid))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Saving/Loading Models with Custom Objects"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 66,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.save(\"my_model_with_a_custom_loss.h5\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 67,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.models.load_model(\"my_model_with_a_custom_loss.h5\",\n",
|
||
" custom_objects={\"huber_fn\": huber_fn})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 68,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled, y_train, epochs=2,\n",
|
||
" validation_data=(X_valid_scaled, y_valid))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 69,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def create_huber(threshold=1.0):\n",
|
||
" def huber_fn(y_true, y_pred):\n",
|
||
" error = y_true - y_pred\n",
|
||
" is_small_error = tf.abs(error) < threshold\n",
|
||
" squared_loss = tf.square(error) / 2\n",
|
||
" linear_loss = threshold * tf.abs(error) - threshold**2 / 2\n",
|
||
" return tf.where(is_small_error, squared_loss, linear_loss)\n",
|
||
" return huber_fn"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 70,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=create_huber(2.0), optimizer=\"nadam\", metrics=[\"mae\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 71,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled, y_train, epochs=2,\n",
|
||
" validation_data=(X_valid_scaled, y_valid))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 72,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.save(\"my_model_with_a_custom_loss_threshold_2.h5\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 73,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.models.load_model(\"my_model_with_a_custom_loss_threshold_2.h5\",\n",
|
||
" custom_objects={\"huber_fn\": create_huber(2.0)})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 74,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled, y_train, epochs=2,\n",
|
||
" validation_data=(X_valid_scaled, y_valid))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 75,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class HuberLoss(tf.keras.losses.Loss):\n",
|
||
" def __init__(self, threshold=1.0, **kwargs):\n",
|
||
" self.threshold = threshold\n",
|
||
" super().__init__(**kwargs)\n",
|
||
" def call(self, y_true, y_pred):\n",
|
||
" error = y_true - y_pred\n",
|
||
" is_small_error = tf.abs(error) < self.threshold\n",
|
||
" squared_loss = tf.square(error) / 2\n",
|
||
" linear_loss = self.threshold * tf.abs(error) - self.threshold**2 / 2\n",
|
||
" return tf.where(is_small_error, squared_loss, linear_loss)\n",
|
||
" def get_config(self):\n",
|
||
" base_config = super().get_config()\n",
|
||
" return {**base_config, \"threshold\": self.threshold}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 76,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.Sequential([\n",
|
||
" tf.keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
|
||
" input_shape=input_shape),\n",
|
||
" tf.keras.layers.Dense(1),\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 77,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=HuberLoss(2.), optimizer=\"nadam\", metrics=[\"mae\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 78,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled, y_train, epochs=2,\n",
|
||
" validation_data=(X_valid_scaled, y_valid))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 79,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.save(\"my_model_with_a_custom_loss_class.h5\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 80,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.models.load_model(\"my_model_with_a_custom_loss_class.h5\",\n",
|
||
" custom_objects={\"HuberLoss\": HuberLoss})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 81,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled, y_train, epochs=2,\n",
|
||
" validation_data=(X_valid_scaled, y_valid))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 82,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.loss.threshold"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Other Custom Functions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 83,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 84,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def my_softplus(z): # return value is just tf.nn.softplus(z)\n",
|
||
" return tf.math.log(tf.exp(z) + 1.0)\n",
|
||
"\n",
|
||
"def my_glorot_initializer(shape, dtype=tf.float32):\n",
|
||
" stddev = tf.sqrt(2. / (shape[0] + shape[1]))\n",
|
||
" return tf.random.normal(shape, stddev=stddev, dtype=dtype)\n",
|
||
"\n",
|
||
"def my_l1_regularizer(weights):\n",
|
||
" return tf.reduce_sum(tf.abs(0.01 * weights))\n",
|
||
"\n",
|
||
"def my_positive_weights(weights): # return value is just tf.nn.relu(weights)\n",
|
||
" return tf.where(weights < 0., tf.zeros_like(weights), weights)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 85,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"layer = tf.keras.layers.Dense(1, activation=my_softplus,\n",
|
||
" kernel_initializer=my_glorot_initializer,\n",
|
||
" kernel_regularizer=my_l1_regularizer,\n",
|
||
" kernel_constraint=my_positive_weights)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 86,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 87,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.Sequential([\n",
|
||
" tf.keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
|
||
" input_shape=input_shape),\n",
|
||
" tf.keras.layers.Dense(1, activation=my_softplus,\n",
|
||
" kernel_regularizer=my_l1_regularizer,\n",
|
||
" kernel_constraint=my_positive_weights,\n",
|
||
" kernel_initializer=my_glorot_initializer),\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 88,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"mse\", optimizer=\"nadam\", metrics=[\"mae\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 89,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled, y_train, epochs=2,\n",
|
||
" validation_data=(X_valid_scaled, y_valid))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 90,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.save(\"my_model_with_many_custom_parts.h5\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 91,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.models.load_model(\n",
|
||
" \"my_model_with_many_custom_parts.h5\",\n",
|
||
" custom_objects={\n",
|
||
" \"my_l1_regularizer\": my_l1_regularizer,\n",
|
||
" \"my_positive_weights\": my_positive_weights,\n",
|
||
" \"my_glorot_initializer\": my_glorot_initializer,\n",
|
||
" \"my_softplus\": my_softplus,\n",
|
||
" })"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 92,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class MyL1Regularizer(tf.keras.regularizers.Regularizer):\n",
|
||
" def __init__(self, factor):\n",
|
||
" self.factor = factor\n",
|
||
" def __call__(self, weights):\n",
|
||
" return tf.reduce_sum(tf.abs(self.factor * weights))\n",
|
||
" def get_config(self):\n",
|
||
" return {\"factor\": self.factor}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 93,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 94,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.Sequential([\n",
|
||
" tf.keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
|
||
" input_shape=input_shape),\n",
|
||
" tf.keras.layers.Dense(1, activation=my_softplus,\n",
|
||
" kernel_regularizer=MyL1Regularizer(0.01),\n",
|
||
" kernel_constraint=my_positive_weights,\n",
|
||
" kernel_initializer=my_glorot_initializer),\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 95,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"mse\", optimizer=\"nadam\", metrics=[\"mae\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 96,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled, y_train, epochs=2,\n",
|
||
" validation_data=(X_valid_scaled, y_valid))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 97,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.save(\"my_model_with_many_custom_parts.h5\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 98,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.models.load_model(\n",
|
||
" \"my_model_with_many_custom_parts.h5\",\n",
|
||
" custom_objects={\n",
|
||
" \"MyL1Regularizer\": MyL1Regularizer,\n",
|
||
" \"my_positive_weights\": my_positive_weights,\n",
|
||
" \"my_glorot_initializer\": my_glorot_initializer,\n",
|
||
" \"my_softplus\": my_softplus,\n",
|
||
" })"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Custom Metrics"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 99,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 100,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.Sequential([\n",
|
||
" tf.keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
|
||
" input_shape=input_shape),\n",
|
||
" tf.keras.layers.Dense(1),\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 101,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"mse\", optimizer=\"nadam\", metrics=[create_huber(2.0)])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 102,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled, y_train, epochs=2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"**Note**: if you use the same function as the loss and a metric, you may be surprised to see different results. This is generally just due to floating point precision errors: even though the mathematical equations are equivalent, the operations are not run in the same order, which can lead to small differences. Moreover, when using sample weights, there's more than just precision errors:\n",
|
||
"* the loss since the start of the epoch is the mean of all batch losses seen so far. Each batch loss is the sum of the weighted instance losses divided by the _batch size_ (not the sum of weights, so the batch loss is _not_ the weighted mean of the losses).\n",
|
||
"* the metric since the start of the epoch is equal to the sum of weighted instance losses divided by the sum of all weights seen so far. In other words, it is the weighted mean of all the instance losses, which is not the same thing.\n",
|
||
"\n",
|
||
"If you do the math, you will find that loss = metric * mean of sample weights (plus some floating point precision error)."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 103,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=create_huber(2.0), optimizer=\"nadam\", metrics=[create_huber(2.0)])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 104,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"sample_weight = np.random.rand(len(y_train))\n",
|
||
"history = model.fit(X_train_scaled, y_train, epochs=2, sample_weight=sample_weight)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 105,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"history.history[\"loss\"][0], history.history[\"huber_fn\"][0] * sample_weight.mean()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Streaming metrics"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 106,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"precision = tf.keras.metrics.Precision()\n",
|
||
"precision([0, 1, 1, 1, 0, 1, 0, 1], [1, 1, 0, 1, 0, 1, 0, 1])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 107,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"precision([0, 1, 0, 0, 1, 0, 1, 1], [1, 0, 1, 1, 0, 0, 0, 0])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 108,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"precision.result()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 109,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"precision.variables"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 110,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"precision.reset_states()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Creating a streaming metric:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 111,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class HuberMetric(tf.keras.metrics.Metric):\n",
|
||
" def __init__(self, threshold=1.0, **kwargs):\n",
|
||
" super().__init__(**kwargs) # handles base args (e.g., dtype)\n",
|
||
" self.threshold = threshold\n",
|
||
" self.huber_fn = create_huber(threshold)\n",
|
||
" self.total = self.add_weight(\"total\", initializer=\"zeros\")\n",
|
||
" self.count = self.add_weight(\"count\", initializer=\"zeros\")\n",
|
||
" def update_state(self, y_true, y_pred, sample_weight=None):\n",
|
||
" metric = self.huber_fn(y_true, y_pred)\n",
|
||
" self.total.assign_add(tf.reduce_sum(metric))\n",
|
||
" self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))\n",
|
||
" def result(self):\n",
|
||
" return self.total / self.count\n",
|
||
" def get_config(self):\n",
|
||
" base_config = super().get_config()\n",
|
||
" return {**base_config, \"threshold\": self.threshold}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 112,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"m = HuberMetric(2.)\n",
|
||
"\n",
|
||
"# total = 2 * |10 - 2| - 2²/2 = 14\n",
|
||
"# count = 1\n",
|
||
"# result = 14 / 1 = 14\n",
|
||
"m(tf.constant([[2.]]), tf.constant([[10.]])) "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 113,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# total = total + (|1 - 0|² / 2) + (2 * |9.25 - 5| - 2² / 2) = 14 + 7 = 21\n",
|
||
"# count = count + 2 = 3\n",
|
||
"# result = total / count = 21 / 3 = 7\n",
|
||
"m(tf.constant([[0.], [5.]]), tf.constant([[1.], [9.25]]))\n",
|
||
"\n",
|
||
"m.result()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 114,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"m.variables"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 115,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"m.reset_states()\n",
|
||
"m.variables"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Let's check that the `HuberMetric` class works well:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 116,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 117,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.Sequential([\n",
|
||
" tf.keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
|
||
" input_shape=input_shape),\n",
|
||
" tf.keras.layers.Dense(1),\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 118,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=create_huber(2.0), optimizer=\"nadam\", metrics=[HuberMetric(2.0)])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 119,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled.astype(np.float32), y_train.astype(np.float32), epochs=2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 120,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.save(\"my_model_with_a_custom_metric.h5\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 121,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.models.load_model(\"my_model_with_a_custom_metric.h5\",\n",
|
||
" custom_objects={\"huber_fn\": create_huber(2.0),\n",
|
||
" \"HuberMetric\": HuberMetric})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 122,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled.astype(np.float32), y_train.astype(np.float32), epochs=2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"**Warning**: Since TF 2.2, tf.keras adds an extra first metric in `model.metrics` at position 0 (see [TF issue #38150](https://github.com/tensorflow/tensorflow/issues/38150)). This forces us to use `model.metrics[-1]` rather than `model.metrics[0]` to access the `HuberMetric`."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 123,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.metrics[-1].threshold"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Looks like it works fine! More simply, we could have created the class like this:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 124,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class HuberMetric(tf.keras.metrics.Mean):\n",
|
||
" def __init__(self, threshold=1.0, name='HuberMetric', dtype=None):\n",
|
||
" self.threshold = threshold\n",
|
||
" self.huber_fn = create_huber(threshold)\n",
|
||
" super().__init__(name=name, dtype=dtype)\n",
|
||
" def update_state(self, y_true, y_pred, sample_weight=None):\n",
|
||
" metric = self.huber_fn(y_true, y_pred)\n",
|
||
" super(HuberMetric, self).update_state(metric, sample_weight)\n",
|
||
" def get_config(self):\n",
|
||
" base_config = super().get_config()\n",
|
||
" return {**base_config, \"threshold\": self.threshold} "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"This class handles shapes better, and it also supports sample weights."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 125,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 126,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.Sequential([\n",
|
||
" tf.keras.layers.Dense(30, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n",
|
||
" input_shape=input_shape),\n",
|
||
" tf.keras.layers.Dense(1),\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 127,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=tf.keras.losses.Huber(2.0), optimizer=\"nadam\", weighted_metrics=[HuberMetric(2.0)])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 128,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"sample_weight = np.random.rand(len(y_train))\n",
|
||
"history = model.fit(X_train_scaled.astype(np.float32), y_train.astype(np.float32),\n",
|
||
" epochs=2, sample_weight=sample_weight)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 129,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"history.history[\"loss\"][0], history.history[\"HuberMetric\"][0] * sample_weight.mean()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 130,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.save(\"my_model_with_a_custom_metric_v2.h5\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 131,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.models.load_model(\"my_model_with_a_custom_metric_v2.h5\",\n",
|
||
" custom_objects={\"HuberMetric\": HuberMetric})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 132,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled.astype(np.float32), y_train.astype(np.float32), epochs=2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 133,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.metrics[-1].threshold"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Custom Layers"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 134,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"exponential_layer = tf.keras.layers.Lambda(lambda x: tf.exp(x))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 135,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"exponential_layer([-1., 0., 1.])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Adding an exponential layer at the output of a regression model can be useful if the values to predict are positive and have very different scales (e.g., 0.001, 10., 10000):"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 136,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 137,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.Sequential([\n",
|
||
" tf.keras.layers.Dense(30, activation=\"relu\", input_shape=input_shape),\n",
|
||
" tf.keras.layers.Dense(1),\n",
|
||
" exponential_layer\n",
|
||
"])\n",
|
||
"model.compile(loss=\"mse\", optimizer=\"sgd\")\n",
|
||
"model.fit(X_train_scaled, y_train, epochs=5,\n",
|
||
" validation_data=(X_valid_scaled, y_valid))\n",
|
||
"model.evaluate(X_test_scaled, y_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 138,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class MyDense(tf.keras.layers.Layer):\n",
|
||
" def __init__(self, units, activation=None, **kwargs):\n",
|
||
" super().__init__(**kwargs)\n",
|
||
" self.units = units\n",
|
||
" self.activation = tf.keras.activations.get(activation)\n",
|
||
"\n",
|
||
" def build(self, batch_input_shape):\n",
|
||
" self.kernel = self.add_weight(\n",
|
||
" name=\"kernel\", shape=[batch_input_shape[-1], self.units],\n",
|
||
" initializer=\"glorot_normal\")\n",
|
||
" self.bias = self.add_weight(\n",
|
||
" name=\"bias\", shape=[self.units], initializer=\"zeros\")\n",
|
||
" super().build(batch_input_shape) # must be at the end\n",
|
||
"\n",
|
||
" def call(self, X):\n",
|
||
" return self.activation(X @ self.kernel + self.bias)\n",
|
||
"\n",
|
||
" def compute_output_shape(self, batch_input_shape):\n",
|
||
" return tf.TensorShape(batch_input_shape.as_list()[:-1] + [self.units])\n",
|
||
"\n",
|
||
" def get_config(self):\n",
|
||
" base_config = super().get_config()\n",
|
||
" return {**base_config, \"units\": self.units,\n",
|
||
" \"activation\": tf.keras.activations.serialize(self.activation)}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 139,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 140,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.Sequential([\n",
|
||
" MyDense(30, activation=\"relu\", input_shape=input_shape),\n",
|
||
" MyDense(1)\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 141,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"mse\", optimizer=\"nadam\")\n",
|
||
"model.fit(X_train_scaled, y_train, epochs=2,\n",
|
||
" validation_data=(X_valid_scaled, y_valid))\n",
|
||
"model.evaluate(X_test_scaled, y_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 142,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.save(\"my_model_with_a_custom_layer.h5\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 143,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.models.load_model(\"my_model_with_a_custom_layer.h5\",\n",
|
||
" custom_objects={\"MyDense\": MyDense})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 144,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class MyMultiLayer(tf.keras.layers.Layer):\n",
|
||
" def call(self, X):\n",
|
||
" X1, X2 = X\n",
|
||
" print(\"X1.shape: \", X1.shape ,\" X2.shape: \", X2.shape) # Debugging of custom layer\n",
|
||
" return X1 + X2, X1 * X2\n",
|
||
"\n",
|
||
" def compute_output_shape(self, batch_input_shape):\n",
|
||
" batch_input_shape1, batch_input_shape2 = batch_input_shape\n",
|
||
" return [batch_input_shape1, batch_input_shape2]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Our custom layer can be called using the functional API like this:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 145,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"inputs1 = tf.keras.layers.Input(shape=[2])\n",
|
||
"inputs2 = tf.keras.layers.Input(shape=[2])\n",
|
||
"outputs1, outputs2 = MyMultiLayer()((inputs1, inputs2))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Note that the `call()` method receives symbolic inputs, whose shapes are only partially specified, as the shapes printed by the cell above show (at this stage, we don't know the batch size, which is why the first dimension is `None`).\n",
|
||
"\n",
|
||
"We can also pass actual data to the custom layer. To test this, let's split each dataset's inputs into two parts, with four features each:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 146,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def split_data(data):\n",
|
||
" columns_count = data.shape[-1]\n",
|
||
" half = columns_count // 2\n",
|
||
" return data[:, :half], data[:, half:]\n",
|
||
"\n",
|
||
"X_train_scaled_A, X_train_scaled_B = split_data(X_train_scaled)\n",
|
||
"X_valid_scaled_A, X_valid_scaled_B = split_data(X_valid_scaled)\n",
|
||
"X_test_scaled_A, X_test_scaled_B = split_data(X_test_scaled)\n",
|
||
"\n",
|
||
"# Print the shapes of the split data\n",
|
||
"X_train_scaled_A.shape, X_train_scaled_B.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Now notice that the shapes are fully specified:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 147,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"outputs1, outputs2 = MyMultiLayer()((X_train_scaled_A, X_train_scaled_B))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Let's build a more complete model using the functional API (this is just a toy example, don't expect awesome performance):"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 148,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)\n",
|
||
"\n",
|
||
"input_A = tf.keras.layers.Input(shape=X_train_scaled_A.shape[-1])\n",
|
||
"input_B = tf.keras.layers.Input(shape=X_train_scaled_B.shape[-1])\n",
|
||
"hidden_A, hidden_B = MyMultiLayer()((input_A, input_B))\n",
|
||
"hidden_A = tf.keras.layers.Dense(30, activation='selu')(hidden_A)\n",
|
||
"hidden_B = tf.keras.layers.Dense(30, activation='selu')(hidden_B)\n",
|
||
"concat = tf.keras.layers.Concatenate()((hidden_A, hidden_B))\n",
|
||
"output = tf.keras.layers.Dense(1)(concat)\n",
|
||
"model = tf.keras.Model(inputs=[input_A, input_B], outputs=[output])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 149,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss='mse', optimizer='nadam')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 150,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit((X_train_scaled_A, X_train_scaled_B), y_train, epochs=2,\n",
|
||
" validation_data=((X_valid_scaled_A, X_valid_scaled_B), y_valid))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Now let's create a layer with a different behavior during training and testing:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 151,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class AddGaussianNoise(tf.keras.layers.Layer):\n",
|
||
" def __init__(self, stddev, **kwargs):\n",
|
||
" super().__init__(**kwargs)\n",
|
||
" self.stddev = stddev\n",
|
||
"\n",
|
||
" def call(self, X, training=None):\n",
|
||
" if training:\n",
|
||
" noise = tf.random.normal(tf.shape(X), stddev=self.stddev)\n",
|
||
" return X + noise\n",
|
||
" else:\n",
|
||
" return X\n",
|
||
"\n",
|
||
" def compute_output_shape(self, batch_input_shape):\n",
|
||
" return batch_input_shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Here's a simple model that uses this custom layer:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 152,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)\n",
|
||
"\n",
|
||
"model = tf.keras.Sequential([\n",
|
||
" AddGaussianNoise(stddev=1.0),\n",
|
||
" tf.keras.layers.Dense(30, activation=\"selu\"),\n",
|
||
" tf.keras.layers.Dense(1)\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 153,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"mse\", optimizer=\"nadam\")\n",
|
||
"model.fit(X_train_scaled, y_train, epochs=2,\n",
|
||
" validation_data=(X_valid_scaled, y_valid))\n",
|
||
"model.evaluate(X_test_scaled, y_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Custom Models"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 154,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_new_scaled = X_test_scaled"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 155,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class ResidualBlock(tf.keras.layers.Layer):\n",
|
||
" def __init__(self, n_layers, n_neurons, **kwargs):\n",
|
||
" super().__init__(**kwargs)\n",
|
||
" self.hidden = [tf.keras.layers.Dense(n_neurons, activation=\"elu\",\n",
|
||
" kernel_initializer=\"he_normal\")\n",
|
||
" for _ in range(n_layers)]\n",
|
||
"\n",
|
||
" def call(self, inputs):\n",
|
||
" Z = inputs\n",
|
||
" for layer in self.hidden:\n",
|
||
" Z = layer(Z)\n",
|
||
" return inputs + Z"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 156,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class ResidualRegressor(tf.keras.Model):\n",
|
||
" def __init__(self, output_dim, **kwargs):\n",
|
||
" super().__init__(**kwargs)\n",
|
||
" self.hidden1 = tf.keras.layers.Dense(30, activation=\"elu\",\n",
|
||
" kernel_initializer=\"he_normal\")\n",
|
||
" self.block1 = ResidualBlock(2, 30)\n",
|
||
" self.block2 = ResidualBlock(2, 30)\n",
|
||
" self.out = tf.keras.layers.Dense(output_dim)\n",
|
||
"\n",
|
||
" def call(self, inputs):\n",
|
||
" Z = self.hidden1(inputs)\n",
|
||
" for _ in range(1 + 3):\n",
|
||
" Z = self.block1(Z)\n",
|
||
" Z = self.block2(Z)\n",
|
||
" return self.out(Z)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 157,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 158,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = ResidualRegressor(1)\n",
|
||
"model.compile(loss=\"mse\", optimizer=\"nadam\")\n",
|
||
"history = model.fit(X_train_scaled, y_train, epochs=5)\n",
|
||
"score = model.evaluate(X_test_scaled, y_test)\n",
|
||
"y_pred = model.predict(X_new_scaled)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 159,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.save(\"my_custom_model.ckpt\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 160,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.models.load_model(\"my_custom_model.ckpt\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 161,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"history = model.fit(X_train_scaled, y_train, epochs=5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"We could have defined the model using the sequential API instead:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 162,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 163,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"block1 = ResidualBlock(2, 30)\n",
|
||
"model = tf.keras.Sequential([\n",
|
||
" tf.keras.layers.Dense(30, activation=\"elu\", kernel_initializer=\"he_normal\"),\n",
|
||
" block1, block1, block1, block1,\n",
|
||
" ResidualBlock(2, 30),\n",
|
||
" tf.keras.layers.Dense(1)\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 164,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"mse\", optimizer=\"nadam\")\n",
|
||
"history = model.fit(X_train_scaled, y_train, epochs=5)\n",
|
||
"score = model.evaluate(X_test_scaled, y_test)\n",
|
||
"y_pred = model.predict(X_new_scaled)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Losses and Metrics Based on Model Internals"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"**Note**: the following code differs from the code in the book in two ways:\n",
|
||
"1. It creates a `tf.keras.metrics.Mean()` metric in the constructor and uses it in the `call()` method to track the mean reconstruction loss. Since we only want to do this during training, we add a `training` argument to the `call()` method, and if `training` is `True`, then we update `reconstruction_mean` and we call `self.add_metric()` to ensure it's displayed properly.\n",
|
||
"2. Due to an issue introduced in TF 2.2 ([#46858](https://github.com/tensorflow/tensorflow/issues/46858)), we must not call `super().build()` inside the `build()` method."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 165,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class ReconstructingRegressor(tf.keras.Model):\n",
|
||
" def __init__(self, output_dim, **kwargs):\n",
|
||
" super().__init__(**kwargs)\n",
|
||
" self.hidden = [tf.keras.layers.Dense(30, activation=\"selu\",\n",
|
||
" kernel_initializer=\"lecun_normal\")\n",
|
||
" for _ in range(5)]\n",
|
||
" self.out = tf.keras.layers.Dense(output_dim)\n",
|
||
" self.reconstruction_mean = tf.keras.metrics.Mean(name=\"reconstruction_error\")\n",
|
||
"\n",
|
||
" def build(self, batch_input_shape):\n",
|
||
" n_inputs = batch_input_shape[-1]\n",
|
||
" self.reconstruct = tf.keras.layers.Dense(n_inputs)\n",
|
||
" #super().build(batch_input_shape)\n",
|
||
"\n",
|
||
" def call(self, inputs, training=None):\n",
|
||
" Z = inputs\n",
|
||
" for layer in self.hidden:\n",
|
||
" Z = layer(Z)\n",
|
||
" reconstruction = self.reconstruct(Z)\n",
|
||
" recon_loss = tf.reduce_mean(tf.square(reconstruction - inputs))\n",
|
||
" self.add_loss(0.05 * recon_loss)\n",
|
||
" if training:\n",
|
||
" result = self.reconstruction_mean(recon_loss)\n",
|
||
" self.add_metric(result)\n",
|
||
" return self.out(Z)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 166,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 167,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = ReconstructingRegressor(1)\n",
|
||
"model.compile(loss=\"mse\", optimizer=\"nadam\")\n",
|
||
"history = model.fit(X_train_scaled, y_train, epochs=2)\n",
|
||
"y_pred = model.predict(X_test_scaled)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Computing Gradients with Autodiff"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 168,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def f(w1, w2):\n",
|
||
" return 3 * w1 ** 2 + 2 * w1 * w2"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 169,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"w1, w2 = 5, 3\n",
|
||
"eps = 1e-6\n",
|
||
"(f(w1 + eps, w2) - f(w1, w2)) / eps"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 170,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"(f(w1, w2 + eps) - f(w1, w2)) / eps"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 171,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"w1, w2 = tf.Variable(5.), tf.Variable(3.)\n",
|
||
"with tf.GradientTape() as tape:\n",
|
||
" z = f(w1, w2)\n",
|
||
"\n",
|
||
"gradients = tape.gradient(z, [w1, w2])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 172,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"gradients"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 173,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.GradientTape() as tape:\n",
|
||
" z = f(w1, w2)\n",
|
||
"\n",
|
||
"dz_dw1 = tape.gradient(z, w1)\n",
|
||
"try:\n",
|
||
" dz_dw2 = tape.gradient(z, w2)\n",
|
||
"except RuntimeError as ex:\n",
|
||
" print(ex)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 174,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.GradientTape(persistent=True) as tape:\n",
|
||
" z = f(w1, w2)\n",
|
||
"\n",
|
||
"dz_dw1 = tape.gradient(z, w1)\n",
|
||
"dz_dw2 = tape.gradient(z, w2) # works now!\n",
|
||
"del tape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 175,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"dz_dw1, dz_dw2"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 176,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"c1, c2 = tf.constant(5.), tf.constant(3.)\n",
|
||
"with tf.GradientTape() as tape:\n",
|
||
" z = f(c1, c2)\n",
|
||
"\n",
|
||
"gradients = tape.gradient(z, [c1, c2])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 177,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"gradients"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 178,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.GradientTape() as tape:\n",
|
||
" tape.watch(c1)\n",
|
||
" tape.watch(c2)\n",
|
||
" z = f(c1, c2)\n",
|
||
"\n",
|
||
"gradients = tape.gradient(z, [c1, c2])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 179,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"gradients"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 180,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.GradientTape() as tape:\n",
|
||
" z1 = f(w1, w2 + 2.)\n",
|
||
" z2 = f(w1, w2 + 5.)\n",
|
||
" z3 = f(w1, w2 + 7.)\n",
|
||
"\n",
|
||
"tape.gradient([z1, z2, z3], [w1, w2])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 181,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.GradientTape(persistent=True) as tape:\n",
|
||
" z1 = f(w1, w2 + 2.)\n",
|
||
" z2 = f(w1, w2 + 5.)\n",
|
||
" z3 = f(w1, w2 + 7.)\n",
|
||
"\n",
|
||
"tf.reduce_sum(tf.stack([tape.gradient(z, [w1, w2]) for z in (z1, z2, z3)]), axis=0)\n",
|
||
"del tape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 182,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.GradientTape(persistent=True) as hessian_tape:\n",
|
||
" with tf.GradientTape() as jacobian_tape:\n",
|
||
" z = f(w1, w2)\n",
|
||
" jacobians = jacobian_tape.gradient(z, [w1, w2])\n",
|
||
"hessians = [hessian_tape.gradient(jacobian, [w1, w2])\n",
|
||
" for jacobian in jacobians]\n",
|
||
"del hessian_tape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 183,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"jacobians"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 184,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"hessians"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 185,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def f(w1, w2):\n",
|
||
" return 3 * w1 ** 2 + tf.stop_gradient(2 * w1 * w2)\n",
|
||
"\n",
|
||
"with tf.GradientTape() as tape:\n",
|
||
" z = f(w1, w2)\n",
|
||
"\n",
|
||
"tape.gradient(z, [w1, w2])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 186,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"x = tf.Variable(100.)\n",
|
||
"with tf.GradientTape() as tape:\n",
|
||
" z = my_softplus(x)\n",
|
||
"\n",
|
||
"tape.gradient(z, [x])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 187,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.math.log(tf.exp(tf.constant(30., dtype=tf.float32)) + 1.)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 188,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"x = tf.Variable([100.])\n",
|
||
"with tf.GradientTape() as tape:\n",
|
||
" z = my_softplus(x)\n",
|
||
"\n",
|
||
"tape.gradient(z, [x])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 189,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"@tf.custom_gradient\n",
|
||
"def my_better_softplus(z):\n",
|
||
" exp = tf.exp(z)\n",
|
||
" def my_softplus_gradients(grad):\n",
|
||
" return grad / (1 + 1 / exp)\n",
|
||
" return tf.math.log(exp + 1), my_softplus_gradients"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 190,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def my_better_softplus(z):\n",
|
||
" return tf.where(z > 30., z, tf.math.log(tf.exp(z) + 1.))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 191,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"x = tf.Variable([1000.])\n",
|
||
"with tf.GradientTape() as tape:\n",
|
||
" z = my_better_softplus(x)\n",
|
||
"\n",
|
||
"z, tape.gradient(z, [x])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Custom Training Loops"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 192,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 193,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"l2_reg = tf.keras.regularizers.l2(0.05)\n",
|
||
"model = tf.keras.Sequential([\n",
|
||
" tf.keras.layers.Dense(30, activation=\"elu\", kernel_initializer=\"he_normal\",\n",
|
||
" kernel_regularizer=l2_reg),\n",
|
||
" tf.keras.layers.Dense(1, kernel_regularizer=l2_reg)\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 194,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def random_batch(X, y, batch_size=32):\n",
|
||
" idx = np.random.randint(len(X), size=batch_size)\n",
|
||
" return X[idx], y[idx]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 195,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def print_status_bar(iteration, total, loss, metrics=None):\n",
|
||
" metrics = \" - \".join([\"{}: {:.4f}\".format(m.name, m.result())\n",
|
||
" for m in [loss] + (metrics or [])])\n",
|
||
" end = \"\" if iteration < total else \"\\n\"\n",
|
||
" print(\"\\r{}/{} - \".format(iteration, total) + metrics,\n",
|
||
" end=end)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 196,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import time\n",
|
||
"\n",
|
||
"mean_loss = tf.keras.metrics.Mean(name=\"loss\")\n",
|
||
"mean_square = tf.keras.metrics.Mean(name=\"mean_square\")\n",
|
||
"for i in range(1, 50 + 1):\n",
|
||
" loss = 1 / i\n",
|
||
" mean_loss(loss)\n",
|
||
" mean_square(i ** 2)\n",
|
||
" print_status_bar(i, 50, mean_loss, [mean_square])\n",
|
||
" time.sleep(0.05)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"A fancier version with a progress bar:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 197,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def progress_bar(iteration, total, size=30):\n",
|
||
" running = iteration < total\n",
|
||
" c = \">\" if running else \"=\"\n",
|
||
" p = (size - 1) * iteration // total\n",
|
||
" fmt = \"{{:-{}d}}/{{}} [{{}}]\".format(len(str(total)))\n",
|
||
" params = [iteration, total, \"=\" * p + c + \".\" * (size - p - 1)]\n",
|
||
" return fmt.format(*params)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 198,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"progress_bar(3500, 10000, size=6)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 199,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def print_status_bar(iteration, total, loss, metrics=None, size=30):\n",
|
||
" metrics = \" - \".join([\"{}: {:.4f}\".format(m.name, m.result())\n",
|
||
" for m in [loss] + (metrics or [])])\n",
|
||
" end = \"\" if iteration < total else \"\\n\"\n",
|
||
" print(\"\\r{} - {}\".format(progress_bar(iteration, total), metrics), end=end)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 200,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"mean_loss = tf.keras.metrics.Mean(name=\"loss\")\n",
|
||
"mean_square = tf.keras.metrics.Mean(name=\"mean_square\")\n",
|
||
"for i in range(1, 50 + 1):\n",
|
||
" loss = 1 / i\n",
|
||
" mean_loss(loss)\n",
|
||
" mean_square(i ** 2)\n",
|
||
" print_status_bar(i, 50, mean_loss, [mean_square])\n",
|
||
" time.sleep(0.05)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 201,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 202,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_epochs = 5\n",
|
||
"batch_size = 32\n",
|
||
"n_steps = len(X_train) // batch_size\n",
|
||
"optimizer = tf.keras.optimizers.Nadam(learning_rate=0.01)\n",
|
||
"loss_fn = tf.keras.losses.mean_squared_error\n",
|
||
"mean_loss = tf.keras.metrics.Mean()\n",
|
||
"metrics = [tf.keras.metrics.MeanAbsoluteError()]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 203,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"for epoch in range(1, n_epochs + 1):\n",
|
||
" print(\"Epoch {}/{}\".format(epoch, n_epochs))\n",
|
||
" for step in range(1, n_steps + 1):\n",
|
||
" X_batch, y_batch = random_batch(X_train_scaled, y_train)\n",
|
||
" with tf.GradientTape() as tape:\n",
|
||
" y_pred = model(X_batch)\n",
|
||
" main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))\n",
|
||
" loss = tf.add_n([main_loss] + model.losses)\n",
|
||
" gradients = tape.gradient(loss, model.trainable_variables)\n",
|
||
" optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n",
|
||
" for variable in model.variables:\n",
|
||
" if variable.constraint is not None:\n",
|
||
" variable.assign(variable.constraint(variable))\n",
|
||
" mean_loss(loss)\n",
|
||
" for metric in metrics:\n",
|
||
" metric(y_batch, y_pred)\n",
|
||
" print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)\n",
|
||
" print_status_bar(len(y_train), len(y_train), mean_loss, metrics)\n",
|
||
" for metric in [mean_loss] + metrics:\n",
|
||
" metric.reset_states()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 204,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"try:\n",
|
||
" from tqdm.notebook import trange\n",
|
||
" from collections import OrderedDict\n",
|
||
" with trange(1, n_epochs + 1, desc=\"All epochs\") as epochs:\n",
|
||
" for epoch in epochs:\n",
|
||
" with trange(1, n_steps + 1, desc=\"Epoch {}/{}\".format(epoch, n_epochs)) as steps:\n",
|
||
" for step in steps:\n",
|
||
" X_batch, y_batch = random_batch(X_train_scaled, y_train)\n",
|
||
" with tf.GradientTape() as tape:\n",
|
||
" y_pred = model(X_batch)\n",
|
||
" main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))\n",
|
||
" loss = tf.add_n([main_loss] + model.losses)\n",
|
||
" gradients = tape.gradient(loss, model.trainable_variables)\n",
|
||
" optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n",
|
||
" for variable in model.variables:\n",
|
||
" if variable.constraint is not None:\n",
|
||
" variable.assign(variable.constraint(variable)) \n",
|
||
" status = OrderedDict()\n",
|
||
" mean_loss(loss)\n",
|
||
" status[\"loss\"] = mean_loss.result().numpy()\n",
|
||
" for metric in metrics:\n",
|
||
" metric(y_batch, y_pred)\n",
|
||
" status[metric.name] = metric.result().numpy()\n",
|
||
" steps.set_postfix(status)\n",
|
||
" for metric in [mean_loss] + metrics:\n",
|
||
" metric.reset_states()\n",
|
||
"except ImportError as ex:\n",
|
||
" print(\"To run this cell, please install tqdm, ipywidgets and restart Jupyter\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## TensorFlow Functions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 205,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def cube(x):\n",
|
||
" return x ** 3"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 206,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"cube(2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 207,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"cube(tf.constant(2.0))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 208,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf_cube = tf.function(cube)\n",
|
||
"tf_cube"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 209,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf_cube(2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 210,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf_cube(tf.constant(2.0))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### TF Functions and Concrete Functions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 211,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"concrete_function = tf_cube.get_concrete_function(tf.constant(2.0))\n",
|
||
"concrete_function.graph"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 212,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"concrete_function(tf.constant(2.0))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 213,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"concrete_function is tf_cube.get_concrete_function(tf.constant(2.0))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Exploring Function Definitions and Graphs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 214,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"concrete_function.graph"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 215,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"ops = concrete_function.graph.get_operations()\n",
|
||
"ops"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 216,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"pow_op = ops[2]\n",
|
||
"list(pow_op.inputs)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 217,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"pow_op.outputs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 218,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"concrete_function.graph.get_operation_by_name('x')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 219,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"concrete_function.graph.get_tensor_by_name('Identity:0')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 220,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"concrete_function.function_def.signature"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### How TF Functions Trace Python Functions to Extract Their Computation Graphs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 221,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"@tf.function\n",
|
||
"def tf_cube(x):\n",
|
||
" print(\"print:\", x)\n",
|
||
" return x ** 3"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 222,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"result = tf_cube(tf.constant(2.0))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 223,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"result"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 224,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"result = tf_cube(2)\n",
|
||
"result = tf_cube(3)\n",
|
||
"result = tf_cube(tf.constant([[1., 2.]])) # New shape: trace!\n",
|
||
"result = tf_cube(tf.constant([[3., 4.], [5., 6.]])) # New shape: trace!\n",
|
||
"result = tf_cube(tf.constant([[7., 8.], [9., 10.], [11., 12.]])) # New shape: trace!\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"It is also possible to specify a particular input signature:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 225,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"@tf.function(input_signature=[tf.TensorSpec([None, 28, 28], tf.float32)])\n",
|
||
"def shrink(images):\n",
|
||
" print(\"Tracing\", images)\n",
|
||
" return images[:, ::2, ::2] # drop half the rows and columns"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 226,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 227,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"img_batch_1 = tf.random.uniform(shape=[100, 28, 28])\n",
|
||
"img_batch_2 = tf.random.uniform(shape=[50, 28, 28])\n",
|
||
"preprocessed_images = shrink(img_batch_1) # Traces the function.\n",
|
||
"preprocessed_images = shrink(img_batch_2) # Reuses the same concrete function."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 228,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"img_batch_3 = tf.random.uniform(shape=[2, 2, 2])\n",
|
||
"try:\n",
|
||
" preprocessed_images = shrink(img_batch_3) # rejects unexpected types or shapes\n",
|
||
"except ValueError as ex:\n",
|
||
" print(ex)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Using Autograph To Capture Control Flow"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"A \"static\" `for` loop using `range()`:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 229,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"@tf.function\n",
|
||
"def add_10(x):\n",
|
||
" for i in range(10):\n",
|
||
" x += 1\n",
|
||
" return x"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 230,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"add_10(tf.constant(5))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 231,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"add_10.get_concrete_function(tf.constant(5)).graph.get_operations()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"A \"dynamic\" loop using `tf.while_loop()`:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 232,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"@tf.function\n",
|
||
"def add_10(x):\n",
|
||
" condition = lambda i, x: tf.less(i, 10)\n",
|
||
" body = lambda i, x: (tf.add(i, 1), tf.add(x, 1))\n",
|
||
" final_i, final_x = tf.while_loop(condition, body, [tf.constant(0), x])\n",
|
||
" return final_x"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 233,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"add_10(tf.constant(5))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 234,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"add_10.get_concrete_function(tf.constant(5)).graph.get_operations()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"A \"dynamic\" `for` loop using `tf.range()` (captured by autograph):"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 235,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"@tf.function\n",
|
||
"def add_10(x):\n",
|
||
" for i in tf.range(10):\n",
|
||
" x = x + 1\n",
|
||
" return x"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 236,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"add_10.get_concrete_function(tf.constant(0)).graph.get_operations()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Handling Variables and Other Resources in TF Functions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 237,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"counter = tf.Variable(0)\n",
|
||
"\n",
|
||
"@tf.function\n",
|
||
"def increment(counter, c=1):\n",
|
||
" return counter.assign_add(c)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 238,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"increment(counter)\n",
|
||
"increment(counter)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 239,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"function_def = increment.get_concrete_function(counter).function_def\n",
|
||
"function_def.signature.input_arg[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 240,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"counter = tf.Variable(0)\n",
|
||
"\n",
|
||
"@tf.function\n",
|
||
"def increment(c=1):\n",
|
||
" return counter.assign_add(c)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 241,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"increment()\n",
|
||
"increment()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 242,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"function_def = increment.get_concrete_function().function_def\n",
|
||
"function_def.signature.input_arg[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 243,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class Counter:\n",
|
||
" def __init__(self):\n",
|
||
" self.counter = tf.Variable(0)\n",
|
||
"\n",
|
||
" @tf.function\n",
|
||
" def increment(self, c=1):\n",
|
||
" return self.counter.assign_add(c)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 244,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"c = Counter()\n",
|
||
"c.increment()\n",
|
||
"c.increment()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 245,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"@tf.function\n",
|
||
"def add_10(x):\n",
|
||
" for i in tf.range(10):\n",
|
||
" x += 1\n",
|
||
" return x\n",
|
||
"\n",
|
||
"print(tf.autograph.to_code(add_10.python_function))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 246,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def display_tf_code(func):\n",
|
||
" from IPython.display import display, Markdown\n",
|
||
" if hasattr(func, \"python_function\"):\n",
|
||
" func = func.python_function\n",
|
||
" code = tf.autograph.to_code(func)\n",
|
||
" display(Markdown('```python\\n{}\\n```'.format(code)))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 247,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"display_tf_code(add_10)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Using TF Functions with tf.keras (or Not)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"By default, tf.keras will automatically convert your custom code into TF Functions, so there is no need to use\n",
|
||
"`tf.function()`:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 248,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Custom loss function\n",
|
||
"def my_mse(y_true, y_pred):\n",
|
||
" print(\"Tracing loss my_mse()\")\n",
|
||
" return tf.reduce_mean(tf.square(y_pred - y_true))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 249,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Custom metric function\n",
|
||
"def my_mae(y_true, y_pred):\n",
|
||
" print(\"Tracing metric my_mae()\")\n",
|
||
" return tf.reduce_mean(tf.abs(y_pred - y_true))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 250,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Custom layer\n",
|
||
"class MyDense(tf.keras.layers.Layer):\n",
|
||
" def __init__(self, units, activation=None, **kwargs):\n",
|
||
" super().__init__(**kwargs)\n",
|
||
" self.units = units\n",
|
||
" self.activation = tf.keras.activations.get(activation)\n",
|
||
"\n",
|
||
" def build(self, input_shape):\n",
|
||
" self.kernel = self.add_weight(name='kernel', \n",
|
||
" shape=(input_shape[1], self.units),\n",
|
||
" initializer='uniform',\n",
|
||
" trainable=True)\n",
|
||
" self.biases = self.add_weight(name='bias', \n",
|
||
" shape=(self.units,),\n",
|
||
" initializer='zeros',\n",
|
||
" trainable=True)\n",
|
||
" super().build(input_shape)\n",
|
||
"\n",
|
||
" def call(self, X):\n",
|
||
" print(\"Tracing MyDense.call()\")\n",
|
||
" return self.activation(X @ self.kernel + self.biases)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 251,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 252,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Custom model\n",
|
||
"class MyModel(tf.keras.Model):\n",
|
||
" def __init__(self, **kwargs):\n",
|
||
" super().__init__(**kwargs)\n",
|
||
" self.hidden1 = MyDense(30, activation=\"relu\")\n",
|
||
" self.hidden2 = MyDense(30, activation=\"relu\")\n",
|
||
" self.output_ = MyDense(1)\n",
|
||
"\n",
|
||
" def call(self, input):\n",
|
||
" print(\"Tracing MyModel.call()\")\n",
|
||
" hidden1 = self.hidden1(input)\n",
|
||
" hidden2 = self.hidden2(hidden1)\n",
|
||
" concat = tf.keras.layers.concatenate([input, hidden2])\n",
|
||
" output = self.output_(concat)\n",
|
||
" return output\n",
|
||
"\n",
|
||
"model = MyModel()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 253,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=my_mse, optimizer=\"nadam\", metrics=[my_mae])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 254,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled, y_train, epochs=2,\n",
|
||
" validation_data=(X_valid_scaled, y_valid))\n",
|
||
"model.evaluate(X_test_scaled, y_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"You can turn this off by creating the model with `dynamic=True` (or calling `super().__init__(dynamic=True, **kwargs)` in the model's constructor):"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 255,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 256,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = MyModel(dynamic=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 257,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=my_mse, optimizer=\"nadam\", metrics=[my_mae])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Now the custom code will be called at each iteration. Let's fit, validate and evaluate with tiny datasets to avoid getting too much output:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 258,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled[:64], y_train[:64], epochs=1,\n",
|
||
" validation_data=(X_valid_scaled[:64], y_valid[:64]), verbose=0)\n",
|
||
"model.evaluate(X_test_scaled[:64], y_test[:64], verbose=0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Alternatively, you can compile a model with `run_eagerly=True`:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 259,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 260,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = MyModel()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 261,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=my_mse, optimizer=\"nadam\", metrics=[my_mae], run_eagerly=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 262,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.fit(X_train_scaled[:64], y_train[:64], epochs=1,\n",
|
||
" validation_data=(X_valid_scaled[:64], y_valid[:64]), verbose=0)\n",
|
||
"model.evaluate(X_test_scaled[:64], y_test[:64], verbose=0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Custom Optimizers"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Defining custom optimizers is not very common, but in case you are one of the happy few who get to write one, here is an example:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 263,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class MyMomentumOptimizer(tf.keras.optimizers.Optimizer):\n",
|
||
" def __init__(self, learning_rate=0.001, momentum=0.9, name=\"MyMomentumOptimizer\", **kwargs):\n",
|
||
" \"\"\"Call super().__init__() and use _set_hyper() to store hyperparameters\"\"\"\n",
|
||
" super().__init__(name, **kwargs)\n",
|
||
" self._set_hyper(\"learning_rate\", kwargs.get(\"lr\", learning_rate)) # handle lr=learning_rate\n",
|
||
" self._set_hyper(\"decay\", self._initial_decay) # \n",
|
||
" self._set_hyper(\"momentum\", momentum)\n",
|
||
" \n",
|
||
" def _create_slots(self, var_list):\n",
|
||
" \"\"\"For each model variable, create the optimizer variable associated with it.\n",
|
||
" TensorFlow calls these optimizer variables \"slots\".\n",
|
||
" For momentum optimization, we need one momentum slot per model variable.\n",
|
||
" \"\"\"\n",
|
||
" for var in var_list:\n",
|
||
" self.add_slot(var, \"momentum\")\n",
|
||
"\n",
|
||
" @tf.function\n",
|
||
" def _resource_apply_dense(self, grad, var):\n",
|
||
" \"\"\"Update the slots and perform one optimization step for one model variable\n",
|
||
" \"\"\"\n",
|
||
" var_dtype = var.dtype.base_dtype\n",
|
||
" lr_t = self._decayed_lr(var_dtype) # handle learning rate decay\n",
|
||
" momentum_var = self.get_slot(var, \"momentum\")\n",
|
||
" momentum_hyper = self._get_hyper(\"momentum\", var_dtype)\n",
|
||
" momentum_var.assign(momentum_var * momentum_hyper - (1. - momentum_hyper)* grad)\n",
|
||
" var.assign_add(momentum_var * lr_t)\n",
|
||
"\n",
|
||
" def _resource_apply_sparse(self, grad, var):\n",
|
||
" raise NotImplementedError\n",
|
||
"\n",
|
||
" def get_config(self):\n",
|
||
" base_config = super().get_config()\n",
|
||
" return {\n",
|
||
" **base_config,\n",
|
||
" \"learning_rate\": self._serialize_hyperparameter(\"learning_rate\"),\n",
|
||
" \"decay\": self._serialize_hyperparameter(\"decay\"),\n",
|
||
" \"momentum\": self._serialize_hyperparameter(\"momentum\"),\n",
|
||
" }"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 264,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 265,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=[8])])\n",
|
||
"model.compile(loss=\"mse\", optimizer=MyMomentumOptimizer())\n",
|
||
"model.fit(X_train_scaled, y_train, epochs=5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Exercises"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 1. to 11.\n",
|
||
"See Appendix A."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 12. Implement a custom layer that performs _Layer Normalization_\n",
|
||
"_We will use this type of layer in Chapter 14 when using Recurrent Neural Networks._"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### a.\n",
|
||
"_Exercise: The `build()` method should define two trainable weights *α* and *β*, both of shape `input_shape[-1:]` and data type `tf.float32`. *α* should be initialized with 1s, and *β* with 0s._"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Solution: see below."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### b.\n",
|
||
"_Exercise: The `call()` method should compute the mean_ μ _and standard deviation_ σ _of each instance's features. For this, you can use `tf.nn.moments(inputs, axes=-1, keepdims=True)`, which returns the mean μ and the variance σ<sup>2</sup> of all instances (compute the square root of the variance to get the standard deviation). Then the function should compute and return *α*⊗(*X* - μ)/(σ + ε) + *β*, where ⊗ represents itemwise multiplication (`*`) and ε is a smoothing term (small constant to avoid division by zero, e.g., 0.001)._"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 266,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class LayerNormalization(tf.keras.layers.Layer):\n",
|
||
" def __init__(self, eps=0.001, **kwargs):\n",
|
||
" super().__init__(**kwargs)\n",
|
||
" self.eps = eps\n",
|
||
"\n",
|
||
" def build(self, batch_input_shape):\n",
|
||
" self.alpha = self.add_weight(\n",
|
||
" name=\"alpha\", shape=batch_input_shape[-1:],\n",
|
||
" initializer=\"ones\")\n",
|
||
" self.beta = self.add_weight(\n",
|
||
" name=\"beta\", shape=batch_input_shape[-1:],\n",
|
||
" initializer=\"zeros\")\n",
|
||
" super().build(batch_input_shape) # must be at the end\n",
|
||
"\n",
|
||
" def call(self, X):\n",
|
||
" mean, variance = tf.nn.moments(X, axes=-1, keepdims=True)\n",
|
||
" return self.alpha * (X - mean) / (tf.sqrt(variance + self.eps)) + self.beta\n",
|
||
"\n",
|
||
" def compute_output_shape(self, batch_input_shape):\n",
|
||
" return batch_input_shape\n",
|
||
"\n",
|
||
" def get_config(self):\n",
|
||
" base_config = super().get_config()\n",
|
||
" return {**base_config, \"eps\": self.eps}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Note that making _ε_ a hyperparameter (`eps`) was not compulsory. Also note that it's preferable to compute `tf.sqrt(variance + self.eps)` rather than `tf.sqrt(variance) + self.eps`. Indeed, the derivative of sqrt(z) is undefined when z=0, so training will bomb whenever the variance vector has at least one component equal to 0. Adding _ε_ within the square root guarantees that this will never happen."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### c.\n",
|
||
"_Exercise: Ensure that your custom layer produces the same (or very nearly the same) output as the `tf.keras.layers.LayerNormalization` layer._"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Let's create one instance of each class, apply them to some data (e.g., the training set), and ensure that the difference is negligible."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 267,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"X = X_train.astype(np.float32)\n",
|
||
"\n",
|
||
"custom_layer_norm = LayerNormalization()\n",
|
||
"keras_layer_norm = tf.keras.layers.LayerNormalization()\n",
|
||
"\n",
|
||
"tf.reduce_mean(tf.keras.losses.mean_absolute_error(\n",
|
||
" keras_layer_norm(X), custom_layer_norm(X)))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Yep, that's close enough. To be extra sure, let's make alpha and beta completely random and compare again:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 268,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"random_alpha = np.random.rand(X.shape[-1])\n",
|
||
"random_beta = np.random.rand(X.shape[-1])\n",
|
||
"\n",
|
||
"custom_layer_norm.set_weights([random_alpha, random_beta])\n",
|
||
"keras_layer_norm.set_weights([random_alpha, random_beta])\n",
|
||
"\n",
|
||
"tf.reduce_mean(tf.keras.losses.mean_absolute_error(\n",
|
||
" keras_layer_norm(X), custom_layer_norm(X)))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Still a negligible difference! Our custom layer works fine."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 13. Train a model using a custom training loop to tackle the Fashion MNIST dataset\n",
|
||
"_The Fashion MNIST dataset was introduced in Chapter 9._"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### a.\n",
|
||
"_Exercise: Display the epoch, iteration, mean training loss, and mean accuracy over each epoch (updated at each iteration), as well as the validation loss and accuracy at the end of each epoch._"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 269,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()\n",
|
||
"X_train_full = X_train_full.astype(np.float32) / 255.\n",
|
||
"X_valid, X_train = X_train_full[:5000], X_train_full[5000:]\n",
|
||
"y_valid, y_train = y_train_full[:5000], y_train_full[5000:]\n",
|
||
"X_test = X_test.astype(np.float32) / 255."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 270,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 271,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = tf.keras.Sequential([\n",
|
||
" tf.keras.layers.Flatten(input_shape=[28, 28]),\n",
|
||
" tf.keras.layers.Dense(100, activation=\"relu\"),\n",
|
||
" tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 272,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_epochs = 5\n",
|
||
"batch_size = 32\n",
|
||
"n_steps = len(X_train) // batch_size\n",
|
||
"optimizer = tf.keras.optimizers.Nadam(learning_rate=0.01)\n",
|
||
"loss_fn = tf.keras.losses.sparse_categorical_crossentropy\n",
|
||
"mean_loss = tf.keras.metrics.Mean()\n",
|
||
"metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 273,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"with trange(1, n_epochs + 1, desc=\"All epochs\") as epochs:\n",
|
||
" for epoch in epochs:\n",
|
||
" with trange(1, n_steps + 1, desc=\"Epoch {}/{}\".format(epoch, n_epochs)) as steps:\n",
|
||
" for step in steps:\n",
|
||
" X_batch, y_batch = random_batch(X_train, y_train)\n",
|
||
" with tf.GradientTape() as tape:\n",
|
||
" y_pred = model(X_batch)\n",
|
||
" main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))\n",
|
||
" loss = tf.add_n([main_loss] + model.losses)\n",
|
||
" gradients = tape.gradient(loss, model.trainable_variables)\n",
|
||
" optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n",
|
||
" for variable in model.variables:\n",
|
||
" if variable.constraint is not None:\n",
|
||
" variable.assign(variable.constraint(variable)) \n",
|
||
" status = OrderedDict()\n",
|
||
" mean_loss(loss)\n",
|
||
" status[\"loss\"] = mean_loss.result().numpy()\n",
|
||
" for metric in metrics:\n",
|
||
" metric(y_batch, y_pred)\n",
|
||
" status[metric.name] = metric.result().numpy()\n",
|
||
" steps.set_postfix(status)\n",
|
||
" y_pred = model(X_valid)\n",
|
||
" status[\"val_loss\"] = np.mean(loss_fn(y_valid, y_pred))\n",
|
||
" status[\"val_accuracy\"] = np.mean(tf.keras.metrics.sparse_categorical_accuracy(\n",
|
||
" tf.constant(y_valid, dtype=np.float32), y_pred))\n",
|
||
" steps.set_postfix(status)\n",
|
||
" for metric in [mean_loss] + metrics:\n",
|
||
" metric.reset_states()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### b.\n",
|
||
"_Exercise: Try using a different optimizer with a different learning rate for the upper layers and the lower layers._"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 274,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.keras.backend.clear_session()\n",
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 275,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"lower_layers = tf.keras.Sequential([\n",
|
||
" tf.keras.layers.Flatten(input_shape=[28, 28]),\n",
|
||
" tf.keras.layers.Dense(100, activation=\"relu\"),\n",
|
||
"])\n",
|
||
"upper_layers = tf.keras.Sequential([\n",
|
||
" tf.keras.layers.Dense(10, activation=\"softmax\"),\n",
|
||
"])\n",
|
||
"model = tf.keras.Sequential([\n",
|
||
" lower_layers, upper_layers\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 276,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"lower_optimizer = tf.keras.optimizers.SGD(learning_rate=1e-4)\n",
|
||
"upper_optimizer = tf.keras.optimizers.Nadam(learning_rate=1e-3)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 277,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_epochs = 5\n",
|
||
"batch_size = 32\n",
|
||
"n_steps = len(X_train) // batch_size\n",
|
||
"loss_fn = tf.keras.losses.sparse_categorical_crossentropy\n",
|
||
"mean_loss = tf.keras.metrics.Mean()\n",
|
||
"metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 278,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"with trange(1, n_epochs + 1, desc=\"All epochs\") as epochs:\n",
|
||
" for epoch in epochs:\n",
|
||
" with trange(1, n_steps + 1, desc=\"Epoch {}/{}\".format(epoch, n_epochs)) as steps:\n",
|
||
" for step in steps:\n",
|
||
" X_batch, y_batch = random_batch(X_train, y_train)\n",
|
||
" with tf.GradientTape(persistent=True) as tape:\n",
|
||
" y_pred = model(X_batch)\n",
|
||
" main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))\n",
|
||
" loss = tf.add_n([main_loss] + model.losses)\n",
|
||
" for layers, optimizer in ((lower_layers, lower_optimizer),\n",
|
||
" (upper_layers, upper_optimizer)):\n",
|
||
" gradients = tape.gradient(loss, layers.trainable_variables)\n",
|
||
" optimizer.apply_gradients(zip(gradients, layers.trainable_variables))\n",
|
||
" del tape\n",
|
||
" for variable in model.variables:\n",
|
||
" if variable.constraint is not None:\n",
|
||
" variable.assign(variable.constraint(variable)) \n",
|
||
" status = OrderedDict()\n",
|
||
" mean_loss(loss)\n",
|
||
" status[\"loss\"] = mean_loss.result().numpy()\n",
|
||
" for metric in metrics:\n",
|
||
" metric(y_batch, y_pred)\n",
|
||
" status[metric.name] = metric.result().numpy()\n",
|
||
" steps.set_postfix(status)\n",
|
||
" y_pred = model(X_valid)\n",
|
||
" status[\"val_loss\"] = np.mean(loss_fn(y_valid, y_pred))\n",
|
||
" status[\"val_accuracy\"] = np.mean(tf.keras.metrics.sparse_categorical_accuracy(\n",
|
||
" tf.constant(y_valid, dtype=np.float32), y_pred))\n",
|
||
" steps.set_postfix(status)\n",
|
||
" for metric in [mean_loss] + metrics:\n",
|
||
" metric.reset_states()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.8.12"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
}
|