diff --git a/12_custom_models_and_training_with_tensorflow.ipynb b/12_custom_models_and_training_with_tensorflow.ipynb
index 1f044eb..12738f3 100644
--- a/12_custom_models_and_training_with_tensorflow.ipynb
+++ b/12_custom_models_and_training_with_tensorflow.ipynb
@@ -70,6 +70,7 @@
     "\n",
     "# to make this notebook's output stable across runs\n",
     "np.random.seed(42)\n",
+    "tf.random.set_seed(42)\n",
     "\n",
     "# To plot pretty figures\n",
     "%matplotlib inline\n",
@@ -1079,6 +1080,17 @@
    "execution_count": 84,
    "metadata": {},
    "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "def my_softplus(z): # return value is just tf.nn.softplus(z)\n",
     "    return tf.math.log(tf.exp(z) + 1.0)\n",
@@ -1096,7 +1108,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 85,
+   "execution_count": 86,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1108,7 +1120,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 86,
+   "execution_count": 87,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 88,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1124,7 +1147,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 87,
+   "execution_count": 89,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1133,7 +1156,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 88,
+   "execution_count": 90,
    "metadata": {
     "scrolled": false
    },
@@ -1145,7 +1168,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 89,
+   "execution_count": 91,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1154,7 +1177,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 90,
+   "execution_count": 92,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1162,7 +1185,7 @@
     "    \"my_model_with_many_custom_parts.h5\",\n",
     "    custom_objects={\n",
     "       \"my_l1_regularizer\": my_l1_regularizer,\n",
-    "       \"my_positive_weights\": lambda: my_positive_weights,\n",
+    "       \"my_positive_weights\": my_positive_weights,\n",
     "       \"my_glorot_initializer\": my_glorot_initializer,\n",
     "       \"my_softplus\": my_softplus,\n",
     "    })"
@@ -1170,7 +1193,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 91,
+   "execution_count": 93,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1185,7 +1208,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 92,
+   "execution_count": 94,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 95,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1201,7 +1235,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 93,
+   "execution_count": 96,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1210,7 +1244,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 94,
+   "execution_count": 97,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1220,7 +1254,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 95,
+   "execution_count": 98,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1229,7 +1263,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 96,
+   "execution_count": 99,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1237,7 +1271,7 @@
     "    \"my_model_with_many_custom_parts.h5\",\n",
     "    custom_objects={\n",
     "       \"MyL1Regularizer\": MyL1Regularizer,\n",
-    "       \"my_positive_weights\": lambda: my_positive_weights,\n",
+    "       \"my_positive_weights\": my_positive_weights,\n",
     "       \"my_glorot_initializer\": my_glorot_initializer,\n",
     "       \"my_softplus\": my_softplus,\n",
     "    })"
@@ -1252,7 +1286,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 97,
+   "execution_count": 100,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 101,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1265,7 +1310,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 98,
+   "execution_count": 102,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1274,7 +1319,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 99,
+   "execution_count": 103,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1294,7 +1339,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 100,
+   "execution_count": 104,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1303,7 +1348,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 101,
+   "execution_count": 105,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1313,7 +1358,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 102,
+   "execution_count": 106,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1329,7 +1374,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 103,
+   "execution_count": 107,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1339,7 +1384,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 104,
+   "execution_count": 108,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1348,7 +1393,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 105,
+   "execution_count": 109,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1357,7 +1402,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 106,
+   "execution_count": 110,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1366,7 +1411,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 107,
+   "execution_count": 111,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1382,7 +1427,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 108,
+   "execution_count": 112,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1410,9 +1455,16 @@
     "        return {**base_config, \"threshold\": self.threshold}"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Warning**: if running the following cell produces AutoGraph warnings such as `WARNING:tensorflow:AutoGraph could not transform [...] and will run it as-is`, install version 0.2.2 of the gast library (e.g., by running `!pip install gast==0.2.2`), then restart the kernel and run this notebook again from the beginning (see [autograph issue #1](https://github.com/tensorflow/autograph/issues/1) for more details):"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 109,
+   "execution_count": 113,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1426,7 +1478,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 110,
+   "execution_count": 114,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1440,7 +1492,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 111,
+   "execution_count": 115,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1449,7 +1501,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 112,
+   "execution_count": 116,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1466,7 +1518,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 113,
+   "execution_count": 117,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 118,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1479,7 +1542,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 114,
+   "execution_count": 119,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1488,7 +1551,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 115,
+   "execution_count": 120,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1497,7 +1560,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 116,
+   "execution_count": 121,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1506,7 +1569,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 117,
+   "execution_count": 122,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1517,7 +1580,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 118,
+   "execution_count": 123,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1526,7 +1589,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 119,
+   "execution_count": 124,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1542,7 +1605,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 120,
+   "execution_count": 125,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1568,7 +1631,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 121,
+   "execution_count": 126,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 127,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1581,7 +1655,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 122,
+   "execution_count": 128,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1590,19 +1664,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 123,
+   "execution_count": 129,
    "metadata": {
     "scrolled": true
    },
    "outputs": [],
    "source": [
     "sample_weight = np.random.rand(len(y_train))\n",
-    "history = model.fit(X_train_scaled, y_train, epochs=2, sample_weight=sample_weight)"
+    "history = model.fit(X_train_scaled.astype(np.float32), y_train.astype(np.float32),\n",
+    "                    epochs=2, sample_weight=sample_weight)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 124,
+   "execution_count": 130,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1611,7 +1686,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 125,
+   "execution_count": 131,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1620,7 +1695,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 126,
+   "execution_count": 132,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1630,16 +1705,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 127,
+   "execution_count": 133,
    "metadata": {},
    "outputs": [],
    "source": [
-    "model.fit(X_train_scaled, y_train, epochs=2)"
+    "model.fit(X_train_scaled.astype(np.float32), y_train.astype(np.float32), epochs=2)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 128,
+   "execution_count": 134,
    "metadata": {
     "scrolled": true
    },
@@ -1657,7 +1732,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 129,
+   "execution_count": 135,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1666,7 +1741,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 130,
+   "execution_count": 136,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1682,7 +1757,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 131,
+   "execution_count": 137,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 138,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1699,7 +1785,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 132,
+   "execution_count": 139,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1731,7 +1817,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 133,
+   "execution_count": 140,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 141,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1743,7 +1840,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 134,
+   "execution_count": 142,
    "metadata": {
     "scrolled": false
    },
@@ -1757,7 +1854,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 135,
+   "execution_count": 143,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1766,7 +1863,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 136,
+   "execution_count": 144,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1776,7 +1873,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 137,
+   "execution_count": 145,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1792,7 +1889,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 138,
+   "execution_count": 146,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 147,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1810,7 +1918,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 139,
+   "execution_count": 148,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1832,7 +1940,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 140,
+   "execution_count": 149,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1851,7 +1959,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 141,
+   "execution_count": 150,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1860,7 +1968,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 142,
+   "execution_count": 151,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1887,7 +1995,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 143,
+   "execution_count": 152,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1916,7 +2024,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 144,
+   "execution_count": 153,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 154,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1929,7 +2048,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 145,
+   "execution_count": 155,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1938,7 +2057,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 146,
+   "execution_count": 156,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1947,7 +2066,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 147,
+   "execution_count": 157,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1963,7 +2082,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 148,
+   "execution_count": 158,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 159,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1978,7 +2108,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 149,
+   "execution_count": 160,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1997,7 +2127,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 150,
+   "execution_count": 161,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2031,7 +2161,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 151,
+   "execution_count": 162,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 163,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2050,7 +2191,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 152,
+   "execution_count": 164,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2060,7 +2201,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 153,
+   "execution_count": 165,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2071,7 +2212,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 154,
+   "execution_count": 166,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2080,7 +2221,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 155,
+   "execution_count": 167,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2093,7 +2234,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 156,
+   "execution_count": 168,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2102,7 +2243,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 157,
+   "execution_count": 169,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2118,7 +2259,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 158,
+   "execution_count": 170,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2132,7 +2273,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 159,
+   "execution_count": 171,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2141,7 +2282,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 160,
+   "execution_count": 172,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2154,7 +2295,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 161,
+   "execution_count": 173,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2163,7 +2304,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 162,
+   "execution_count": 174,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2177,7 +2318,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 163,
+   "execution_count": 175,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2186,7 +2327,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 164,
+   "execution_count": 176,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2200,7 +2341,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 165,
+   "execution_count": 177,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2215,7 +2356,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 166,
+   "execution_count": 178,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2230,7 +2371,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 167,
+   "execution_count": 179,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2239,7 +2380,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 168,
+   "execution_count": 180,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2248,7 +2389,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 169,
+   "execution_count": 181,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2263,7 +2404,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 170,
+   "execution_count": 182,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2276,7 +2417,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 171,
+   "execution_count": 183,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2285,7 +2426,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 172,
+   "execution_count": 184,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2298,7 +2439,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 173,
+   "execution_count": 185,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2312,7 +2453,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 174,
+   "execution_count": 186,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2322,7 +2463,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 175,
+   "execution_count": 187,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2342,7 +2483,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 176,
+   "execution_count": 188,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 189,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2356,7 +2508,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 177,
+   "execution_count": 190,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2367,7 +2519,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 178,
+   "execution_count": 191,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2381,7 +2533,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 179,
+   "execution_count": 192,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2406,7 +2558,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 180,
+   "execution_count": 193,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2421,7 +2573,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 181,
+   "execution_count": 194,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2430,7 +2582,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 182,
+   "execution_count": 195,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2443,7 +2595,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 183,
+   "execution_count": 196,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2459,7 +2611,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 184,
+   "execution_count": 197,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 198,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2474,7 +2637,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 185,
+   "execution_count": 199,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2502,16 +2665,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 186,
+   "execution_count": 200,
    "metadata": {},
    "outputs": [],
    "source": [
     "try:\n",
-    "    from tqdm import tnrange\n",
+    "    from tqdm.notebook import trange\n",
     "    from collections import OrderedDict\n",
-    "    with tnrange(1, n_epochs + 1, desc=\"All epochs\") as epochs:\n",
+    "    with trange(1, n_epochs + 1, desc=\"All epochs\") as epochs:\n",
     "        for epoch in epochs:\n",
-    "            with tnrange(1, n_steps + 1, desc=\"Epoch {}/{}\".format(epoch, n_epochs)) as steps:\n",
+    "            with trange(1, n_steps + 1, desc=\"Epoch {}/{}\".format(epoch, n_epochs)) as steps:\n",
     "                for step in steps:\n",
     "                    X_batch, y_batch = random_batch(X_train_scaled, y_train)\n",
     "                    with tf.GradientTape() as tape:\n",
@@ -2545,7 +2708,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 187,
+   "execution_count": 201,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2555,7 +2718,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 188,
+   "execution_count": 202,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2564,7 +2727,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 189,
+   "execution_count": 203,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2573,7 +2736,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 190,
+   "execution_count": 204,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2583,7 +2746,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 191,
+   "execution_count": 205,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2592,7 +2755,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 192,
+   "execution_count": 206,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2608,7 +2771,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 193,
+   "execution_count": 207,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2618,7 +2781,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 194,
+   "execution_count": 208,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2627,7 +2790,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 195,
+   "execution_count": 209,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2643,7 +2806,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 196,
+   "execution_count": 210,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2652,7 +2815,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 197,
+   "execution_count": 211,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2662,7 +2825,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 198,
+   "execution_count": 212,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2672,7 +2835,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 199,
+   "execution_count": 213,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2681,7 +2844,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 200,
+   "execution_count": 214,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2690,7 +2853,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 201,
+   "execution_count": 215,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2699,7 +2862,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 202,
+   "execution_count": 216,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2715,7 +2878,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 203,
+   "execution_count": 217,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2727,7 +2890,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 204,
+   "execution_count": 218,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2736,7 +2899,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 205,
+   "execution_count": 219,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2745,7 +2908,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 206,
+   "execution_count": 220,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2765,7 +2928,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 207,
+   "execution_count": 221,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2777,7 +2940,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 208,
+   "execution_count": 222,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 223,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2789,7 +2963,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 209,
+   "execution_count": 224,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2816,7 +2990,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 210,
+   "execution_count": 225,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2829,7 +3003,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 211,
+   "execution_count": 226,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2838,7 +3012,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 212,
+   "execution_count": 227,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2854,7 +3028,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 213,
+   "execution_count": 228,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2868,7 +3042,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 214,
+   "execution_count": 229,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2877,7 +3051,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 215,
+   "execution_count": 230,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2893,7 +3067,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 216,
+   "execution_count": 231,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2906,7 +3080,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 217,
+   "execution_count": 232,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2922,7 +3096,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 218,
+   "execution_count": 233,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2935,7 +3109,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 219,
+   "execution_count": 234,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2945,7 +3119,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 220,
+   "execution_count": 235,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2955,7 +3129,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 221,
+   "execution_count": 236,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2968,7 +3142,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 222,
+   "execution_count": 237,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2978,7 +3152,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 223,
+   "execution_count": 238,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2988,7 +3162,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 224,
+   "execution_count": 239,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3003,7 +3177,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 225,
+   "execution_count": 240,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3014,7 +3188,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 226,
+   "execution_count": 241,
    "metadata": {
     "scrolled": true
    },
@@ -3031,7 +3205,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 227,
+   "execution_count": 242,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3045,7 +3219,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 228,
+   "execution_count": 243,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3069,7 +3243,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 229,
+   "execution_count": 244,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3081,7 +3255,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 230,
+   "execution_count": 245,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3093,7 +3267,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 231,
+   "execution_count": 246,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3122,7 +3296,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 232,
+   "execution_count": 247,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 248,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3147,7 +3332,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 233,
+   "execution_count": 249,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3156,7 +3341,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 234,
+   "execution_count": 250,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3174,7 +3359,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 235,
+   "execution_count": 251,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 252,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3183,7 +3379,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 236,
+   "execution_count": 253,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3199,7 +3395,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 237,
+   "execution_count": 254,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3217,7 +3413,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 238,
+   "execution_count": 255,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 256,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3226,7 +3433,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 239,
+   "execution_count": 257,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3235,7 +3442,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 240,
+   "execution_count": 258,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3260,7 +3467,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 241,
+   "execution_count": 259,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3306,7 +3513,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 242,
+   "execution_count": 260,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 261,
    "metadata": {
     "scrolled": false
    },
@@ -3316,6 +3534,362 @@
     "model.compile(loss=\"mse\", optimizer=MyMomentumOptimizer())\n",
     "model.fit(X_train_scaled, y_train, epochs=5)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Exercises"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. to 11.\n",
+    "See Appendix A."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 12. Implement a custom layer that performs _Layer Normalization_\n",
+    "_We will use this type of layer in Chapter 15 when using Recurrent Neural Networks._"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### a.\n",
+    "_Exercise: The `build()` method should define two trainable weights *α* and *β*, both of shape `input_shape[-1:]` and data type `tf.float32`. *α* should be initialized with 1s, and *β* with 0s._"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Solution: see below."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### b.\n",
+    "_Exercise: The `call()` method should compute the mean_ μ _and standard deviation_ σ _of each instance's features. For this, you can use `tf.nn.moments(inputs, axes=-1, keepdims=True)`, which returns the mean μ and the variance σ<sup>2</sup> of all instances (compute the square root of the variance to get the standard deviation). Then the function should compute and return *α*⊗(*X* - μ)/(σ + ε) + *β*, where ⊗ represents itemwise multiplication (`*`) and ε is a smoothing term (small constant to avoid division by zero, e.g., 0.001)._"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 262,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class LayerNormalization(keras.layers.Layer):\n",
+    "    def __init__(self, eps=0.001, **kwargs):\n",
+    "        super().__init__(**kwargs)\n",
+    "        self.eps = eps\n",
+    "\n",
+    "    def build(self, batch_input_shape):\n",
+    "        self.alpha = self.add_weight(\n",
+    "            name=\"alpha\", shape=batch_input_shape[-1:],\n",
+    "            initializer=\"ones\")\n",
+    "        self.beta = self.add_weight(\n",
+    "            name=\"beta\", shape=batch_input_shape[-1:],\n",
+    "            initializer=\"zeros\")\n",
+    "        super().build(batch_input_shape) # must be at the end\n",
+    "\n",
+    "    def call(self, X):\n",
+    "        mean, variance = tf.nn.moments(X, axes=-1, keepdims=True)\n",
+    "        return self.alpha * (X - mean) / (tf.sqrt(variance + self.eps)) + self.beta\n",
+    "\n",
+    "    def compute_output_shape(self, batch_input_shape):\n",
+    "        return batch_input_shape\n",
+    "\n",
+    "    def get_config(self):\n",
+    "        base_config = super().get_config()\n",
+    "        return {**base_config, \"eps\": self.eps}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that making _ε_ a hyperparameter (`eps`) was not compulsory. Also note that it's preferable to compute `tf.sqrt(variance + self.eps)` rather than `tf.sqrt(variance) + self.eps`. Indeed, the derivative of sqrt(z) is undefined when z=0, so training will bomb whenever the variance vector has at least one component equal to 0. Adding _ε_ within the square root guarantees that this will never happen."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### c.\n",
+    "_Exercise: Ensure that your custom layer produces the same (or very nearly the same) output as the `keras.layers.LayerNormalization` layer._"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's create one instance of each class, apply them to some data (e.g., the training set), and ensure that the difference is negligible."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 263,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = X_train.astype(np.float32)\n",
+    "\n",
+    "custom_layer_norm = LayerNormalization()\n",
+    "keras_layer_norm = keras.layers.LayerNormalization()\n",
+    "\n",
+    "tf.reduce_mean(keras.losses.mean_absolute_error(\n",
+    "    keras_layer_norm(X), custom_layer_norm(X)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Yep, that's close enough. To be extra sure, let's make alpha and beta completely random and compare again:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 264,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "random_alpha = np.random.rand(X.shape[-1])\n",
+    "random_beta = np.random.rand(X.shape[-1])\n",
+    "\n",
+    "custom_layer_norm.set_weights([random_alpha, random_beta])\n",
+    "keras_layer_norm.set_weights([random_alpha, random_beta])\n",
+    "\n",
+    "tf.reduce_mean(keras.losses.mean_absolute_error(\n",
+    "    keras_layer_norm(X), custom_layer_norm(X)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Still a negligible difference! Our custom layer works fine."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 13. Train a model using a custom training loop to tackle the Fashion MNIST dataset\n",
+    "_The Fashion MNIST dataset was introduced in Chapter 10._"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### a.\n",
+    "_Exercise: Display the epoch, iteration, mean training loss, and mean accuracy over each epoch (updated at each iteration), as well as the validation loss and accuracy at the end of each epoch._"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 265,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()\n",
+    "X_train_full = X_train_full.astype(np.float32) / 255.\n",
+    "X_valid, X_train = X_train_full[:5000], X_train_full[5000:]\n",
+    "y_valid, y_train = y_train_full[:5000], y_train_full[5000:]\n",
+    "X_test = X_test.astype(np.float32) / 255."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 266,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 267,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = keras.models.Sequential([\n",
+    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
+    "    keras.layers.Dense(100, activation=\"relu\"),\n",
+    "    keras.layers.Dense(10, activation=\"softmax\"),\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 268,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n_epochs = 5\n",
+    "batch_size = 32\n",
+    "n_steps = len(X_train) // batch_size\n",
+    "optimizer = keras.optimizers.Nadam(lr=0.01)\n",
+    "loss_fn = keras.losses.sparse_categorical_crossentropy\n",
+    "mean_loss = keras.metrics.Mean()\n",
+    "metrics = [keras.metrics.SparseCategoricalAccuracy()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 269,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with trange(1, n_epochs + 1, desc=\"All epochs\") as epochs:\n",
+    "    for epoch in epochs:\n",
+    "        with trange(1, n_steps + 1, desc=\"Epoch {}/{}\".format(epoch, n_epochs)) as steps:\n",
+    "            for step in steps:\n",
+    "                X_batch, y_batch = random_batch(X_train, y_train)\n",
+    "                with tf.GradientTape() as tape:\n",
+    "                    y_pred = model(X_batch)\n",
+    "                    main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))\n",
+    "                    loss = tf.add_n([main_loss] + model.losses)\n",
+    "                gradients = tape.gradient(loss, model.trainable_variables)\n",
+    "                optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n",
+    "                for variable in model.variables:\n",
+    "                    if variable.constraint is not None:\n",
+    "                        variable.assign(variable.constraint(variable))                    \n",
+    "                status = OrderedDict()\n",
+    "                mean_loss(loss)\n",
+    "                status[\"loss\"] = mean_loss.result().numpy()\n",
+    "                for metric in metrics:\n",
+    "                    metric(y_batch, y_pred)\n",
+    "                    status[metric.name] = metric.result().numpy()\n",
+    "                steps.set_postfix(status)\n",
+    "            y_pred = model(X_valid)\n",
+    "            status[\"val_loss\"] = np.mean(loss_fn(y_valid, y_pred))\n",
+    "            status[\"val_accuracy\"] = np.mean(keras.metrics.sparse_categorical_accuracy(\n",
+    "                tf.constant(y_valid, dtype=np.float32), y_pred))\n",
+    "            steps.set_postfix(status)\n",
+    "        for metric in [mean_loss] + metrics:\n",
+    "            metric.reset_states()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### b.\n",
+    "_Exercise: Try using a different optimizer with a different learning rate for the upper layers and the lower layers._"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 270,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keras.backend.clear_session()\n",
+    "np.random.seed(42)\n",
+    "tf.random.set_seed(42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 271,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lower_layers = keras.models.Sequential([\n",
+    "    keras.layers.Flatten(input_shape=[28, 28]),\n",
+    "    keras.layers.Dense(100, activation=\"relu\"),\n",
+    "])\n",
+    "upper_layers = keras.models.Sequential([\n",
+    "    keras.layers.Dense(10, activation=\"softmax\"),\n",
+    "])\n",
+    "model = keras.models.Sequential([\n",
+    "    lower_layers, upper_layers\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 272,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lower_optimizer = keras.optimizers.SGD(lr=1e-4)\n",
+    "upper_optimizer = keras.optimizers.Nadam(lr=1e-3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 273,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n_epochs = 5\n",
+    "batch_size = 32\n",
+    "n_steps = len(X_train) // batch_size\n",
+    "loss_fn = keras.losses.sparse_categorical_crossentropy\n",
+    "mean_loss = keras.metrics.Mean()\n",
+    "metrics = [keras.metrics.SparseCategoricalAccuracy()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 274,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with trange(1, n_epochs + 1, desc=\"All epochs\") as epochs:\n",
+    "    for epoch in epochs:\n",
+    "        with trange(1, n_steps + 1, desc=\"Epoch {}/{}\".format(epoch, n_epochs)) as steps:\n",
+    "            for step in steps:\n",
+    "                X_batch, y_batch = random_batch(X_train, y_train)\n",
+    "                with tf.GradientTape(persistent=True) as tape:\n",
+    "                    y_pred = model(X_batch)\n",
+    "                    main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))\n",
+    "                    loss = tf.add_n([main_loss] + model.losses)\n",
+    "                for layers, optimizer in ((lower_layers, lower_optimizer),\n",
+    "                                          (upper_layers, upper_optimizer)):\n",
+    "                    gradients = tape.gradient(loss, layers.trainable_variables)\n",
+    "                    optimizer.apply_gradients(zip(gradients, layers.trainable_variables))\n",
+    "                del tape\n",
+    "                for variable in model.variables:\n",
+    "                    if variable.constraint is not None:\n",
+    "                        variable.assign(variable.constraint(variable))                    \n",
+    "                status = OrderedDict()\n",
+    "                mean_loss(loss)\n",
+    "                status[\"loss\"] = mean_loss.result().numpy()\n",
+    "                for metric in metrics:\n",
+    "                    metric(y_batch, y_pred)\n",
+    "                    status[metric.name] = metric.result().numpy()\n",
+    "                steps.set_postfix(status)\n",
+    "            y_pred = model(X_valid)\n",
+    "            status[\"val_loss\"] = np.mean(loss_fn(y_valid, y_pred))\n",
+    "            status[\"val_accuracy\"] = np.mean(keras.metrics.sparse_categorical_accuracy(\n",
+    "                tf.constant(y_valid, dtype=np.float32), y_pred))\n",
+    "            steps.set_postfix(status)\n",
+    "        for metric in [mean_loss] + metrics:\n",
+    "            metric.reset_states()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -3334,7 +3908,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.7.6"
   }
  },
  "nbformat": 4,