diff --git a/12_custom_models_and_training_with_tensorflow.ipynb b/12_custom_models_and_training_with_tensorflow.ipynb index 1f044eb..12738f3 100644 --- a/12_custom_models_and_training_with_tensorflow.ipynb +++ b/12_custom_models_and_training_with_tensorflow.ipynb @@ -70,6 +70,7 @@ "\n", "# to make this notebook's output stable across runs\n", "np.random.seed(42)\n", + "tf.random.set_seed(42)\n", "\n", "# To plot pretty figures\n", "%matplotlib inline\n", @@ -1079,6 +1080,17 @@ "execution_count": 84, "metadata": {}, "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [], "source": [ "def my_softplus(z): # return value is just tf.nn.softplus(z)\n", " return tf.math.log(tf.exp(z) + 1.0)\n", @@ -1096,7 +1108,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 86, "metadata": {}, "outputs": [], "source": [ @@ -1108,7 +1120,18 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 87, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, "metadata": {}, "outputs": [], "source": [ @@ -1124,7 +1147,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 89, "metadata": {}, "outputs": [], "source": [ @@ -1133,7 +1156,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 90, "metadata": { "scrolled": false }, @@ -1145,7 +1168,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 91, "metadata": {}, "outputs": [], "source": [ @@ -1154,7 +1177,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 92, "metadata": {}, "outputs": [], "source": [ @@ -1162,7 +1185,7 @@ " \"my_model_with_many_custom_parts.h5\",\n", " custom_objects={\n", " 
\"my_l1_regularizer\": my_l1_regularizer,\n", - " \"my_positive_weights\": lambda: my_positive_weights,\n", + " \"my_positive_weights\": my_positive_weights,\n", " \"my_glorot_initializer\": my_glorot_initializer,\n", " \"my_softplus\": my_softplus,\n", " })" @@ -1170,7 +1193,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 93, "metadata": {}, "outputs": [], "source": [ @@ -1185,7 +1208,18 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 94, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 95, "metadata": {}, "outputs": [], "source": [ @@ -1201,7 +1235,7 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 96, "metadata": {}, "outputs": [], "source": [ @@ -1210,7 +1244,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 97, "metadata": {}, "outputs": [], "source": [ @@ -1220,7 +1254,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 98, "metadata": {}, "outputs": [], "source": [ @@ -1229,7 +1263,7 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 99, "metadata": {}, "outputs": [], "source": [ @@ -1237,7 +1271,7 @@ " \"my_model_with_many_custom_parts.h5\",\n", " custom_objects={\n", " \"MyL1Regularizer\": MyL1Regularizer,\n", - " \"my_positive_weights\": lambda: my_positive_weights,\n", + " \"my_positive_weights\": my_positive_weights,\n", " \"my_glorot_initializer\": my_glorot_initializer,\n", " \"my_softplus\": my_softplus,\n", " })" @@ -1252,7 +1286,18 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 100, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 101, "metadata": {}, "outputs": [], "source": [ 
@@ -1265,7 +1310,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 102, "metadata": {}, "outputs": [], "source": [ @@ -1274,7 +1319,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 103, "metadata": {}, "outputs": [], "source": [ @@ -1294,7 +1339,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 104, "metadata": {}, "outputs": [], "source": [ @@ -1303,7 +1348,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 105, "metadata": {}, "outputs": [], "source": [ @@ -1313,7 +1358,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 106, "metadata": {}, "outputs": [], "source": [ @@ -1329,7 +1374,7 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 107, "metadata": {}, "outputs": [], "source": [ @@ -1339,7 +1384,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 108, "metadata": {}, "outputs": [], "source": [ @@ -1348,7 +1393,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 109, "metadata": {}, "outputs": [], "source": [ @@ -1357,7 +1402,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 110, "metadata": {}, "outputs": [], "source": [ @@ -1366,7 +1411,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 111, "metadata": {}, "outputs": [], "source": [ @@ -1382,7 +1427,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 112, "metadata": {}, "outputs": [], "source": [ @@ -1410,9 +1455,16 @@ " return {**base_config, \"threshold\": self.threshold}" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Warning**: when running the following cell, if you get autograph warnings such as `WARNING:tensorflow:AutoGraph could not transform [...] 
and will run it as-is`, then please install version 0.2.2 of the gast library (e.g., by running `!pip install gast==0.2.2`), then restart the kernel and run this notebook again from the beginning (see [autograph issue #1](https://github.com/tensorflow/autograph/issues/1) for more details):" + ] + }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 113, "metadata": {}, "outputs": [], "source": [ @@ -1426,7 +1478,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 114, "metadata": {}, "outputs": [], "source": [ @@ -1440,7 +1492,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ @@ -1449,7 +1501,7 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 116, "metadata": {}, "outputs": [], "source": [ @@ -1466,7 +1518,18 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 117, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 118, "metadata": {}, "outputs": [], "source": [ @@ -1479,7 +1542,7 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ @@ -1488,7 +1551,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 120, "metadata": {}, "outputs": [], "source": [ @@ -1497,7 +1560,7 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 121, "metadata": {}, "outputs": [], "source": [ @@ -1506,7 +1569,7 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 122, "metadata": {}, "outputs": [], "source": [ @@ -1517,7 +1580,7 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": 123, "metadata": {}, "outputs": [], "source": [ @@ -1526,7 +1589,7 @@ }, { "cell_type": "code", - "execution_count": 119, + 
"execution_count": 124, "metadata": {}, "outputs": [], "source": [ @@ -1542,7 +1605,7 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 125, "metadata": {}, "outputs": [], "source": [ @@ -1568,7 +1631,18 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 126, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 127, "metadata": {}, "outputs": [], "source": [ @@ -1581,7 +1655,7 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 128, "metadata": {}, "outputs": [], "source": [ @@ -1590,19 +1664,20 @@ }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 129, "metadata": { "scrolled": true }, "outputs": [], "source": [ "sample_weight = np.random.rand(len(y_train))\n", - "history = model.fit(X_train_scaled, y_train, epochs=2, sample_weight=sample_weight)" + "history = model.fit(X_train_scaled.astype(np.float32), y_train.astype(np.float32),\n", + " epochs=2, sample_weight=sample_weight)" ] }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 130, "metadata": {}, "outputs": [], "source": [ @@ -1611,7 +1686,7 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 131, "metadata": {}, "outputs": [], "source": [ @@ -1620,7 +1695,7 @@ }, { "cell_type": "code", - "execution_count": 126, + "execution_count": 132, "metadata": {}, "outputs": [], "source": [ @@ -1630,16 +1705,16 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 133, "metadata": {}, "outputs": [], "source": [ - "model.fit(X_train_scaled, y_train, epochs=2)" + "model.fit(X_train_scaled.astype(np.float32), y_train.astype(np.float32), epochs=2)" ] }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 134, "metadata": { "scrolled": true }, @@ -1657,7 +1732,7 @@ }, { "cell_type": "code", - 
"execution_count": 129, + "execution_count": 135, "metadata": {}, "outputs": [], "source": [ @@ -1666,7 +1741,7 @@ }, { "cell_type": "code", - "execution_count": 130, + "execution_count": 136, "metadata": {}, "outputs": [], "source": [ @@ -1682,7 +1757,18 @@ }, { "cell_type": "code", - "execution_count": 131, + "execution_count": 137, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 138, "metadata": {}, "outputs": [], "source": [ @@ -1699,7 +1785,7 @@ }, { "cell_type": "code", - "execution_count": 132, + "execution_count": 139, "metadata": {}, "outputs": [], "source": [ @@ -1731,7 +1817,18 @@ }, { "cell_type": "code", - "execution_count": 133, + "execution_count": 140, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 141, "metadata": {}, "outputs": [], "source": [ @@ -1743,7 +1840,7 @@ }, { "cell_type": "code", - "execution_count": 134, + "execution_count": 142, "metadata": { "scrolled": false }, @@ -1757,7 +1854,7 @@ }, { "cell_type": "code", - "execution_count": 135, + "execution_count": 143, "metadata": {}, "outputs": [], "source": [ @@ -1766,7 +1863,7 @@ }, { "cell_type": "code", - "execution_count": 136, + "execution_count": 144, "metadata": {}, "outputs": [], "source": [ @@ -1776,7 +1873,7 @@ }, { "cell_type": "code", - "execution_count": 137, + "execution_count": 145, "metadata": {}, "outputs": [], "source": [ @@ -1792,7 +1889,18 @@ }, { "cell_type": "code", - "execution_count": 138, + "execution_count": 146, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 147, "metadata": {}, "outputs": [], "source": [ @@ -1810,7 +1918,7 @@ 
}, { "cell_type": "code", - "execution_count": 139, + "execution_count": 148, "metadata": {}, "outputs": [], "source": [ @@ -1832,7 +1940,7 @@ }, { "cell_type": "code", - "execution_count": 140, + "execution_count": 149, "metadata": {}, "outputs": [], "source": [ @@ -1851,7 +1959,7 @@ }, { "cell_type": "code", - "execution_count": 141, + "execution_count": 150, "metadata": {}, "outputs": [], "source": [ @@ -1860,7 +1968,7 @@ }, { "cell_type": "code", - "execution_count": 142, + "execution_count": 151, "metadata": {}, "outputs": [], "source": [ @@ -1887,7 +1995,7 @@ }, { "cell_type": "code", - "execution_count": 143, + "execution_count": 152, "metadata": {}, "outputs": [], "source": [ @@ -1916,7 +2024,18 @@ }, { "cell_type": "code", - "execution_count": 144, + "execution_count": 153, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 154, "metadata": {}, "outputs": [], "source": [ @@ -1929,7 +2048,7 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": 155, "metadata": {}, "outputs": [], "source": [ @@ -1938,7 +2057,7 @@ }, { "cell_type": "code", - "execution_count": 146, + "execution_count": 156, "metadata": {}, "outputs": [], "source": [ @@ -1947,7 +2066,7 @@ }, { "cell_type": "code", - "execution_count": 147, + "execution_count": 157, "metadata": {}, "outputs": [], "source": [ @@ -1963,7 +2082,18 @@ }, { "cell_type": "code", - "execution_count": 148, + "execution_count": 158, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 159, "metadata": {}, "outputs": [], "source": [ @@ -1978,7 +2108,7 @@ }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 160, "metadata": {}, "outputs": [], "source": [ @@ -1997,7 +2127,7 @@ }, { "cell_type": 
"code", - "execution_count": 150, + "execution_count": 161, "metadata": {}, "outputs": [], "source": [ @@ -2031,7 +2161,18 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 162, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 163, "metadata": {}, "outputs": [], "source": [ @@ -2050,7 +2191,7 @@ }, { "cell_type": "code", - "execution_count": 152, + "execution_count": 164, "metadata": {}, "outputs": [], "source": [ @@ -2060,7 +2201,7 @@ }, { "cell_type": "code", - "execution_count": 153, + "execution_count": 165, "metadata": {}, "outputs": [], "source": [ @@ -2071,7 +2212,7 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 166, "metadata": {}, "outputs": [], "source": [ @@ -2080,7 +2221,7 @@ }, { "cell_type": "code", - "execution_count": 155, + "execution_count": 167, "metadata": {}, "outputs": [], "source": [ @@ -2093,7 +2234,7 @@ }, { "cell_type": "code", - "execution_count": 156, + "execution_count": 168, "metadata": {}, "outputs": [], "source": [ @@ -2102,7 +2243,7 @@ }, { "cell_type": "code", - "execution_count": 157, + "execution_count": 169, "metadata": {}, "outputs": [], "source": [ @@ -2118,7 +2259,7 @@ }, { "cell_type": "code", - "execution_count": 158, + "execution_count": 170, "metadata": {}, "outputs": [], "source": [ @@ -2132,7 +2273,7 @@ }, { "cell_type": "code", - "execution_count": 159, + "execution_count": 171, "metadata": {}, "outputs": [], "source": [ @@ -2141,7 +2282,7 @@ }, { "cell_type": "code", - "execution_count": 160, + "execution_count": 172, "metadata": {}, "outputs": [], "source": [ @@ -2154,7 +2295,7 @@ }, { "cell_type": "code", - "execution_count": 161, + "execution_count": 173, "metadata": {}, "outputs": [], "source": [ @@ -2163,7 +2304,7 @@ }, { "cell_type": "code", - "execution_count": 162, + "execution_count": 174, "metadata": {}, 
"outputs": [], "source": [ @@ -2177,7 +2318,7 @@ }, { "cell_type": "code", - "execution_count": 163, + "execution_count": 175, "metadata": {}, "outputs": [], "source": [ @@ -2186,7 +2327,7 @@ }, { "cell_type": "code", - "execution_count": 164, + "execution_count": 176, "metadata": {}, "outputs": [], "source": [ @@ -2200,7 +2341,7 @@ }, { "cell_type": "code", - "execution_count": 165, + "execution_count": 177, "metadata": {}, "outputs": [], "source": [ @@ -2215,7 +2356,7 @@ }, { "cell_type": "code", - "execution_count": 166, + "execution_count": 178, "metadata": {}, "outputs": [], "source": [ @@ -2230,7 +2371,7 @@ }, { "cell_type": "code", - "execution_count": 167, + "execution_count": 179, "metadata": {}, "outputs": [], "source": [ @@ -2239,7 +2380,7 @@ }, { "cell_type": "code", - "execution_count": 168, + "execution_count": 180, "metadata": {}, "outputs": [], "source": [ @@ -2248,7 +2389,7 @@ }, { "cell_type": "code", - "execution_count": 169, + "execution_count": 181, "metadata": {}, "outputs": [], "source": [ @@ -2263,7 +2404,7 @@ }, { "cell_type": "code", - "execution_count": 170, + "execution_count": 182, "metadata": {}, "outputs": [], "source": [ @@ -2276,7 +2417,7 @@ }, { "cell_type": "code", - "execution_count": 171, + "execution_count": 183, "metadata": {}, "outputs": [], "source": [ @@ -2285,7 +2426,7 @@ }, { "cell_type": "code", - "execution_count": 172, + "execution_count": 184, "metadata": {}, "outputs": [], "source": [ @@ -2298,7 +2439,7 @@ }, { "cell_type": "code", - "execution_count": 173, + "execution_count": 185, "metadata": {}, "outputs": [], "source": [ @@ -2312,7 +2453,7 @@ }, { "cell_type": "code", - "execution_count": 174, + "execution_count": 186, "metadata": {}, "outputs": [], "source": [ @@ -2322,7 +2463,7 @@ }, { "cell_type": "code", - "execution_count": 175, + "execution_count": 187, "metadata": {}, "outputs": [], "source": [ @@ -2342,7 +2483,18 @@ }, { "cell_type": "code", - "execution_count": 176, + "execution_count": 188, + 
"metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 189, "metadata": {}, "outputs": [], "source": [ @@ -2356,7 +2508,7 @@ }, { "cell_type": "code", - "execution_count": 177, + "execution_count": 190, "metadata": {}, "outputs": [], "source": [ @@ -2367,7 +2519,7 @@ }, { "cell_type": "code", - "execution_count": 178, + "execution_count": 191, "metadata": {}, "outputs": [], "source": [ @@ -2381,7 +2533,7 @@ }, { "cell_type": "code", - "execution_count": 179, + "execution_count": 192, "metadata": {}, "outputs": [], "source": [ @@ -2406,7 +2558,7 @@ }, { "cell_type": "code", - "execution_count": 180, + "execution_count": 193, "metadata": {}, "outputs": [], "source": [ @@ -2421,7 +2573,7 @@ }, { "cell_type": "code", - "execution_count": 181, + "execution_count": 194, "metadata": {}, "outputs": [], "source": [ @@ -2430,7 +2582,7 @@ }, { "cell_type": "code", - "execution_count": 182, + "execution_count": 195, "metadata": {}, "outputs": [], "source": [ @@ -2443,7 +2595,7 @@ }, { "cell_type": "code", - "execution_count": 183, + "execution_count": 196, "metadata": {}, "outputs": [], "source": [ @@ -2459,7 +2611,18 @@ }, { "cell_type": "code", - "execution_count": 184, + "execution_count": 197, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 198, "metadata": {}, "outputs": [], "source": [ @@ -2474,7 +2637,7 @@ }, { "cell_type": "code", - "execution_count": 185, + "execution_count": 199, "metadata": {}, "outputs": [], "source": [ @@ -2502,16 +2665,16 @@ }, { "cell_type": "code", - "execution_count": 186, + "execution_count": 200, "metadata": {}, "outputs": [], "source": [ "try:\n", - " from tqdm import tnrange\n", + " from tqdm.notebook import trange\n", " from collections import 
OrderedDict\n", - " with tnrange(1, n_epochs + 1, desc=\"All epochs\") as epochs:\n", + " with trange(1, n_epochs + 1, desc=\"All epochs\") as epochs:\n", " for epoch in epochs:\n", - " with tnrange(1, n_steps + 1, desc=\"Epoch {}/{}\".format(epoch, n_epochs)) as steps:\n", + " with trange(1, n_steps + 1, desc=\"Epoch {}/{}\".format(epoch, n_epochs)) as steps:\n", " for step in steps:\n", " X_batch, y_batch = random_batch(X_train_scaled, y_train)\n", " with tf.GradientTape() as tape:\n", @@ -2545,7 +2708,7 @@ }, { "cell_type": "code", - "execution_count": 187, + "execution_count": 201, "metadata": {}, "outputs": [], "source": [ @@ -2555,7 +2718,7 @@ }, { "cell_type": "code", - "execution_count": 188, + "execution_count": 202, "metadata": {}, "outputs": [], "source": [ @@ -2564,7 +2727,7 @@ }, { "cell_type": "code", - "execution_count": 189, + "execution_count": 203, "metadata": {}, "outputs": [], "source": [ @@ -2573,7 +2736,7 @@ }, { "cell_type": "code", - "execution_count": 190, + "execution_count": 204, "metadata": {}, "outputs": [], "source": [ @@ -2583,7 +2746,7 @@ }, { "cell_type": "code", - "execution_count": 191, + "execution_count": 205, "metadata": {}, "outputs": [], "source": [ @@ -2592,7 +2755,7 @@ }, { "cell_type": "code", - "execution_count": 192, + "execution_count": 206, "metadata": {}, "outputs": [], "source": [ @@ -2608,7 +2771,7 @@ }, { "cell_type": "code", - "execution_count": 193, + "execution_count": 207, "metadata": {}, "outputs": [], "source": [ @@ -2618,7 +2781,7 @@ }, { "cell_type": "code", - "execution_count": 194, + "execution_count": 208, "metadata": {}, "outputs": [], "source": [ @@ -2627,7 +2790,7 @@ }, { "cell_type": "code", - "execution_count": 195, + "execution_count": 209, "metadata": {}, "outputs": [], "source": [ @@ -2643,7 +2806,7 @@ }, { "cell_type": "code", - "execution_count": 196, + "execution_count": 210, "metadata": {}, "outputs": [], "source": [ @@ -2652,7 +2815,7 @@ }, { "cell_type": "code", - "execution_count": 197, + 
"execution_count": 211, "metadata": {}, "outputs": [], "source": [ @@ -2662,7 +2825,7 @@ }, { "cell_type": "code", - "execution_count": 198, + "execution_count": 212, "metadata": {}, "outputs": [], "source": [ @@ -2672,7 +2835,7 @@ }, { "cell_type": "code", - "execution_count": 199, + "execution_count": 213, "metadata": {}, "outputs": [], "source": [ @@ -2681,7 +2844,7 @@ }, { "cell_type": "code", - "execution_count": 200, + "execution_count": 214, "metadata": {}, "outputs": [], "source": [ @@ -2690,7 +2853,7 @@ }, { "cell_type": "code", - "execution_count": 201, + "execution_count": 215, "metadata": {}, "outputs": [], "source": [ @@ -2699,7 +2862,7 @@ }, { "cell_type": "code", - "execution_count": 202, + "execution_count": 216, "metadata": {}, "outputs": [], "source": [ @@ -2715,7 +2878,7 @@ }, { "cell_type": "code", - "execution_count": 203, + "execution_count": 217, "metadata": {}, "outputs": [], "source": [ @@ -2727,7 +2890,7 @@ }, { "cell_type": "code", - "execution_count": 204, + "execution_count": 218, "metadata": {}, "outputs": [], "source": [ @@ -2736,7 +2899,7 @@ }, { "cell_type": "code", - "execution_count": 205, + "execution_count": 219, "metadata": {}, "outputs": [], "source": [ @@ -2745,7 +2908,7 @@ }, { "cell_type": "code", - "execution_count": 206, + "execution_count": 220, "metadata": {}, "outputs": [], "source": [ @@ -2765,7 +2928,7 @@ }, { "cell_type": "code", - "execution_count": 207, + "execution_count": 221, "metadata": {}, "outputs": [], "source": [ @@ -2777,7 +2940,18 @@ }, { "cell_type": "code", - "execution_count": 208, + "execution_count": 222, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 223, "metadata": {}, "outputs": [], "source": [ @@ -2789,7 +2963,7 @@ }, { "cell_type": "code", - "execution_count": 209, + "execution_count": 224, "metadata": {}, "outputs": [], "source": [ @@ -2816,7 
+2990,7 @@ }, { "cell_type": "code", - "execution_count": 210, + "execution_count": 225, "metadata": {}, "outputs": [], "source": [ @@ -2829,7 +3003,7 @@ }, { "cell_type": "code", - "execution_count": 211, + "execution_count": 226, "metadata": {}, "outputs": [], "source": [ @@ -2838,7 +3012,7 @@ }, { "cell_type": "code", - "execution_count": 212, + "execution_count": 227, "metadata": {}, "outputs": [], "source": [ @@ -2854,7 +3028,7 @@ }, { "cell_type": "code", - "execution_count": 213, + "execution_count": 228, "metadata": {}, "outputs": [], "source": [ @@ -2868,7 +3042,7 @@ }, { "cell_type": "code", - "execution_count": 214, + "execution_count": 229, "metadata": {}, "outputs": [], "source": [ @@ -2877,7 +3051,7 @@ }, { "cell_type": "code", - "execution_count": 215, + "execution_count": 230, "metadata": {}, "outputs": [], "source": [ @@ -2893,7 +3067,7 @@ }, { "cell_type": "code", - "execution_count": 216, + "execution_count": 231, "metadata": {}, "outputs": [], "source": [ @@ -2906,7 +3080,7 @@ }, { "cell_type": "code", - "execution_count": 217, + "execution_count": 232, "metadata": {}, "outputs": [], "source": [ @@ -2922,7 +3096,7 @@ }, { "cell_type": "code", - "execution_count": 218, + "execution_count": 233, "metadata": {}, "outputs": [], "source": [ @@ -2935,7 +3109,7 @@ }, { "cell_type": "code", - "execution_count": 219, + "execution_count": 234, "metadata": {}, "outputs": [], "source": [ @@ -2945,7 +3119,7 @@ }, { "cell_type": "code", - "execution_count": 220, + "execution_count": 235, "metadata": {}, "outputs": [], "source": [ @@ -2955,7 +3129,7 @@ }, { "cell_type": "code", - "execution_count": 221, + "execution_count": 236, "metadata": {}, "outputs": [], "source": [ @@ -2968,7 +3142,7 @@ }, { "cell_type": "code", - "execution_count": 222, + "execution_count": 237, "metadata": {}, "outputs": [], "source": [ @@ -2978,7 +3152,7 @@ }, { "cell_type": "code", - "execution_count": 223, + "execution_count": 238, "metadata": {}, "outputs": [], "source": [ @@ 
-2988,7 +3162,7 @@ }, { "cell_type": "code", - "execution_count": 224, + "execution_count": 239, "metadata": {}, "outputs": [], "source": [ @@ -3003,7 +3177,7 @@ }, { "cell_type": "code", - "execution_count": 225, + "execution_count": 240, "metadata": {}, "outputs": [], "source": [ @@ -3014,7 +3188,7 @@ }, { "cell_type": "code", - "execution_count": 226, + "execution_count": 241, "metadata": { "scrolled": true }, @@ -3031,7 +3205,7 @@ }, { "cell_type": "code", - "execution_count": 227, + "execution_count": 242, "metadata": {}, "outputs": [], "source": [ @@ -3045,7 +3219,7 @@ }, { "cell_type": "code", - "execution_count": 228, + "execution_count": 243, "metadata": {}, "outputs": [], "source": [ @@ -3069,7 +3243,7 @@ }, { "cell_type": "code", - "execution_count": 229, + "execution_count": 244, "metadata": {}, "outputs": [], "source": [ @@ -3081,7 +3255,7 @@ }, { "cell_type": "code", - "execution_count": 230, + "execution_count": 245, "metadata": {}, "outputs": [], "source": [ @@ -3093,7 +3267,7 @@ }, { "cell_type": "code", - "execution_count": 231, + "execution_count": 246, "metadata": {}, "outputs": [], "source": [ @@ -3122,7 +3296,18 @@ }, { "cell_type": "code", - "execution_count": 232, + "execution_count": 247, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 248, "metadata": {}, "outputs": [], "source": [ @@ -3147,7 +3332,7 @@ }, { "cell_type": "code", - "execution_count": 233, + "execution_count": 249, "metadata": {}, "outputs": [], "source": [ @@ -3156,7 +3341,7 @@ }, { "cell_type": "code", - "execution_count": 234, + "execution_count": 250, "metadata": {}, "outputs": [], "source": [ @@ -3174,7 +3359,18 @@ }, { "cell_type": "code", - "execution_count": 235, + "execution_count": 251, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + 
"tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 252, "metadata": {}, "outputs": [], "source": [ @@ -3183,7 +3379,7 @@ }, { "cell_type": "code", - "execution_count": 236, + "execution_count": 253, "metadata": {}, "outputs": [], "source": [ @@ -3199,7 +3395,7 @@ }, { "cell_type": "code", - "execution_count": 237, + "execution_count": 254, "metadata": {}, "outputs": [], "source": [ @@ -3217,7 +3413,18 @@ }, { "cell_type": "code", - "execution_count": 238, + "execution_count": 255, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 256, "metadata": {}, "outputs": [], "source": [ @@ -3226,7 +3433,7 @@ }, { "cell_type": "code", - "execution_count": 239, + "execution_count": 257, "metadata": {}, "outputs": [], "source": [ @@ -3235,7 +3442,7 @@ }, { "cell_type": "code", - "execution_count": 240, + "execution_count": 258, "metadata": {}, "outputs": [], "source": [ @@ -3260,7 +3467,7 @@ }, { "cell_type": "code", - "execution_count": 241, + "execution_count": 259, "metadata": {}, "outputs": [], "source": [ @@ -3306,7 +3513,18 @@ }, { "cell_type": "code", - "execution_count": 242, + "execution_count": 260, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 261, "metadata": { "scrolled": false }, @@ -3316,6 +3534,362 @@ "model.compile(loss=\"mse\", optimizer=MyMomentumOptimizer())\n", "model.fit(X_train_scaled, y_train, epochs=5)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exercises" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. to 11.\n", + "See Appendix A." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 12. 
Implement a custom layer that performs _Layer Normalization_\n", + "_We will use this type of layer in Chapter 15 when using Recurrent Neural Networks._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### a.\n", + "_Exercise: The `build()` method should define two trainable weights *α* and *β*, both of shape `input_shape[-1:]` and data type `tf.float32`. *α* should be initialized with 1s, and *β* with 0s._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Solution: see below." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### b.\n", + "_Exercise: The `call()` method should compute the mean_ μ _and standard deviation_ σ _of each instance's features. For this, you can use `tf.nn.moments(inputs, axes=-1, keepdims=True)`, which returns the mean μ and the variance σ² of each instance (compute the square root of the variance to get the standard deviation). Then the function should compute and return *α*⊗(*X* - μ)/(σ + ε) + *β*, where ⊗ represents itemwise multiplication (`*`) and ε is a smoothing term (small constant to avoid division by zero, e.g., 0.001)._" + ] + }, + { + "cell_type": "code", + "execution_count": 262, + "metadata": {}, + "outputs": [], + "source": [ + "class LayerNormalization(keras.layers.Layer):\n", + " def __init__(self, eps=0.001, **kwargs):\n", + " super().__init__(**kwargs)\n", + " self.eps = eps\n", + "\n", + " def build(self, batch_input_shape):\n", + " self.alpha = self.add_weight(\n", + " name=\"alpha\", shape=batch_input_shape[-1:],\n", + " initializer=\"ones\")\n", + " self.beta = self.add_weight(\n", + " name=\"beta\", shape=batch_input_shape[-1:],\n", + " initializer=\"zeros\")\n", + " super().build(batch_input_shape) # must be at the end\n", + "\n", + " def call(self, X):\n", + " mean, variance = tf.nn.moments(X, axes=-1, keepdims=True)\n", + " return self.alpha * (X - mean) / (tf.sqrt(variance + self.eps)) + self.beta\n", + "\n", + " def 
compute_output_shape(self, batch_input_shape):\n", + " return batch_input_shape\n", + "\n", + " def get_config(self):\n", + " base_config = super().get_config()\n", + " return {**base_config, \"eps\": self.eps}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that making _ε_ a hyperparameter (`eps`) was not compulsory. Also note that it's preferable to compute `tf.sqrt(variance + self.eps)` rather than `tf.sqrt(variance) + self.eps`. Indeed, the derivative of sqrt(z) is undefined when z=0, so training will bomb whenever the variance vector has at least one component equal to 0. Adding _ε_ within the square root guarantees that this will never happen." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### c.\n", + "_Exercise: Ensure that your custom layer produces the same (or very nearly the same) output as the `keras.layers.LayerNormalization` layer._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's create one instance of each class, apply them to some data (e.g., the training set), and ensure that the difference is negligible." + ] + }, + { + "cell_type": "code", + "execution_count": 263, + "metadata": {}, + "outputs": [], + "source": [ + "X = X_train.astype(np.float32)\n", + "\n", + "custom_layer_norm = LayerNormalization()\n", + "keras_layer_norm = keras.layers.LayerNormalization()\n", + "\n", + "tf.reduce_mean(keras.losses.mean_absolute_error(\n", + " keras_layer_norm(X), custom_layer_norm(X)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Yep, that's close enough. 
To be extra sure, let's make alpha and beta completely random and compare again:" + ] + }, + { + "cell_type": "code", + "execution_count": 264, + "metadata": {}, + "outputs": [], + "source": [ + "random_alpha = np.random.rand(X.shape[-1])\n", + "random_beta = np.random.rand(X.shape[-1])\n", + "\n", + "custom_layer_norm.set_weights([random_alpha, random_beta])\n", + "keras_layer_norm.set_weights([random_alpha, random_beta])\n", + "\n", + "tf.reduce_mean(keras.losses.mean_absolute_error(\n", + " keras_layer_norm(X), custom_layer_norm(X)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Still a negligible difference! Our custom layer works fine." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 13. Train a model using a custom training loop to tackle the Fashion MNIST dataset\n", + "_The Fashion MNIST dataset was introduced in Chapter 10._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### a.\n", + "_Exercise: Display the epoch, iteration, mean training loss, and mean accuracy over each epoch (updated at each iteration), as well as the validation loss and accuracy at the end of each epoch._" + ] + }, + { + "cell_type": "code", + "execution_count": 265, + "metadata": {}, + "outputs": [], + "source": [ + "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()\n", + "X_train_full = X_train_full.astype(np.float32) / 255.\n", + "X_valid, X_train = X_train_full[:5000], X_train_full[5000:]\n", + "y_valid, y_train = y_train_full[:5000], y_train_full[5000:]\n", + "X_test = X_test.astype(np.float32) / 255." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 266, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 267, + "metadata": {}, + "outputs": [], + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.Dense(100, activation=\"relu\"),\n", + " keras.layers.Dense(10, activation=\"softmax\"),\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 268, + "metadata": {}, + "outputs": [], + "source": [ + "n_epochs = 5\n", + "batch_size = 32\n", + "n_steps = len(X_train) // batch_size\n", + "optimizer = keras.optimizers.Nadam(lr=0.01)\n", + "loss_fn = keras.losses.sparse_categorical_crossentropy\n", + "mean_loss = keras.metrics.Mean()\n", + "metrics = [keras.metrics.SparseCategoricalAccuracy()]" + ] + }, + { + "cell_type": "code", + "execution_count": 269, + "metadata": {}, + "outputs": [], + "source": [ + "with trange(1, n_epochs + 1, desc=\"All epochs\") as epochs:\n", + " for epoch in epochs:\n", + " with trange(1, n_steps + 1, desc=\"Epoch {}/{}\".format(epoch, n_epochs)) as steps:\n", + " for step in steps:\n", + " X_batch, y_batch = random_batch(X_train, y_train)\n", + " with tf.GradientTape() as tape:\n", + " y_pred = model(X_batch)\n", + " main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))\n", + " loss = tf.add_n([main_loss] + model.losses)\n", + " gradients = tape.gradient(loss, model.trainable_variables)\n", + " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", + " for variable in model.variables:\n", + " if variable.constraint is not None:\n", + " variable.assign(variable.constraint(variable)) \n", + " status = OrderedDict()\n", + " mean_loss(loss)\n", + " status[\"loss\"] = mean_loss.result().numpy()\n", + " for metric in metrics:\n", + " metric(y_batch, y_pred)\n", + " status[metric.name] = 
metric.result().numpy()\n", + " steps.set_postfix(status)\n", + " y_pred = model(X_valid)\n", + " status[\"val_loss\"] = np.mean(loss_fn(y_valid, y_pred))\n", + " status[\"val_accuracy\"] = np.mean(keras.metrics.sparse_categorical_accuracy(\n", + " tf.constant(y_valid, dtype=np.float32), y_pred))\n", + " steps.set_postfix(status)\n", + " for metric in [mean_loss] + metrics:\n", + " metric.reset_states()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### b.\n", + "_Exercise: Try using a different optimizer with a different learning rate for the upper layers and the lower layers._" + ] + }, + { + "cell_type": "code", + "execution_count": 270, + "metadata": {}, + "outputs": [], + "source": [ + "keras.backend.clear_session()\n", + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 271, + "metadata": {}, + "outputs": [], + "source": [ + "lower_layers = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.Dense(100, activation=\"relu\"),\n", + "])\n", + "upper_layers = keras.models.Sequential([\n", + " keras.layers.Dense(10, activation=\"softmax\"),\n", + "])\n", + "model = keras.models.Sequential([\n", + " lower_layers, upper_layers\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 272, + "metadata": {}, + "outputs": [], + "source": [ + "lower_optimizer = keras.optimizers.SGD(lr=1e-4)\n", + "upper_optimizer = keras.optimizers.Nadam(lr=1e-3)" + ] + }, + { + "cell_type": "code", + "execution_count": 273, + "metadata": {}, + "outputs": [], + "source": [ + "n_epochs = 5\n", + "batch_size = 32\n", + "n_steps = len(X_train) // batch_size\n", + "loss_fn = keras.losses.sparse_categorical_crossentropy\n", + "mean_loss = keras.metrics.Mean()\n", + "metrics = [keras.metrics.SparseCategoricalAccuracy()]" + ] + }, + { + "cell_type": "code", + "execution_count": 274, + "metadata": {}, + "outputs": [], + "source": [ + "with trange(1, 
n_epochs + 1, desc=\"All epochs\") as epochs:\n", + " for epoch in epochs:\n", + " with trange(1, n_steps + 1, desc=\"Epoch {}/{}\".format(epoch, n_epochs)) as steps:\n", + " for step in steps:\n", + " X_batch, y_batch = random_batch(X_train, y_train)\n", + " with tf.GradientTape(persistent=True) as tape:\n", + " y_pred = model(X_batch)\n", + " main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))\n", + " loss = tf.add_n([main_loss] + model.losses)\n", + " for layers, optimizer in ((lower_layers, lower_optimizer),\n", + " (upper_layers, upper_optimizer)):\n", + " gradients = tape.gradient(loss, layers.trainable_variables)\n", + " optimizer.apply_gradients(zip(gradients, layers.trainable_variables))\n", + " del tape\n", + " for variable in model.variables:\n", + " if variable.constraint is not None:\n", + " variable.assign(variable.constraint(variable)) \n", + " status = OrderedDict()\n", + " mean_loss(loss)\n", + " status[\"loss\"] = mean_loss.result().numpy()\n", + " for metric in metrics:\n", + " metric(y_batch, y_pred)\n", + " status[metric.name] = metric.result().numpy()\n", + " steps.set_postfix(status)\n", + " y_pred = model(X_valid)\n", + " status[\"val_loss\"] = np.mean(loss_fn(y_valid, y_pred))\n", + " status[\"val_accuracy\"] = np.mean(keras.metrics.sparse_categorical_accuracy(\n", + " tf.constant(y_valid, dtype=np.float32), y_pred))\n", + " steps.set_postfix(status)\n", + " for metric in [mean_loss] + metrics:\n", + " metric.reset_states()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -3334,7 +3908,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.6" } }, "nbformat": 4,