From 74794da1de2190fddadbfef988f2126aece86db5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Wed, 7 Jun 2017 17:52:59 +0200 Subject: [PATCH] Use np.random.seed(42) and tf.set_random_seed(42) to make each notebook's output constant, and simplify code in notebook 15 --- 12_distributed_tensorflow.ipynb | 20 +- 14_recurrent_neural_networks.ipynb | 262 +++--- 15_autoencoders.ipynb | 1323 +++++++++++++++++----------- 3 files changed, 957 insertions(+), 648 deletions(-) diff --git a/12_distributed_tensorflow.ipynb b/12_distributed_tensorflow.ipynb index b95ee70..ce69dc0 100644 --- a/12_distributed_tensorflow.ipynb +++ b/12_distributed_tensorflow.ipynb @@ -55,11 +55,13 @@ "\n", "# Common imports\n", "import numpy as np\n", - "import numpy.random as rnd\n", "import os\n", "\n", "# to make this notebook's output stable across runs\n", - "rnd.seed(42)\n", + "def reset_graph(seed=42):\n", + " tf.reset_default_graph()\n", + " tf.set_random_seed(seed)\n", + " np.random.seed(seed)\n", "\n", "# To plot pretty figures\n", "%matplotlib inline\n", @@ -201,7 +203,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "with tf.device(\"/job:ps\"):\n", " a = tf.Variable(1.0, name=\"a\")\n", @@ -238,7 +240,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "with tf.device(tf.train.replica_device_setter(\n", " ps_tasks=2,\n", @@ -280,7 +282,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "test_csv = open(\"my_test.csv\", \"w\")\n", "test_csv.write(\"x1, x2 , target\\n\")\n", @@ -362,7 +364,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "filename_queue = tf.FIFOQueue(capacity=10, dtypes=[tf.string], shapes=[()])\n", "filename = tf.placeholder(tf.string)\n", @@ -409,7 +411,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "def read_and_push_instance(filename_queue, instance_queue):\n", " reader = tf.TextLineReader(skip_header_lines=1)\n", @@ -467,7 +469,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "q = tf.FIFOQueue(capacity=10, dtypes=[tf.float32], shapes=[()])\n", "v = tf.placeholder(tf.float32)\n", @@ -515,7 +517,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": { "collapsed": true, "deletable": true, diff --git a/14_recurrent_neural_networks.ipynb b/14_recurrent_neural_networks.ipynb index 380ff29..f4d541c 100644 --- a/14_recurrent_neural_networks.ipynb +++ b/14_recurrent_neural_networks.ipynb @@ -55,11 +55,13 @@ "\n", "# Common imports\n", "import numpy as np\n", - "import numpy.random as rnd\n", "import os\n", "\n", "# to make this notebook's output stable across runs\n", - "rnd.seed(42)\n", + "def reset_graph(seed=42):\n", + " tf.reset_default_graph()\n", + " tf.set_random_seed(seed)\n", + " np.random.seed(seed)\n", "\n", "# To plot pretty figures\n", "%matplotlib inline\n", @@ -134,7 +136,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_inputs = 3\n", "n_neurons = 5\n", @@ -205,7 +207,7 @@ "editable": true }, "source": [ - "## Using `rnn()`" + "## Using `static_rnn()`" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", - "\n", "n_inputs = 3\n", "n_neurons = 5" ] }, @@ -234,6 +234,8 @@ }, "outputs": [], "source": [ + "reset_graph()\n", + "\n", "X0 = tf.placeholder(tf.float32, [None, n_inputs])\n",
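
The `reset_graph()` helper this patch adds to each notebook can be exercised on its own. A minimal standalone sketch (assuming TensorFlow 1.x, where `tf.reset_default_graph()` and the graph-level `tf.set_random_seed()` are top-level functions): calling it between runs rebuilds an empty graph with the same seeds, so random ops created in the same order reproduce the same values.

    import numpy as np
    import tensorflow as tf

    def reset_graph(seed=42):
        # start from an empty default graph, then fix both RNG seeds
        tf.reset_default_graph()
        tf.set_random_seed(seed)
        np.random.seed(seed)

    reset_graph()
    a = tf.random_uniform([2])
    with tf.Session() as sess:
        first = sess.run(a)

    reset_graph()                  # same seed, same op-creation order...
    a = tf.random_uniform([2])
    with tf.Session() as sess:
        second = sess.run(a)       # ...so the same values come out

    assert np.allclose(first, second)
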
"X1 = tf.placeholder(tf.float32, [None, n_inputs])\n", "\n", @@ -381,8 +383,6 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", - "\n", "n_steps = 2\n", "n_inputs = 3\n", "n_neurons = 5" @@ -398,6 +398,8 @@ }, "outputs": [], "source": [ + "reset_graph()\n", + "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))\n", "\n", @@ -446,6 +448,17 @@ { "cell_type": "code", "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(outputs_val)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, "metadata": { "collapsed": false, "deletable": true, @@ -468,7 +481,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": { "collapsed": true, "deletable": true, @@ -476,8 +489,6 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", - "\n", "n_steps = 2\n", "n_inputs = 3\n", "n_neurons = 5" @@ -485,7 +496,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": { "collapsed": false, "deletable": true, @@ -493,6 +504,8 @@ }, "outputs": [], "source": [ + "reset_graph()\n", + "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "\n", "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", @@ -501,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "metadata": { "collapsed": true, "deletable": true, @@ -514,7 +527,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "metadata": { "collapsed": false, "deletable": true, @@ -531,12 +544,23 @@ "\n", "with tf.Session() as sess:\n", " init.run()\n", - " print(\"outputs =\", outputs.eval(feed_dict={X: X_batch}))" + " outputs_val = outputs.eval(feed_dict={X: X_batch})" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(outputs_val)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, "metadata": { "collapsed": false, "deletable": true, @@ -559,7 +583,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 27, "metadata": { "collapsed": true, "deletable": true, @@ -567,19 +591,19 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", - "\n", "n_steps = 2\n", "n_inputs = 3\n", "n_neurons = 5\n", "\n", + "reset_graph()\n", + "\n", "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", "basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 28, "metadata": { "collapsed": true, "deletable": true, @@ -594,7 +618,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 29, "metadata": { "collapsed": true, "deletable": true, @@ -607,7 +631,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 30, "metadata": { "collapsed": false, "deletable": true, @@ -627,7 +651,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 31, "metadata": { "collapsed": true, "deletable": true, @@ -643,7 +667,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 32, "metadata": { "collapsed": false, "deletable": true, @@ -656,7 +680,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 33, "metadata": { "collapsed": false, "deletable": true, @@ -691,7 +715,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 34, 
"metadata": { "collapsed": false, "deletable": true, @@ -699,7 +723,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_steps = 28\n", "n_inputs = 28\n", @@ -728,7 +752,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 35, "metadata": { "collapsed": false, "deletable": true, @@ -744,7 +768,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 36, "metadata": { "collapsed": false, "deletable": true, @@ -779,7 +803,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 37, "metadata": { "collapsed": true, "deletable": true, @@ -787,7 +811,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_steps = 28\n", "n_inputs = 28\n", @@ -801,7 +825,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 38, "metadata": { "collapsed": false, "deletable": true, @@ -821,7 +845,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 39, "metadata": { "collapsed": true, "deletable": true, @@ -843,7 +867,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 40, "metadata": { "collapsed": false, "deletable": true, @@ -878,7 +902,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 41, "metadata": { "collapsed": false, "deletable": true, @@ -901,7 +925,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 42, "metadata": { "collapsed": false, "deletable": true, @@ -938,7 +962,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 43, "metadata": { "collapsed": false, "deletable": true, @@ -951,7 +975,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 44, "metadata": { "collapsed": false, "deletable": true, @@ -984,7 +1008,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 45, "metadata": { "collapsed": true, "deletable": true, @@ -992,7 +1016,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_steps = 20\n", "n_inputs = 1\n", @@ -1018,7 +1042,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 46, "metadata": { "collapsed": true, "deletable": true, @@ -1026,7 +1050,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_steps = 20\n", "n_inputs = 1\n", @@ -1039,7 +1063,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 47, "metadata": { "collapsed": false, "deletable": true, @@ -1054,7 +1078,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 48, "metadata": { "collapsed": true, "deletable": true, @@ -1067,7 +1091,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 49, "metadata": { "collapsed": true, "deletable": true, @@ -1086,7 +1110,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 50, "metadata": { "collapsed": true, "deletable": true, @@ -1099,7 +1123,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 51, "metadata": { "collapsed": false, "deletable": true, @@ -1124,7 +1148,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 52, "metadata": { "collapsed": false, "deletable": true, @@ -1141,7 +1165,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 53, "metadata": { "collapsed": false, "deletable": true, @@ -1154,7 +1178,7 @@ }, { "cell_type": "code", - "execution_count": 52, + 
"execution_count": 54, "metadata": { "collapsed": false, "deletable": true, @@ -1185,7 +1209,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 55, "metadata": { "collapsed": true, "deletable": true, @@ -1193,7 +1217,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_steps = 20\n", "n_inputs = 1\n", @@ -1205,7 +1229,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 56, "metadata": { "collapsed": false, "deletable": true, @@ -1219,7 +1243,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 57, "metadata": { "collapsed": true, "deletable": true, @@ -1233,7 +1257,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 58, "metadata": { "collapsed": true, "deletable": true, @@ -1248,7 +1272,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 59, "metadata": { "collapsed": true, "deletable": true, @@ -1266,7 +1290,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 60, "metadata": { "collapsed": false, "deletable": true, @@ -1294,7 +1318,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 61, "metadata": { "collapsed": false, "deletable": true, @@ -1307,7 +1331,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 62, "metadata": { "collapsed": false, "deletable": true, @@ -1337,7 +1361,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 63, "metadata": { "collapsed": false, "deletable": true, @@ -1357,7 +1381,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 64, "metadata": { "collapsed": false, "deletable": true, @@ -1375,7 +1399,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 65, "metadata": { "collapsed": false, "deletable": true, @@ -1435,7 +1459,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 66, "metadata": { "collapsed": true, "deletable": true, @@ -1443,7 +1467,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_inputs = 2\n", "n_steps = 5\n", @@ -1453,7 +1477,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 67, "metadata": { "collapsed": true, "deletable": true, @@ -1472,7 +1496,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 68, "metadata": { "collapsed": true, "deletable": true, @@ -1485,7 +1509,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 69, "metadata": { "collapsed": true, "deletable": true, @@ -1498,7 +1522,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 70, "metadata": { "collapsed": true, "deletable": true, @@ -1513,7 +1537,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 71, "metadata": { "collapsed": false, "deletable": true, @@ -1546,7 +1570,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 72, "metadata": { "collapsed": true, "deletable": true, @@ -1573,7 +1597,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 73, "metadata": { "collapsed": false, "deletable": true, @@ -1603,7 +1627,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 74, "metadata": { "collapsed": true, "deletable": true, @@ -1611,7 +1635,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_inputs = 5\n", "n_steps = 20\n", @@ -1622,7 +1646,7 @@ }, { 
"cell_type": "code", - "execution_count": 73, + "execution_count": 75, "metadata": { "collapsed": false, "deletable": true, @@ -1639,7 +1663,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 76, "metadata": { "collapsed": true, "deletable": true, @@ -1652,7 +1676,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 77, "metadata": { "collapsed": false, "deletable": true, @@ -1678,7 +1702,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 78, "metadata": { "collapsed": true, "deletable": true, @@ -1686,7 +1710,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_inputs = 1\n", "n_neurons = 100\n", @@ -1700,7 +1724,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 79, "metadata": { "collapsed": false, "deletable": true, @@ -1720,7 +1744,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 80, "metadata": { "collapsed": true, "deletable": true, @@ -1754,7 +1778,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 81, "metadata": { "collapsed": false, "deletable": true, @@ -1788,7 +1812,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 82, "metadata": { "collapsed": true, "deletable": true, @@ -1796,7 +1820,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_inputs = 1\n", "n_neurons = 100\n", @@ -1828,7 +1852,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 83, "metadata": { "collapsed": false, "deletable": true, @@ -1874,7 +1898,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 84, "metadata": { "collapsed": false, "deletable": true, @@ -1882,7 +1906,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "import sys\n", "training = True # in a script, this would be (sys.argv[-1] == \"train\") instead\n", @@ -1934,7 +1958,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 85, "metadata": { "collapsed": true, "deletable": true, @@ -1942,12 +1966,14 @@ }, "outputs": [], "source": [ + "reset_graph()\n", + "\n", "lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons)" ] }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 86, "metadata": { "collapsed": true, "deletable": true, @@ -1955,8 +1981,6 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", - "\n", "n_steps = 28\n", "n_inputs = 28\n", "n_neurons = 150\n", @@ -1986,7 +2010,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 87, "metadata": { "collapsed": false, "deletable": true, @@ -1999,7 +2023,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 88, "metadata": { "collapsed": false, "deletable": true, @@ -2012,7 +2036,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 89, "metadata": { "collapsed": false, "deletable": true, @@ -2038,7 +2062,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 90, "metadata": { "collapsed": true, "deletable": true, @@ -2051,7 +2075,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 91, "metadata": { "collapsed": true, "deletable": true, @@ -2094,7 +2118,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 92, "metadata": { "collapsed": true, "deletable": true, @@ -2137,7 +2161,7 @@ }, { "cell_type": "code", - "execution_count": 91, + 
"execution_count": 93, "metadata": { "collapsed": false, "deletable": true, @@ -2150,7 +2174,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 94, "metadata": { "collapsed": false, "deletable": true, @@ -2173,7 +2197,7 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 95, "metadata": { "collapsed": false, "deletable": true, @@ -2193,7 +2217,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 96, "metadata": { "collapsed": false, "deletable": true, @@ -2206,7 +2230,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 97, "metadata": { "collapsed": false, "deletable": true, @@ -2219,7 +2243,7 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 98, "metadata": { "collapsed": false, "deletable": true, @@ -2242,7 +2266,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 99, "metadata": { "collapsed": true, "deletable": true, @@ -2280,7 +2304,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 100, "metadata": { "collapsed": false, "deletable": true, @@ -2294,7 +2318,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 101, "metadata": { "collapsed": false, "deletable": true, @@ -2307,7 +2331,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 102, "metadata": { "collapsed": false, "deletable": true, @@ -2330,7 +2354,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 103, "metadata": { "collapsed": true, "deletable": true, @@ -2356,7 +2380,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 104, "metadata": { "collapsed": true, "deletable": true, @@ -2364,7 +2388,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "# Input data.\n", "train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])\n", @@ -2373,7 +2397,7 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 105, "metadata": { "collapsed": false, "deletable": true, @@ -2391,7 +2415,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 106, "metadata": { "collapsed": true, "deletable": true, @@ -2405,7 +2429,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 107, "metadata": { "collapsed": true, "deletable": true, @@ -2452,7 +2476,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 108, "metadata": { "collapsed": false, "deletable": true, @@ -2511,7 +2535,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 109, "metadata": { "collapsed": false, "deletable": true, @@ -2534,7 +2558,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 110, "metadata": { "collapsed": true, "deletable": true, @@ -2558,7 +2582,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 111, "metadata": { "collapsed": false, "deletable": true, @@ -2597,7 +2621,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 112, "metadata": { "collapsed": false, "deletable": true, @@ -2606,7 +2630,7 @@ "outputs": [], "source": [ "import tensorflow as tf\n", - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_steps = 50\n", "n_neurons = 200\n", @@ -2642,7 +2666,7 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 113, "metadata": { "collapsed": false, "deletable": true, diff --git a/15_autoencoders.ipynb b/15_autoencoders.ipynb index 
4f7156e..e984ed3 100644 --- a/15_autoencoders.ipynb +++ b/15_autoencoders.ipynb @@ -55,12 +55,14 @@ "\n", "# Common imports\n", "import numpy as np\n", - "import numpy.random as rnd\n", "import os\n", "import sys\n", "\n", "# to make this notebook's output stable across runs\n", - "rnd.seed(42)\n", + "def reset_graph(seed=42):\n", + " tf.reset_default_graph()\n", + " tf.set_random_seed(seed)\n", + " np.random.seed(seed)\n", "\n", "# To plot pretty figures\n", "%matplotlib inline\n", @@ -159,15 +161,15 @@ "outputs": [], "source": [ "rnd.seed(4)\n", - "m = 100\n", + "m = 200\n", "w1, w2 = 0.1, 0.3\n", "noise = 0.1\n", "\n", "angles = rnd.rand(m) * 3 * np.pi / 2 - 0.5\n", - "X_train = np.empty((m, 3))\n", - "X_train[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * rnd.randn(m) / 2\n", - "X_train[:, 1] = np.sin(angles) * 0.7 + noise * rnd.randn(m) / 2\n", - "X_train[:, 2] = X_train[:, 0] * w1 + X_train[:, 1] * w2 + noise * rnd.randn(m)" + "data = np.empty((m, 3))\n", + "data[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * np.random.randn(m) / 2\n", + "data[:, 1] = np.sin(angles) * 0.7 + noise * np.random.randn(m) / 2\n", + "data[:, 2] = data[:, 0] * w1 + data[:, 1] * w2 + noise * np.random.randn(m)" ] }, { @@ -192,30 +194,8 @@ "source": [ "from sklearn.preprocessing import StandardScaler\n", "scaler = StandardScaler()\n", - "X_train = scaler.fit_transform(X_train)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "Going to need TensorFlow..." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "import tensorflow as tf" + "X_train = scaler.fit_transform(data[:100])\n", + "X_test = scaler.transform(data[100:])" ] }, { @@ -246,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "import tensorflow as tf\n", + "\n", + "reset_graph()\n", "\n", "n_inputs = 3\n", "n_hidden = 2 # codings\n", @@ -266,17 +248,17 @@ "hidden = tf.layers.dense(X, n_hidden)\n", "outputs = tf.layers.dense(hidden, n_outputs)\n", "\n", - "mse = tf.reduce_mean(tf.square(outputs - X))\n", + "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))\n", "\n", "optimizer = tf.train.AdamOptimizer(learning_rate)\n", - "training_op = optimizer.minimize(mse)\n", + "training_op = optimizer.minimize(reconstruction_loss)\n", "\n", "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ - "n_iterations = 10000\n", + "n_iterations = 1000\n", "codings = hidden\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for iteration in range(n_iterations):\n", " training_op.run(feed_dict={X: X_train})\n", - " codings_val = codings.eval(feed_dict={X: X_train})" + " codings_val = codings.eval(feed_dict={X: X_test})" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": { "collapsed": false, "deletable": true, "editable": true }, @@ -334,7 +316,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": { "collapsed": false, "deletable": true, "editable": true }, @@ -378,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": { "collapsed": false, "deletable": 
true, @@ -386,11 +368,11 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "from functools import partial\n", "\n", - "n_inputs = 28*28\n", + "n_inputs = 28 * 28\n", "n_hidden1 = 300\n", "n_hidden2 = 150 # codings\n", "n_hidden3 = n_hidden1\n", @@ -399,33 +381,32 @@ "learning_rate = 0.01\n", "l2_reg = 0.0001\n", "\n", - "initializer = tf.contrib.layers.variance_scaling_initializer() # He initialization\n", - "#Equivalent to:\n", - "#initializer = lambda shape, dtype=tf.float32: tf.truncated_normal(shape, 0., stddev=np.sqrt(2/shape[0]))\n", - "\n", "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", "\n", - "my_dense_layer = partial(\n", - " tf.layers.dense,\n", - " activation=tf.nn.elu,\n", - " kernel_initializer=initializer,\n", - " kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))\n", + "he_init = tf.contrib.layers.variance_scaling_initializer() # He initialization\n", + "#Equivalent to:\n", + "#he_init = lambda shape, dtype=tf.float32: tf.truncated_normal(shape, 0., stddev=np.sqrt(2/shape[0]))\n", + "l2_regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n", + "my_dense_layer = partial(tf.layers.dense,\n", + " activation=tf.nn.elu,\n", + " kernel_initializer=he_init,\n", + " kernel_regularizer=l2_regularizer)\n", "\n", "hidden1 = my_dense_layer(X, n_hidden1)\n", "hidden2 = my_dense_layer(hidden1, n_hidden2)\n", "hidden3 = my_dense_layer(hidden2, n_hidden3)\n", "outputs = my_dense_layer(hidden3, n_outputs, activation=None)\n", "\n", - "mse = tf.reduce_mean(tf.square(outputs - X))\n", + "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))\n", "\n", "reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n", - "loss = tf.add_n([mse] + reg_losses)\n", + "loss = tf.add_n([reconstruction_loss] + reg_losses)\n", "\n", "optimizer = tf.train.AdamOptimizer(learning_rate)\n", "training_op = optimizer.minimize(loss)\n", "\n", "init = tf.global_variables_initializer()\n", - "saver = tf.train.Saver()" + "saver = tf.train.Saver() # not shown in the book" ] }, { @@ -440,7 +421,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": { "collapsed": false, "deletable": true, @@ -448,7 +429,7 @@ }, "outputs": [], "source": [ - "n_epochs = 4\n", + "n_epochs = 5\n", "batch_size = 150\n", "\n", "with tf.Session() as sess:\n", @@ -456,13 +437,13 @@ " for epoch in range(n_epochs):\n", " n_batches = mnist.train.num_examples // batch_size\n", " for iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", - " sys.stdout.flush()\n", + " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\") # not shown in the book\n", + " sys.stdout.flush() # not shown\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", " sess.run(training_op, feed_dict={X: X_batch})\n", - " mse_train = mse.eval(feed_dict={X: X_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n", - " saver.save(sess, \"./my_model_all_layers.ckpt\")" + " loss_train = reconstruction_loss.eval(feed_dict={X: X_batch}) # not shown\n", + " print(\"\\r{}\".format(epoch), \"Train MSE:\", loss_train) # not shown\n", + " saver.save(sess, \"./my_model_all_layers.ckpt\") # not shown" ] }, { @@ -477,7 +458,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": { "collapsed": false, "deletable": true, @@ -502,7 +483,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": { "collapsed": 
false, "deletable": true, @@ -514,6 +495,146 @@ "save_fig(\"reconstruction_plot\")" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Tying weights" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). Unfortunately this makes it impossible (or very tricky) to use the `tf.layers.dense()` function, so we need to build the Autoencoder manually:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "reset_graph()\n", + "\n", + "n_inputs = 28 * 28\n", + "n_hidden1 = 300\n", + "n_hidden2 = 150 # codings\n", + "n_hidden3 = n_hidden1\n", + "n_outputs = n_inputs\n", + "\n", + "learning_rate = 0.01\n", + "l2_reg = 0.0005" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "activation = tf.nn.elu\n", + "regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n", + "initializer = tf.contrib.layers.variance_scaling_initializer()\n", + "\n", + "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", + "\n", + "weights1_init = initializer([n_inputs, n_hidden1])\n", + "weights2_init = initializer([n_hidden1, n_hidden2])\n", + "\n", + "weights1 = tf.Variable(weights1_init, dtype=tf.float32, name=\"weights1\")\n", + "weights2 = tf.Variable(weights2_init, dtype=tf.float32, name=\"weights2\")\n", + "weights3 = tf.transpose(weights2, name=\"weights3\") # tied weights\n", + "weights4 = tf.transpose(weights1, name=\"weights4\") # tied weights\n", + "\n", + "biases1 = tf.Variable(tf.zeros(n_hidden1), name=\"biases1\")\n", + "biases2 = tf.Variable(tf.zeros(n_hidden2), name=\"biases2\")\n", + "biases3 = tf.Variable(tf.zeros(n_hidden3), name=\"biases3\")\n", + "biases4 = tf.Variable(tf.zeros(n_outputs), name=\"biases4\")\n", + "\n", + "hidden1 = activation(tf.matmul(X, weights1) + biases1)\n", + "hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)\n", + "hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)\n", + "outputs = tf.matmul(hidden3, weights4) + biases4\n", + "\n", + "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))\n", + "reg_loss = regularizer(weights1) + regularizer(weights2)\n", + "loss = reconstruction_loss + reg_loss\n", + "\n", + "optimizer = tf.train.AdamOptimizer(learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "\n", + "init = tf.global_variables_initializer()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "n_epochs = 5\n", + "batch_size = 150\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " n_batches = mnist.train.num_examples // batch_size\n", + " for iteration in range(n_batches):\n", + " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", + " sys.stdout.flush()\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, 
feed_dict={X: X_batch})\n", + " loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})\n", + " print(\"\\r{}\".format(epoch), \"Train MSE:\", loss_train)\n", + " saver.save(sess, \"./my_model_tying_weights.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true, + "scrolled": true + }, + "outputs": [], + "source": [ + "show_reconstructed_digits(X, outputs, \"./my_model_tying_weights.ckpt\")" + ] + }, { "cell_type": "markdown", "metadata": { @@ -541,12 +662,12 @@ "editable": true }, "source": [ - "Let's create a function that will train one autoencoder and return the transformed training set (ie. the output of the hidden layer) and the model parameters." + "Let's create a function that will train one autoencoder and return the transformed training set (i.e., the output of the hidden layer) and the model parameters." ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 19, "metadata": { "collapsed": true, "deletable": true, @@ -554,11 +675,17 @@ }, "outputs": [], "source": [ + "reset_graph()\n", + "\n", "from functools import partial\n", "\n", - "def train_autoencoder(X_train, n_neurons, n_epochs, batch_size, learning_rate = 0.01, l2_reg = 0.0005, activation=tf.nn.elu):\n", + "def train_autoencoder(X_train, n_neurons, n_epochs, batch_size,\n", + " learning_rate = 0.01, l2_reg = 0.0005,\n", + " activation=tf.nn.elu, seed=42):\n", " graph = tf.Graph()\n", " with graph.as_default():\n", + " tf.set_random_seed(seed)\n", + "\n", " n_inputs = X_train.shape[1]\n", "\n", " X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", @@ -572,10 +699,10 @@ " hidden = my_dense_layer(X, n_neurons, name=\"hidden\")\n", " outputs = my_dense_layer(hidden, n_inputs, activation=None, name=\"outputs\")\n", "\n", - " mse = tf.reduce_mean(tf.square(outputs - X))\n", + " reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))\n", "\n", " reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)\n", - " loss = tf.add_n([mse] + reg_losses)\n", + " loss = tf.add_n([reconstruction_loss] + reg_losses)\n", "\n", " optimizer = tf.train.AdamOptimizer(learning_rate)\n", " training_op = optimizer.minimize(loss)\n", @@ -592,8 +719,8 @@ " indices = rnd.permutation(len(X_train))[:batch_size]\n", " X_batch = X_train[indices]\n", " sess.run(training_op, feed_dict={X: X_batch})\n", - " mse_train = mse.eval(feed_dict={X: X_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n", + " loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})\n", + " print(\"\\r{}\".format(epoch), \"Train MSE:\", loss_train)\n", " params = dict([(var.name, var.eval()) for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])\n", " hidden_val = hidden.eval(feed_dict={X: X_train})\n", " return hidden_val, params[\"hidden/kernel:0\"], params[\"hidden/bias:0\"], params[\"outputs/kernel:0\"], params[\"outputs/bias:0\"]" @@ -611,7 +738,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 20, "metadata": { "collapsed": false, "deletable": true, @@ -635,7 +762,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 21, "metadata": { "collapsed": false, "deletable": true, @@ -643,7 +770,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_inputs = 28*28\n", "\n", @@ -656,7 +783,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 22, "metadata": { "collapsed": false, "deletable": true, 
@@ -689,15 +816,15 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 23, "metadata": { - "collapsed": false, + "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_inputs = 28 * 28\n", "n_hidden1 = 300\n", @@ -734,29 +861,53 @@ "hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)\n", "outputs = tf.matmul(hidden3, weights4) + biases4\n", "\n", + "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "optimizer = tf.train.AdamOptimizer(learning_rate)\n", "\n", "with tf.name_scope(\"phase1\"):\n", - " optimizer = tf.train.AdamOptimizer(learning_rate)\n", " phase1_outputs = tf.matmul(hidden1, weights4) + biases4 # bypass hidden2 and hidden3\n", - " phase1_mse = tf.reduce_mean(tf.square(phase1_outputs - X))\n", + " phase1_reconstruction_loss = tf.reduce_mean(tf.square(phase1_outputs - X))\n", " phase1_reg_loss = regularizer(weights1) + regularizer(weights4)\n", - " phase1_loss = phase1_mse + phase1_reg_loss\n", + " phase1_loss = phase1_reconstruction_loss + phase1_reg_loss\n", " phase1_training_op = optimizer.minimize(phase1_loss)\n", "\n", "with tf.name_scope(\"phase2\"):\n", - " optimizer = tf.train.AdamOptimizer(learning_rate)\n", - " phase2_mse = tf.reduce_mean(tf.square(hidden3 - hidden1))\n", + " phase2_reconstruction_loss = tf.reduce_mean(tf.square(hidden3 - hidden1))\n", " phase2_reg_loss = regularizer(weights2) + regularizer(weights3)\n", - " phase2_loss = phase2_mse + phase2_reg_loss\n", - " phase2_training_op = optimizer.minimize(phase2_loss, var_list=[weights2, biases2, weights3, biases3]) # freeze hidden1\n", - " \n", + " phase2_loss = phase2_reconstruction_loss + phase2_reg_loss\n", + " train_vars = [weights2, biases2, weights3, biases3]\n", + " phase2_training_op = optimizer.minimize(phase2_loss, var_list=train_vars) # freeze hidden1" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ "init = tf.global_variables_initializer()\n", "saver = tf.train.Saver()" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 26, "metadata": { "collapsed": false, "deletable": true, @@ -765,7 +916,7 @@ "outputs": [], "source": [ "training_ops = [phase1_training_op, phase2_training_op]\n", - "mses = [phase1_mse, phase2_mse]\n", + "reconstruction_losses = [phase1_reconstruction_loss, phase2_reconstruction_loss]\n", "n_epochs = [4, 4]\n", "batch_sizes = [150, 150]\n", "\n", @@ -780,24 +931,11 @@ " sys.stdout.flush()\n", " X_batch, y_batch = mnist.train.next_batch(batch_sizes[phase])\n", " sess.run(training_ops[phase], feed_dict={X: X_batch})\n", - " mse_train = mses[phase].eval(feed_dict={X: X_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n", + " loss_train = reconstruction_losses[phase].eval(feed_dict={X: X_batch})\n", + " print(\"\\r{}\".format(epoch), \"Train MSE:\", loss_train)\n", " saver.save(sess, \"./my_model_one_at_a_time.ckpt\")\n", - " mse_test = mses[phase].eval(feed_dict={X: mnist.test.images})\n", - " print(\"Test MSE:\", mse_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - 
"show_reconstructed_digits(X, outputs, \"./my_model_one_at_a_time.ckpt\")" + " loss_test = reconstruction_loss.eval(feed_dict={X: mnist.test.images})\n", + " print(\"Test MSE:\", loss_test)" ] }, { @@ -812,49 +950,7 @@ }, { "cell_type": "code", - "execution_count": 22, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "training_ops = [phase1_training_op, phase2_training_op, training_op]\n", - "mses = [phase1_mse, phase2_mse, mse]\n", - "n_epochs = [4, 4]\n", - "batch_sizes = [150, 150]\n", - "\n", - "with tf.Session() as sess:\n", - " init.run()\n", - " for phase in range(2):\n", - " print(\"Training phase #{}\".format(phase + 1))\n", - " if phase == 1:\n", - " mnist_hidden1 = hidden1.eval(feed_dict={X: mnist.train.images})\n", - " for epoch in range(n_epochs[phase]):\n", - " n_batches = mnist.train.num_examples // batch_sizes[phase]\n", - " for iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", - " sys.stdout.flush()\n", - " if phase == 1:\n", - " indices = rnd.permutation(len(mnist_hidden1))\n", - " hidden1_batch = mnist_hidden1[indices[:batch_sizes[phase]]]\n", - " feed_dict = {hidden1: hidden1_batch}\n", - " sess.run(training_ops[phase], feed_dict=feed_dict)\n", - " else:\n", - " X_batch, y_batch = mnist.train.next_batch(batch_sizes[phase])\n", - " feed_dict = {X: X_batch}\n", - " sess.run(training_ops[phase], feed_dict=feed_dict)\n", - " mse_train = mses[phase].eval(feed_dict=feed_dict)\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n", - " saver.save(sess, \"./my_model_cache_frozen.ckpt\")\n", - " mse_test = mses[phase].eval(feed_dict={X: mnist.test.images})\n", - " print(\"Test MSE:\", mse_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, + "execution_count": 27, "metadata": { "collapsed": false, "deletable": true, @@ -863,115 +959,51 @@ }, "outputs": [], "source": [ - "show_reconstructed_digits(X, outputs, \"./my_model_cache_frozen.ckpt\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "## Tying weights" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). 
Unfortunately this makes it impossible (or very tricky) to use the `tf.layers.dense()` function, so we need to build the Autoencoder manually:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "tf.reset_default_graph()\n", - "\n", - "n_inputs = 28 * 28\n", - "n_hidden1 = 300\n", - "n_hidden2 = 150 # codings\n", - "n_hidden3 = n_hidden1\n", - "n_outputs = n_inputs\n", - "\n", - "learning_rate = 0.01\n", - "l2_reg = 0.0005\n", - "\n", - "activation = tf.nn.elu\n", - "regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n", - "initializer = tf.contrib.layers.variance_scaling_initializer()\n", - "\n", - "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "\n", - "weights1_init = initializer([n_inputs, n_hidden1])\n", - "weights2_init = initializer([n_hidden1, n_hidden2])\n", - "\n", - "weights1 = tf.Variable(weights1_init, dtype=tf.float32, name=\"weights1\")\n", - "weights2 = tf.Variable(weights2_init, dtype=tf.float32, name=\"weights2\")\n", - "weights3 = tf.transpose(weights2, name=\"weights3\") # tied weights\n", - "weights4 = tf.transpose(weights1, name=\"weights4\") # tied weights\n", - "\n", - "biases1 = tf.Variable(tf.zeros(n_hidden1), name=\"biases1\")\n", - "biases2 = tf.Variable(tf.zeros(n_hidden2), name=\"biases2\")\n", - "biases3 = tf.Variable(tf.zeros(n_hidden3), name=\"biases3\")\n", - "biases4 = tf.Variable(tf.zeros(n_outputs), name=\"biases4\")\n", - "\n", - "hidden1 = activation(tf.matmul(X, weights1) + biases1)\n", - "hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)\n", - "hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)\n", - "outputs = tf.matmul(hidden3, weights4) + biases4\n", - "\n", - "mse = tf.reduce_mean(tf.square(outputs - X))\n", - "reg_loss = regularizer(weights1) + regularizer(weights2)\n", - "loss = mse + reg_loss\n", - "\n", - "optimizer = tf.train.AdamOptimizer(learning_rate)\n", - "training_op = optimizer.minimize(loss)\n", - "\n", - "init = tf.global_variables_initializer()\n", - "saver = tf.train.Saver()" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "n_epochs = 5\n", - "batch_size = 150\n", + "training_ops = [phase1_training_op, phase2_training_op]\n", + "reconstruction_losses = [phase1_reconstruction_loss, phase2_reconstruction_loss]\n", + "n_epochs = [4, 4]\n", + "batch_sizes = [150, 150]\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", - " for epoch in range(n_epochs):\n", - " n_batches = mnist.train.num_examples // batch_size\n", - " for iteration in range(n_batches):\n", - " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", - " sys.stdout.flush()\n", - " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", - " sess.run(training_op, feed_dict={X: X_batch})\n", - " mse_train = mse.eval(feed_dict={X: X_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n", - " saver.save(sess, \"./my_model_tying_weights.ckpt\")" + " for phase in range(2):\n", + " print(\"Training phase #{}\".format(phase + 1))\n", + " if phase == 1:\n", + " hidden1_cache = hidden1.eval(feed_dict={X: mnist.train.images})\n", + " for epoch in range(n_epochs[phase]):\n", + " n_batches = mnist.train.num_examples // batch_sizes[phase]\n", + " for iteration in range(n_batches):\n", + " print(\"\\r{}%\".format(100 * iteration // n_batches), 
end=\"\")\n", + " sys.stdout.flush()\n", + " if phase == 1:\n", + " indices = rnd.permutation(mnist.train.num_examples)\n", + " hidden1_batch = hidden1_cache[indices[:batch_sizes[phase]]]\n", + " feed_dict = {hidden1: hidden1_batch}\n", + " sess.run(training_ops[phase], feed_dict=feed_dict)\n", + " else:\n", + " X_batch, y_batch = mnist.train.next_batch(batch_sizes[phase])\n", + " feed_dict = {X: X_batch}\n", + " sess.run(training_ops[phase], feed_dict=feed_dict)\n", + " loss_train = reconstruction_losses[phase].eval(feed_dict=feed_dict)\n", + " print(\"\\r{}\".format(epoch), \"Train MSE:\", loss_train)\n", + " saver.save(sess, \"./my_model_cache_frozen.ckpt\")\n", + " loss_test = reconstruction_loss.eval(feed_dict={X: mnist.test.images})\n", + " print(\"Test MSE:\", loss_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Visualizing the Reconstructions" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 28, "metadata": { "collapsed": false, "deletable": true, @@ -979,7 +1011,54 @@ }, "outputs": [], "source": [ - "show_reconstructed_digits(X, outputs, \"./my_model_tying_weights.ckpt\")" + "n_test_digits = 2\n", + "X_test = mnist.test.images[:n_test_digits]\n", + "\n", + "with tf.Session() as sess:\n", + " saver.restore(sess, \"./my_model_one_at_a_time.ckpt\") # not shown in the book\n", + " outputs_val = outputs.eval(feed_dict={X: X_test})\n", + "\n", + "def plot_image(image, shape=[28, 28]):\n", + " plt.imshow(image.reshape(shape), cmap=\"Greys\", interpolation=\"nearest\")\n", + " plt.axis(\"off\")\n", + "\n", + "for digit_index in range(n_test_digits):\n", + " plt.subplot(n_test_digits, 2, digit_index * 2 + 1)\n", + " plot_image(X_test[digit_index])\n", + " plt.subplot(n_test_digits, 2, digit_index * 2 + 2)\n", + " plot_image(outputs_val[digit_index])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Visualizing the extracted features" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " saver.restore(sess, \"./my_model_one_at_a_time.ckpt\") # not shown in the book\n", + " weights1_val = weights1.eval()\n", + "\n", + "for i in range(5):\n", + " plt.subplot(1, 5, i + 1)\n", + " plot_image(weights1_val.T[i])\n", + "\n", + "save_fig(\"extracted_features_plot\") # not shown\n", + "plt.show() # not shown" ] }, { @@ -992,9 +1071,19 @@ "# Unsupervised pretraining" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Let's create a small neural network for MNIST classification:" + ] + }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 30, "metadata": { "collapsed": false, "deletable": true, @@ -1002,7 +1091,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_inputs = 28 * 28\n", "n_hidden1 = 300\n", @@ -1061,7 +1150,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 31, "metadata": { "collapsed": false, "deletable": true, @@ -1102,7 +1191,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 32, "metadata": { "collapsed": false, "deletable": true, @@ -1146,7 +1235,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "Note: 
the book uses `tf.contrib.layers.dropout()` rather than `tf.layers.dropout()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dropout()`, because anything in the contrib module may change or be deleted without notice. The `tf.layers.dropout()` function is almost identical to the `tf.contrib.layers.dropout()` function, except for a few minor differences. Most importantly:\n", "* you must specify the dropout rate (`rate`) rather than the keep probability (`keep_prob`), where `rate` is simply equal to `1 - keep_prob`,\n", @@ -1154,16 +1246,26 @@ ] }, { - "cell_type": "code", - "execution_count": 31, + "cell_type": "markdown", "metadata": { - "collapsed": false, + "deletable": true, + "editable": true + }, + "source": [ + "Using Gaussian noise:" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_inputs = 28 * 28\n", "n_hidden1 = 300\n", @@ -1171,42 +1273,47 @@ "n_hidden3 = n_hidden1\n", "n_outputs = n_inputs\n", "\n", - "learning_rate = 0.01\n", - "l2_reg = 0.00001\n", - "dropout_rate = 0.3\n", - "\n", - "activation = tf.nn.elu\n", - "regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n", - "initializer = tf.contrib.layers.variance_scaling_initializer()\n", + "learning_rate = 0.01" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "noise_level = 1.0\n", "\n", "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "is_training = tf.placeholder_with_default(False, shape=(), name='is_training')\n", + "X_noisy = X + noise_level * tf.random_normal(tf.shape(X))\n", "\n", - "X_drop = tf.layers.dropout(X, dropout_rate, training=is_training)\n", - "\n", - "weights1_init = initializer([n_inputs, n_hidden1])\n", - "weights2_init = initializer([n_hidden1, n_hidden2])\n", - "\n", - "weights1 = tf.Variable(weights1_init, dtype=tf.float32, name=\"weights1\")\n", - "weights2 = tf.Variable(weights2_init, dtype=tf.float32, name=\"weights2\")\n", - "weights3 = tf.transpose(weights2, name=\"weights3\") # tied weights\n", - "weights4 = tf.transpose(weights1, name=\"weights4\") # tied weights\n", - "\n", - "biases1 = tf.Variable(tf.zeros(n_hidden1), name=\"biases1\")\n", - "biases2 = tf.Variable(tf.zeros(n_hidden2), name=\"biases2\")\n", - "biases3 = tf.Variable(tf.zeros(n_hidden3), name=\"biases3\")\n", - "biases4 = tf.Variable(tf.zeros(n_outputs), name=\"biases4\")\n", - "\n", - "hidden1 = activation(tf.matmul(X_drop, weights1) + biases1)\n", - "hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)\n", - "hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)\n", - "outputs = tf.matmul(hidden3, weights4) + biases4\n", + "hidden1 = tf.layers.dense(X_noisy, n_hidden1, activation=tf.nn.relu,\n", + " name=\"hidden1\")\n", + "hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, # not shown in the book\n", + " name=\"hidden2\") # not shown\n", + "hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, # not shown\n", + " name=\"hidden3\") # not shown\n", + "outputs = tf.layers.dense(hidden3, n_outputs, name=\"outputs\") # not shown\n", "\n", + "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X)) # MSE" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false, + "deletable": 
true, + "editable": true + }, + "outputs": [], + "source": [ "optimizer = tf.train.AdamOptimizer(learning_rate)\n", - "mse = tf.reduce_mean(tf.square(outputs - X))\n", - "reg_loss = regularizer(weights1) + regularizer(weights2)\n", - "loss = mse + reg_loss\n", - "training_op = optimizer.minimize(loss)\n", + "training_op = optimizer.minimize(reconstruction_loss)\n", " \n", "init = tf.global_variables_initializer()\n", "saver = tf.train.Saver()" @@ -1214,7 +1321,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 36, "metadata": { "collapsed": false, "deletable": true, @@ -1233,23 +1340,10 @@ " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", " sys.stdout.flush()\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", - " sess.run(training_op, feed_dict={X: X_batch, is_training: True})\n", - " mse_train = mse.eval(feed_dict={X: X_batch, is_training: False})\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n", - " saver.save(sess, \"./my_model_stacked_denoising.ckpt\")" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "show_reconstructed_digits(X, outputs, \"./my_model_stacked_denoising.ckpt\")" + " sess.run(training_op, feed_dict={X: X_batch})\n", + " loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})\n", + " print(\"\\r{}\".format(epoch), \"Train MSE:\", loss_train)\n", + " saver.save(sess, \"./my_model_stacked_denoising_gaussian.ckpt\")" ] }, { @@ -1259,40 +1353,113 @@ "editable": true }, "source": [ - "## Visualizing the extracted features" + "Using dropout:" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 37, "metadata": { - "collapsed": false, + "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ - "with tf.Session() as sess:\n", - " saver.restore(sess, \"./my_model_stacked_denoising.ckpt\")\n", - " weights1_val = weights1.eval()" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "for i in range(5):\n", - " plt.subplot(1, 5, i + 1)\n", - " plot_image(weights1_val.T[i])\n", + "reset_graph()\n", "\n", - "save_fig(\"extracted_features_plot\")\n", - "plt.show()" + "n_inputs = 28 * 28\n", + "n_hidden1 = 300\n", + "n_hidden2 = 150 # codings\n", + "n_hidden3 = n_hidden1\n", + "n_outputs = n_inputs\n", + "\n", + "learning_rate = 0.01" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "dropout_rate = 0.3\n", + "\n", + "training = tf.placeholder_with_default(False, shape=(), name='training')\n", + "\n", + "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", + "X_drop = tf.layers.dropout(X, dropout_rate, training=training)\n", + "\n", + "hidden1 = tf.layers.dense(X_drop, n_hidden1, activation=tf.nn.relu,\n", + " name=\"hidden1\")\n", + "hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, # not shown in the book\n", + " name=\"hidden2\") # not shown\n", + "hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, # not shown\n", + " name=\"hidden3\") # not shown\n", + "outputs = tf.layers.dense(hidden3, n_outputs, name=\"outputs\") # not shown\n", + "\n", + "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X)) # MSE" + ] + }, + { + "cell_type": 
"code", + "execution_count": 39, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "optimizer = tf.train.AdamOptimizer(learning_rate)\n", + "training_op = optimizer.minimize(reconstruction_loss)\n", + " \n", + "init = tf.global_variables_initializer()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "n_epochs = 10\n", + "batch_size = 150\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " n_batches = mnist.train.num_examples // batch_size\n", + " for iteration in range(n_batches):\n", + " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\")\n", + " sys.stdout.flush()\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch, training: True})\n", + " loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})\n", + " print(\"\\r{}\".format(epoch), \"Train MSE:\", loss_train)\n", + " saver.save(sess, \"./my_model_stacked_denoising_dropout.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "show_reconstructed_digits(X, outputs, \"./my_model_stacked_denoising_dropout.ckpt\")" ] }, { @@ -1307,7 +1474,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 42, "metadata": { "collapsed": false, "deletable": true, @@ -1316,7 +1483,7 @@ "outputs": [], "source": [ "p = 0.1\n", - "q = np.linspace(0, 1, 500)\n", + "q = np.linspace(0.001, 0.999, 500)\n", "kl_div = p * np.log(p / q) + (1 - p) * np.log((1 - p) / (1 - q))\n", "mse = (p - q)**2\n", "plt.plot([p, p], [0, 0.3], \"k:\")\n", @@ -1332,7 +1499,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 43, "metadata": { "collapsed": true, "deletable": true, @@ -1340,14 +1507,16 @@ }, "outputs": [], "source": [ - "def kl_divergence(p, q):\n", - " \"\"\"Kullback Leibler divergence\"\"\"\n", - " return p * tf.log(p / q) + (1 - p) * tf.log((1 - p) / (1 - q))" + "reset_graph()\n", + "\n", + "n_inputs = 28 * 28\n", + "n_hidden1 = 1000 # sparse codings\n", + "n_outputs = n_inputs" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 44, "metadata": { "collapsed": false, "deletable": true, @@ -1355,49 +1524,45 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", - "\n", - "n_inputs = 28 * 28\n", - "n_hidden1 = 1000 # sparse codings\n", - "n_outputs = n_inputs\n", + "def kl_divergence(p, q):\n", + " # Kullback Leibler divergence\n", + " return p * tf.log(p / q) + (1 - p) * tf.log((1 - p) / (1 - q))\n", "\n", "learning_rate = 0.01\n", "sparsity_target = 0.1\n", "sparsity_weight = 0.2\n", "\n", - "#activation = tf.nn.softplus # soft variant of ReLU\n", - "activation = tf.nn.sigmoid\n", - "initializer = tf.contrib.layers.variance_scaling_initializer()\n", + "X = tf.placeholder(tf.float32, shape=[None, n_inputs]) # not shown in the book\n", "\n", - "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", - "\n", - "weights1_init = initializer([n_inputs, n_hidden1])\n", - "weights2_init = initializer([n_hidden1, n_outputs])\n", - "\n", - "weights1 = tf.Variable(weights1_init, dtype=tf.float32, name=\"weights1\")\n", - "weights2 = tf.Variable(weights2_init, dtype=tf.float32, name=\"weights2\")\n", - "\n", - 
"biases1 = tf.Variable(tf.zeros(n_hidden1), name=\"biases1\")\n", - "biases2 = tf.Variable(tf.zeros(n_outputs), name=\"biases2\")\n", - "\n", - "hidden1 = activation(tf.matmul(X, weights1) + biases1)\n", - "outputs = tf.matmul(hidden1, weights2) + biases2\n", - "\n", - "optimizer = tf.train.AdamOptimizer(learning_rate)\n", - "mse = tf.reduce_mean(tf.square(outputs - X))\n", + "hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.sigmoid) # not shown\n", + "outputs = tf.layers.dense(hidden1, n_outputs) # not shown\n", "\n", "hidden1_mean = tf.reduce_mean(hidden1, axis=0) # batch mean\n", "sparsity_loss = tf.reduce_sum(kl_divergence(sparsity_target, hidden1_mean))\n", - "loss = mse + sparsity_weight * sparsity_loss\n", - "training_op = optimizer.minimize(loss)\n", + "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X)) # MSE\n", + "loss = reconstruction_loss + sparsity_weight * sparsity_loss\n", "\n", + "optimizer = tf.train.AdamOptimizer(learning_rate)\n", + "training_op = optimizer.minimize(loss)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ "init = tf.global_variables_initializer()\n", "saver = tf.train.Saver()" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 46, "metadata": { "collapsed": false, "deletable": true, @@ -1417,14 +1582,14 @@ " sys.stdout.flush()\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", " sess.run(training_op, feed_dict={X: X_batch})\n", - " mse_val, sparsity_loss_val, loss_val = sess.run([mse, sparsity_loss, loss], feed_dict={X: X_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_val, \"\\tSparsity loss:\", sparsity_loss_val, \"\\tTotal loss:\", loss_val)\n", + " reconstruction_loss_val, sparsity_loss_val, loss_val = sess.run([reconstruction_loss, sparsity_loss, loss], feed_dict={X: X_batch})\n", + " print(\"\\r{}\".format(epoch), \"Train MSE:\", reconstruction_loss_val, \"\\tSparsity loss:\", sparsity_loss_val, \"\\tTotal loss:\", loss_val)\n", " saver.save(sess, \"./my_model_sparse.ckpt\")" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 47, "metadata": { "collapsed": false, "deletable": true, @@ -1435,6 +1600,56 @@ "show_reconstructed_digits(X, outputs, \"./my_model_sparse.ckpt\")" ] }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note that the coding layer must output values from 0 to 1, which is why we use the sigmoid activation function:" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.sigmoid)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "To speed up training, you can normalize the inputs between 0 and 1, and use the cross entropy instead of the MSE for the cost function:" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "logits = tf.layers.dense(hidden1, n_outputs)\n", + "outputs = tf.nn.sigmoid(logits)\n", + "\n", + "xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits)\n", + "reconstruction_loss = tf.reduce_mean(xentropy)" + ] + }, { "cell_type": "markdown", "metadata": { 
@@ -1447,7 +1662,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 50, "metadata": { "collapsed": false, "deletable": true, @@ -1455,86 +1670,17 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", - "\n", - "n_inputs = 28*28\n", - "n_hidden1 = 500\n", - "n_hidden2 = 500\n", - "n_hidden3 = 20 # codings\n", - "n_hidden4 = n_hidden2\n", - "n_hidden5 = n_hidden1\n", - "n_outputs = n_inputs\n", - "\n", - "learning_rate = 0.001\n", - "\n", - "activation = tf.nn.elu\n", - "initializer = tf.contrib.layers.variance_scaling_initializer(mode=\"FAN_AVG\",\n", - " uniform=True)\n", - "\n", - "X = tf.placeholder(tf.float32, [None, n_inputs])\n", - "\n", - "weights1 = tf.Variable(initializer([n_inputs, n_hidden1]))\n", - "weights2 = tf.Variable(initializer([n_hidden1, n_hidden2]))\n", - "weights3_mean = tf.Variable(initializer([n_hidden2, n_hidden3]))\n", - "weights3_log_sigma = tf.Variable(initializer([n_hidden2, n_hidden3]))\n", - "weights4 = tf.Variable(initializer([n_hidden3, n_hidden4]))\n", - "weights5 = tf.Variable(initializer([n_hidden4, n_hidden5]))\n", - "weights6 = tf.Variable(initializer([n_hidden5, n_inputs]))\n", - "\n", - "biases1 = tf.Variable(tf.zeros([n_hidden1], dtype=tf.float32))\n", - "biases2 = tf.Variable(tf.zeros([n_hidden2], dtype=tf.float32))\n", - "biases3_mean = tf.Variable(tf.zeros([n_hidden3], dtype=tf.float32))\n", - "biases3_log_sigma = tf.Variable(tf.zeros([n_hidden3], dtype=tf.float32))\n", - "biases4 = tf.Variable(tf.zeros([n_hidden4], dtype=tf.float32))\n", - "biases5 = tf.Variable(tf.zeros([n_hidden5], dtype=tf.float32))\n", - "biases6 = tf.Variable(tf.zeros([n_inputs], dtype=tf.float32))\n", - "\n", - "hidden1 = activation(tf.matmul(X, weights1) + biases1)\n", - "hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)\n", - "\n", - "hidden3_mean = tf.matmul(hidden2, weights3_mean) + biases3_mean\n", - "hidden3_log_sigma = tf.matmul(hidden2, weights3_log_sigma) + biases3_log_sigma\n", - "noise = tf.random_normal(tf.shape(hidden3_log_sigma), dtype=tf.float32)\n", - "hidden3 = hidden3_mean + tf.sqrt(tf.exp(hidden3_log_sigma)) * noise\n", - "\n", - "hidden4 = activation(tf.matmul(hidden3, weights4) + biases4)\n", - "hidden5 = activation(tf.matmul(hidden4, weights5) + biases5)\n", - "logits = tf.matmul(hidden5, weights6) + biases6\n", - "outputs = tf.sigmoid(logits)\n", - "\n", - "reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits))\n", - "latent_loss = 0.5 * tf.reduce_sum(tf.exp(hidden3_log_sigma) + tf.square(hidden3_mean) - 1 - hidden3_log_sigma)\n", - "cost = reconstruction_loss + latent_loss\n", - "\n", - "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", - "training_op = optimizer.minimize(cost)\n", - "\n", - "init = tf.global_variables_initializer()\n", - "saver = tf.train.Saver()" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "from functools import partial\n", "\n", - "n_inputs = 28*28\n", + "n_inputs = 28 * 28\n", "n_hidden1 = 500\n", "n_hidden2 = 500\n", "n_hidden3 = 20 # codings\n", "n_hidden4 = n_hidden2\n", "n_hidden5 = n_hidden1\n", "n_outputs = n_inputs\n", - "\n", "learning_rate = 0.001\n", "\n", "initializer = tf.contrib.layers.variance_scaling_initializer()\n", @@ -1548,20 +1694,48 @@ "hidden1 = my_dense_layer(X, n_hidden1)\n", "hidden2 = 
my_dense_layer(hidden1, n_hidden2)\n", "hidden3_mean = my_dense_layer(hidden2, n_hidden3, activation=None)\n", - "hidden3_gamma = my_dense_layer(hidden2, n_hidden3, activation=None)\n", - "noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n", - "hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n", + "hidden3_sigma = my_dense_layer(hidden2, n_hidden3, activation=None)\n", + "noise = tf.random_normal(tf.shape(hidden3_sigma), dtype=tf.float32)\n", + "hidden3 = hidden3_mean + hidden3_sigma * noise\n", "hidden4 = my_dense_layer(hidden3, n_hidden4)\n", "hidden5 = my_dense_layer(hidden4, n_hidden5)\n", "logits = my_dense_layer(hidden5, n_outputs, activation=None)\n", "outputs = tf.sigmoid(logits)\n", "\n", - "reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits))\n", - "latent_loss = 0.5 * tf.reduce_sum(tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)\n", - "cost = reconstruction_loss + latent_loss\n", + "xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits)\n", + "reconstruction_loss = tf.reduce_sum(xentropy)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "eps = 1e-10 # smoothing term to avoid computing log(0) which is NaN\n", + "latent_loss = 0.5 * tf.reduce_sum(\n", + " tf.square(hidden3_sigma) + tf.square(hidden3_mean)\n", + " - 1 - tf.log(eps + tf.square(hidden3_sigma)))" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "loss = reconstruction_loss + latent_loss\n", "\n", "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", - "training_op = optimizer.minimize(cost)\n", + "training_op = optimizer.minimize(loss)\n", "\n", "init = tf.global_variables_initializer()\n", "saver = tf.train.Saver()" @@ -1569,7 +1743,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 53, "metadata": { "collapsed": false, "deletable": true, @@ -1589,11 +1763,185 @@ " sys.stdout.flush()\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", " sess.run(training_op, feed_dict={X: X_batch})\n", - " cost_val, reconstruction_loss_val, latent_loss_val = sess.run([cost, reconstruction_loss, latent_loss], feed_dict={X: X_batch})\n", - " print(\"\\r{}\".format(epoch), \"Train cost:\", cost_val, \"\\tReconstruction loss:\", reconstruction_loss_val, \"\\tLatent loss:\", latent_loss_val)\n", + " loss_val, reconstruction_loss_val, latent_loss_val = sess.run([loss, reconstruction_loss, latent_loss], feed_dict={X: X_batch})\n", + " print(\"\\r{}\".format(epoch), \"Train total loss:\", loss_val, \"\\tReconstruction loss:\", reconstruction_loss_val, \"\\tLatent loss:\", latent_loss_val)\n", " saver.save(sess, \"./my_model_variational.ckpt\")" ] }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "reset_graph()\n", + "\n", + "from functools import partial\n", + "\n", + "n_inputs = 28 * 28\n", + "n_hidden1 = 500\n", + "n_hidden2 = 500\n", + "n_hidden3 = 20 # codings\n", + "n_hidden4 = n_hidden2\n", + "n_hidden5 = n_hidden1\n", + "n_outputs = n_inputs\n", + "learning_rate = 0.001\n", + "\n", + "initializer = tf.contrib.layers.variance_scaling_initializer()\n", + "my_dense_layer = partial(\n", + " 
tf.layers.dense,\n", + " activation=tf.nn.elu,\n", + " kernel_initializer=initializer)\n", + "\n", + "X = tf.placeholder(tf.float32, [None, n_inputs])\n", + "hidden1 = my_dense_layer(X, n_hidden1)\n", + "hidden2 = my_dense_layer(hidden1, n_hidden2)\n", + "hidden3_mean = my_dense_layer(hidden2, n_hidden3, activation=None)\n", + "hidden3_gamma = my_dense_layer(hidden2, n_hidden3, activation=None)\n", + "noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n", + "hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n", + "hidden4 = my_dense_layer(hidden3, n_hidden4)\n", + "hidden5 = my_dense_layer(hidden4, n_hidden5)\n", + "logits = my_dense_layer(hidden5, n_outputs, activation=None)\n", + "outputs = tf.sigmoid(logits)\n", + "\n", + "xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits)\n", + "reconstruction_loss = tf.reduce_sum(xentropy)\n", + "latent_loss = 0.5 * tf.reduce_sum(\n", + " tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)\n", + "loss = reconstruction_loss + latent_loss\n", + "\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)\n", + "\n", + "init = tf.global_variables_initializer()\n", + "saver = tf.train.Saver()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Generate digits" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Let's train the model and generate a few random digits:" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "n_digits = 60\n", + "n_epochs = 50\n", + "batch_size = 150\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " for epoch in range(n_epochs):\n", + " n_batches = mnist.train.num_examples // batch_size\n", + " for iteration in range(n_batches):\n", + " print(\"\\r{}%\".format(100 * iteration // n_batches), end=\"\") # not shown in the book\n", + " sys.stdout.flush() # not shown\n", + " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " sess.run(training_op, feed_dict={X: X_batch})\n", + " loss_val, reconstruction_loss_val, latent_loss_val = sess.run([loss, reconstruction_loss, latent_loss], feed_dict={X: X_batch}) # not shown\n", + " print(\"\\r{}\".format(epoch), \"Train total loss:\", loss_val, \"\\tReconstruction loss:\", reconstruction_loss_val, \"\\tLatent loss:\", latent_loss_val) # not shown\n", + " saver.save(sess, \"./my_model_variational.ckpt\") # not shown\n", + " \n", + " codings_rnd = np.random.normal(size=[n_digits, n_hidden3])\n", + " outputs_val = outputs.eval(feed_dict={hidden3: codings_rnd})" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(8,5)) # not shown in the book\n", + "for iteration in range(n_digits):\n", + " plt.subplot(n_digits // 10, 10, iteration + 1)\n", + " plot_image(outputs_val[iteration])" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "n_rows = 6\n", + "n_cols = 10\n", + "plot_multiple_images(outputs_val.reshape(-1, 28, 28), n_rows, n_cols)\n", + "save_fig(\"generated_digits_plot\")\n", + 
"plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "Note that the latent loss is computed differently in this second variant:" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "latent_loss = 0.5 * tf.reduce_sum(\n", + " tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Encode & Decode" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1606,7 +1954,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 59, "metadata": { "collapsed": false, "deletable": true, @@ -1635,7 +1983,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 60, "metadata": { "collapsed": false, "deletable": true, @@ -1660,7 +2008,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 61, "metadata": { "collapsed": false, "deletable": true, @@ -1676,71 +2024,6 @@ " plot_image(outputs_val[iteration])" ] }, - { - "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, - "source": [ - "## Generate digits" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "n_rows = 6\n", - "n_cols = 10\n", - "n_digits = n_rows * n_cols\n", - "codings_rnd = np.random.normal(size=[n_digits, n_hidden3])\n", - "\n", - "with tf.Session() as sess:\n", - " saver.restore(sess, \"./my_model_variational.ckpt\")\n", - " outputs_val = outputs.eval(feed_dict={codings: codings_rnd})" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "plot_multiple_images(outputs_val.reshape(-1, 28, 28), n_rows, n_cols)\n", - "save_fig(\"generated_digits_plot\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "n_rows = 6\n", - "n_cols = 10\n", - "n_digits = n_rows * n_cols\n", - "codings_rnd = np.random.normal(size=[n_digits, n_hidden3])\n", - "\n", - "with tf.Session() as sess:\n", - " saver.restore(sess, \"./my_model_variational.ckpt\")\n", - " outputs_val = outputs.eval(feed_dict={codings: codings_rnd})" - ] - }, { "cell_type": "markdown", "metadata": { @@ -1753,7 +2036,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 62, "metadata": { "collapsed": false, "deletable": true,