diff --git a/09_up_and_running_with_tensorflow.ipynb b/09_up_and_running_with_tensorflow.ipynb index 93ad387..361cc17 100644 --- a/09_up_and_running_with_tensorflow.ipynb +++ b/09_up_and_running_with_tensorflow.ipynb @@ -101,25 +101,23 @@ }, "outputs": [], "source": [ - "import tensorflow as tf" + "import tensorflow as tf\n", + "\n", + "tf.reset_default_graph()\n", + "\n", + "x = tf.Variable(3, name=\"x\")\n", + "y = tf.Variable(4, name=\"y\")\n", + "f = x*x*y + y + 2" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { - "collapsed": false, - "deletable": true, - "editable": true + "collapsed": false }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", - "\n", - "x = tf.Variable(3, name=\"x\")\n", - "y = tf.Variable(4, name=\"y\")\n", - "f = x*x*y + y + 2\n", - "\n", "f" ] }, @@ -136,26 +134,19 @@ "sess = tf.Session()\n", "sess.run(x.initializer)\n", "sess.run(y.initializer)\n", - "print(sess.run(f))\n", - "sess.close()" + "result = sess.run(f)\n", + "print(result)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { - "collapsed": false, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ - "with tf.Session() as sess:\n", - " x.initializer.run()\n", - " y.initializer.run()\n", - " result = f.eval()\n", - "\n", - "result" + "sess.close()" ] }, { @@ -168,18 +159,26 @@ }, "outputs": [], "source": [ - "init = tf.global_variables_initializer()\n", - "\n", - "with tf.Session():\n", - " init.run()\n", - " result = f.eval()\n", - "\n", - "result" + "with tf.Session() as sess:\n", + " x.initializer.run()\n", + " y.initializer.run()\n", + " result = f.eval()" ] }, { "cell_type": "code", "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "metadata": { "collapsed": false, "deletable": true, @@ -189,11 +188,68 @@ "source": [ "init = tf.global_variables_initializer()\n", "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " result = f.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "init = tf.global_variables_initializer()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ "sess = tf.InteractiveSession()\n", "init.run()\n", "result = f.eval()\n", - "sess.close()\n", - "\n", + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "sess.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ "result" ] }, @@ -209,7 +265,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "metadata": { "collapsed": false, "deletable": true, @@ -225,7 +281,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 15, "metadata": { "collapsed": false, "deletable": true, @@ -237,12 +293,12 @@ "with graph.as_default():\n", " x2 = tf.Variable(2)\n", "\n", - "x2.graph is tf.get_default_graph()" + "x2.graph is graph" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 16, "metadata": { "collapsed": false, "deletable": true, @@ 
-251,12 +307,12 @@ }, "outputs": [], "source": [ - "x2.graph is graph" + "x2.graph is tf.get_default_graph()" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 17, "metadata": { "collapsed": false, "deletable": true, @@ -276,7 +332,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 18, "metadata": { "collapsed": false, "deletable": true, @@ -312,7 +368,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 19, "metadata": { "collapsed": false, "deletable": true, @@ -320,34 +376,33 @@ }, "outputs": [], "source": [ + "import numpy as np\n", "from sklearn.datasets import fetch_california_housing\n", "\n", + "tf.reset_default_graph()\n", + "\n", "housing = fetch_california_housing()\n", "m, n = housing.data.shape\n", - "housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "tf.reset_default_graph()\n", + "housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]\n", "\n", - "X = tf.constant(housing_data_plus_bias, dtype=tf.float64, name=\"X\")\n", - "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float64, name=\"y\")\n", + "X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", + "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", "XT = tf.transpose(X)\n", "theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)\n", "\n", "with tf.Session() as sess:\n", - " result = theta.eval()\n", - "\n", - "print(result)" + " theta_value = theta.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "theta_value" ] }, { @@ -362,7 +417,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 21, "metadata": { "collapsed": false, "deletable": true, @@ -389,7 +444,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 22, "metadata": { "collapsed": false, "deletable": true, @@ -426,7 +481,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 23, "metadata": { "collapsed": true, "deletable": true, @@ -442,7 +497,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 24, "metadata": { "collapsed": false, "deletable": true, @@ -468,7 +523,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 25, "metadata": { "collapsed": false, "deletable": true, @@ -500,10 +555,18 @@ " print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n", " sess.run(training_op)\n", " \n", - " best_theta = theta.eval()\n", - "\n", - "print(\"Best theta:\")\n", - "print(best_theta)" + " best_theta = theta.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "best_theta" ] }, { @@ -513,13 +576,19 @@ "editable": true }, "source": [ - "### Using autodiff\n", - "Same as above except for the `gradients = ...` line." 
+ "### Using autodiff" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Same as above except for the `gradients = ...` line:" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 27, "metadata": { "collapsed": false, "deletable": true, @@ -537,8 +606,28 @@ "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", "error = y_pred - y\n", - "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", - "gradients = tf.gradients(mse, [theta])[0]\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "gradients = tf.gradients(mse, [theta])[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ "training_op = tf.assign(theta, theta - learning_rate * gradients)\n", "\n", "init = tf.global_variables_initializer()\n", @@ -557,6 +646,80 @@ "print(best_theta)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How could you find the partial derivatives of the following function with regards to `a` and `b`?" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def my_func(a, b):\n", + " z = 0\n", + " for i in range(100):\n", + " z = a * np.cos(z + i) + z * np.sin(b - i)\n", + " return z" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "my_func(0.2, 0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "a = tf.Variable(0.2, name=\"a\")\n", + "b = tf.Variable(0.3, name=\"b\")\n", + "z = tf.constant(0.0, name=\"z0\")\n", + "for i in range(100):\n", + " z = a * tf.cos(z + i) + z * tf.sin(b - i)\n", + "\n", + "grads = tf.gradients(z, [a, b])\n", + "init = tf.global_variables_initializer()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's compute the function at $a=0.2$ and $b=0.3$, and the partial derivatives at that point with regards to $a$ and with regards to $b$:" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " init.run()\n", + " print(z.eval())\n", + " print(sess.run(grads))" + ] + }, { "cell_type": "markdown", "metadata": { @@ -569,7 +732,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 34, "metadata": { "collapsed": false, "deletable": true, @@ -587,10 +750,29 @@ "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", "error = y_pred - y\n", - "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", - "training_op = optimizer.minimize(mse)\n", - "\n", + "training_op = optimizer.minimize(mse)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false + }, + 
"outputs": [], + "source": [ "init = tf.global_variables_initializer()\n", "\n", "with tf.Session() as sess:\n", @@ -619,7 +801,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 37, "metadata": { "collapsed": false, "deletable": true, @@ -637,8 +819,29 @@ "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", "error = y_pred - y\n", - "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", - "optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.25)\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\")" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,\n", + " momentum=0.9)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ "training_op = optimizer.minimize(mse)\n", "\n", "init = tf.global_variables_initializer()" @@ -646,7 +849,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 40, "metadata": { "collapsed": false, "deletable": true, @@ -688,7 +891,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 41, "metadata": { "collapsed": false, "deletable": true, @@ -696,16 +899,26 @@ }, "outputs": [], "source": [ - ">>> tf.reset_default_graph()\n", + "tf.reset_default_graph()\n", "\n", - ">>> A = tf.placeholder(tf.float32, shape=(None, 3))\n", - ">>> B = A + 5\n", - ">>> with tf.Session() as sess:\n", - "... B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})\n", - "... B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})\n", - "...\n", - ">>> print(B_val_1)\n", - ">>> print(B_val_2)" + "A = tf.placeholder(tf.float32, shape=(None, 3))\n", + "B = A + 5\n", + "with tf.Session() as sess:\n", + " B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})\n", + " B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})\n", + "\n", + "print(B_val_1)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print(B_val_2)" ] }, { @@ -720,7 +933,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 43, "metadata": { "collapsed": false, "deletable": true, @@ -731,10 +944,29 @@ "tf.reset_default_graph()\n", "\n", "n_epochs = 1000\n", - "learning_rate = 0.01\n", - "\n", + "learning_rate = 0.01" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ "X = tf.placeholder(tf.float32, shape=(None, n + 1), name=\"X\")\n", - "y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n", + "y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", "error = y_pred - y\n", @@ -747,25 +979,42 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 46, "metadata": { - "collapsed": false, - "deletable": true, - "editable": true + "collapsed": true + }, + "outputs": [], + "source": [ + "n_epochs = 10" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": 
[ + "batch_size = 100\n", + "n_batches = int(np.ceil(m / batch_size))" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": false }, "outputs": [], "source": [ "def fetch_batch(epoch, batch_index, batch_size):\n", - " rnd.seed(epoch * n_batches + batch_index)\n", - " indices = rnd.randint(m, size=batch_size)\n", - " X_batch = scaled_housing_data_plus_bias[indices]\n", - " y_batch = housing.target.reshape(-1, 1)[indices]\n", + " rnd.seed(epoch * n_batches + batch_index) # not shown in the book\n", + " indices = rnd.randint(m, size=batch_size) # not shown\n", + " X_batch = scaled_housing_data_plus_bias[indices] # not shown\n", + " y_batch = housing.target.reshape(-1, 1)[indices] # not shown\n", " return X_batch, y_batch\n", "\n", - "n_epochs = 10\n", - "batch_size = 100\n", - "n_batches = int(np.ceil(m / batch_size))\n", - "\n", "with tf.Session() as sess:\n", " sess.run(init)\n", "\n", @@ -774,10 +1023,18 @@ " X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", "\n", - " best_theta = theta.eval()\n", - " \n", - "print(\"Best theta:\")\n", - "print(best_theta)" + " best_theta = theta.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "best_theta" ] }, { @@ -792,7 +1049,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 50, "metadata": { "collapsed": false, "deletable": true, @@ -802,46 +1059,130 @@ "source": [ "tf.reset_default_graph()\n", "\n", - "n_epochs = 1000\n", - "learning_rate = 0.01\n", + "n_epochs = 1000 # not shown in the book\n", + "learning_rate = 0.01 # not shown\n", "\n", - "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", - "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", + "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\") # not shown\n", + "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\") # not shown\n", "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", - "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", - "error = y_pred - y\n", - "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", - "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", - "training_op = optimizer.minimize(mse)\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\") # not shown\n", + "error = y_pred - y # not shown\n", + "mse = tf.reduce_mean(tf.square(error), name=\"mse\") # not shown\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) # not shown\n", + "training_op = optimizer.minimize(mse) # not shown\n", "\n", "init = tf.global_variables_initializer()\n", - "saver = tf.train.Saver()" + "saver = tf.train.Saver()\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(init)\n", + "\n", + " for epoch in range(n_epochs):\n", + " if epoch % 100 == 0:\n", + " print(\"Epoch\", epoch, \"MSE =\", mse.eval()) # not shown\n", + " save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n", + " sess.run(training_op)\n", + " \n", + " best_theta = theta.eval()\n", + " save_path = saver.save(sess, \"/tmp/my_model_final.ckpt\")" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 51, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], + "source": [ + "best_theta" + ] + }, + { + "cell_type": "code", + 
"execution_count": 52, + "metadata": { + "collapsed": false + }, + "outputs": [], "source": [ "with tf.Session() as sess:\n", - " sess.run(init)\n", + " saver.restore(sess, \"/tmp/my_model_final.ckpt\")\n", + " best_theta_restored = theta.eval() # not shown in the book" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.allclose(best_theta, best_theta_restored)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to have a saver that loads and restores `theta` with a different name, such as `\"weights\"`:" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "saver = tf.train.Saver({\"weights\": theta})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By default the saver also saves the graph structure itself in a second file with the extension `.meta`. You can use the function `tf.train.import_meta_graph()` to restore the graph structure. This function loads the graph into the default graph and returns a `Saver` that can then be used to restore the graph state (i.e., the variable values):" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "tf.reset_default_graph() # notice that we start with an empty graph.\n", "\n", - " for epoch in range(n_epochs):\n", - " if epoch % 100 == 0:\n", - " print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n", - " save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n", - " sess.run(training_op)\n", - " \n", - " best_theta = theta.eval()\n", - " save_path = saver.save(sess, \"my_model_final.ckpt\")\n", + "saver = tf.train.import_meta_graph(\"/tmp/my_model_final.ckpt.meta\") # this loads the graph structure\n", + "theta = tf.get_default_graph().get_tensor_by_name(\"theta:0\") # not shown in the book\n", "\n", - "print(\"Best theta:\")\n", - "print(best_theta)" + "with tf.Session() as sess:\n", + " saver.restore(sess, \"/tmp/my_model_final.ckpt\") # this restores the graph's state\n", + " best_theta_restored = theta.eval() # not shown in the book" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.allclose(best_theta, best_theta_restored)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This means that you can import a pretrained model without having to have the corresponding Python code to build the graph. This is very handy when you keep tweaking and saving your model: you can load a previously saved model without having to search for the version of the code that built it." 
] }, { @@ -857,7 +1198,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 57, "metadata": { "collapsed": true, "deletable": true, @@ -905,7 +1246,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 58, "metadata": { "collapsed": false, "deletable": true, @@ -929,7 +1270,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 59, "metadata": { "collapsed": false, "deletable": true, @@ -943,8 +1284,17 @@ "\n", "now = datetime.utcnow().strftime(\"%Y%m%d%H%M%S\")\n", "root_logdir = \"tf_logs\"\n", - "logdir = \"{}/run-{}/\".format(root_logdir, now)\n", - "\n", + "logdir = \"{}/run-{}/\".format(root_logdir, now)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ "n_epochs = 1000\n", "learning_rate = 0.01\n", "\n", @@ -957,15 +1307,37 @@ "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(mse)\n", "\n", - "init = tf.global_variables_initializer()\n", - "\n", - "mse_summary = tf.summary.scalar('MSE', mse)\n", - "summary_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())" + "init = tf.global_variables_initializer()" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 61, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "mse_summary = tf.summary.scalar('MSE', mse)\n", + "file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "n_epochs = 10\n", + "batch_size = 100\n", + "n_batches = int(np.ceil(m / batch_size))" + ] + }, + { + "cell_type": "code", + "execution_count": 63, "metadata": { "collapsed": false, "deletable": true, @@ -973,28 +1345,41 @@ }, "outputs": [], "source": [ - "n_epochs = 10\n", - "batch_size = 100\n", - "n_batches = int(np.ceil(m / batch_size))\n", + "with tf.Session() as sess: # not shown in the book\n", + " sess.run(init) # not shown\n", "\n", - "with tf.Session() as sess:\n", - " sess.run(init)\n", - "\n", - " for epoch in range(n_epochs):\n", + " for epoch in range(n_epochs): # not shown\n", " for batch_index in range(n_batches):\n", " X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)\n", " if batch_index % 10 == 0:\n", " summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})\n", " step = epoch * n_batches + batch_index\n", - " summary_writer.add_summary(summary_str, step)\n", + " file_writer.add_summary(summary_str, step)\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", "\n", - " best_theta = theta.eval()\n", - "\n", - "summary_writer.flush()\n", - "summary_writer.close()\n", - "print(\"Best theta:\")\n", - "print(best_theta)" + " best_theta = theta.eval() # not shown" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "file_writer.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "best_theta" ] }, { @@ -1009,7 +1394,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 66, "metadata": { "collapsed": false, "deletable": true, @@ -1029,22 +1414,42 @@ "X = tf.placeholder(tf.float32, shape=(None, n + 1), name=\"X\")\n", "y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n", "theta = 
tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", - "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", - "with tf.name_scope('loss') as scope:\n", + "y_pred = tf.matmul(X, theta, name=\"predictions\")" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "with tf.name_scope(\"loss\") as scope:\n", " error = y_pred - y\n", - " mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", + " mse = tf.reduce_mean(tf.square(error), name=\"mse\")" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(mse)\n", "\n", "init = tf.global_variables_initializer()\n", "\n", "mse_summary = tf.summary.scalar('MSE', mse)\n", - "summary_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())" + "file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 69, "metadata": { "collapsed": false, "deletable": true, @@ -1065,20 +1470,20 @@ " if batch_index % 10 == 0:\n", " summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})\n", " step = epoch * n_batches + batch_index\n", - " summary_writer.add_summary(summary_str, step)\n", + " file_writer.add_summary(summary_str, step)\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", "\n", " best_theta = theta.eval()\n", "\n", - "summary_writer.flush()\n", - "summary_writer.close()\n", + "file_writer.flush()\n", + "file_writer.close()\n", "print(\"Best theta:\")\n", "print(best_theta)" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 70, "metadata": { "collapsed": false, "deletable": true, @@ -1091,7 +1496,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 71, "metadata": { "collapsed": false, "deletable": true, @@ -1104,7 +1509,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 72, "metadata": { "collapsed": false, "deletable": true, @@ -1149,7 +1554,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 73, "metadata": { "collapsed": false, "deletable": true, @@ -1167,13 +1572,13 @@ "b1 = tf.Variable(0.0, name=\"bias1\")\n", "b2 = tf.Variable(0.0, name=\"bias2\")\n", "\n", - "linear1 = tf.add(tf.matmul(X, w1), b1, name=\"linear1\")\n", - "linear2 = tf.add(tf.matmul(X, w2), b2, name=\"linear2\")\n", + "z1 = tf.add(tf.matmul(X, w1), b1, name=\"z1\")\n", + "z2 = tf.add(tf.matmul(X, w2), b2, name=\"z2\")\n", "\n", - "relu1 = tf.maximum(linear1, 0, name=\"relu1\")\n", - "relu2 = tf.maximum(linear1, 0, name=\"relu2\") # Oops, cut&paste error! Did you spot it?\n", + "relu1 = tf.maximum(z1, 0., name=\"relu1\")\n", + "relu2 = tf.maximum(z1, 0., name=\"relu2\") # Oops, cut&paste error! 
Did you spot it?\n", "\n", - "output = tf.add_n([relu1, relu2], name=\"output\")" + "output = tf.add(relu1, relu2, name=\"output\")" ] }, { @@ -1188,7 +1593,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 74, "metadata": { "collapsed": false, "deletable": true, @@ -1199,17 +1604,27 @@ "tf.reset_default_graph()\n", "\n", "def relu(X):\n", - " w_shape = int(X.get_shape()[1]), 1\n", + " w_shape = (int(X.get_shape()[1]), 1)\n", " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", " b = tf.Variable(0.0, name=\"bias\")\n", - " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", - " return tf.maximum(linear, 0, name=\"relu\")\n", + " z = tf.add(tf.matmul(X, w), b, name=\"z\")\n", + " return tf.maximum(z, 0., name=\"relu\")\n", "\n", "n_features = 3\n", "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", "relus = [relu(X) for i in range(5)]\n", - "output = tf.add_n(relus, name=\"output\")\n", - "summary_writer = tf.summary.FileWriter(\"logs/relu1\", tf.get_default_graph())" + "output = tf.add_n(relus, name=\"output\")" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "file_writer = tf.summary.FileWriter(\"logs/relu1\", tf.get_default_graph())" ] }, { @@ -1224,7 +1639,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 76, "metadata": { "collapsed": false, "deletable": true, @@ -1236,31 +1651,35 @@ "\n", "def relu(X):\n", " with tf.name_scope(\"relu\"):\n", - " w_shape = int(X.get_shape()[1]), 1\n", - " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", - " b = tf.Variable(0.0, name=\"bias\")\n", - " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", - " return tf.maximum(linear, 0, name=\"max\")\n", - "\n", + " w_shape = (int(X.get_shape()[1]), 1) # not shown in the book\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\") # not shown\n", + " b = tf.Variable(0.0, name=\"bias\") # not shown\n", + " z = tf.add(tf.matmul(X, w), b, name=\"z\") # not shown\n", + " return tf.maximum(z, 0., name=\"max\") # not shown" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ "n_features = 3\n", "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", "relus = [relu(X) for i in range(5)]\n", "output = tf.add_n(relus, name=\"output\")\n", "\n", - "summary_writer = tf.summary.FileWriter(\"logs/relu2\", tf.get_default_graph())" + "file_writer = tf.summary.FileWriter(\"logs/relu2\", tf.get_default_graph())\n", + "file_writer.close()" ] }, { - "cell_type": "code", - "execution_count": 41, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], + "cell_type": "markdown", + "metadata": {}, "source": [ - "summary_writer.close()" + "## Sharing Variables" ] }, { @@ -1275,7 +1694,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 78, "metadata": { "collapsed": false, "deletable": true, @@ -1287,11 +1706,11 @@ "\n", "def relu(X, threshold):\n", " with tf.name_scope(\"relu\"):\n", - " w_shape = int(X.get_shape()[1]), 1\n", - " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", - " b = tf.Variable(0.0, name=\"bias\")\n", - " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", - " return tf.maximum(linear, threshold, name=\"max\")\n", + " w_shape = (int(X.get_shape()[1]), 1) # not shown in the book\n", + " w = 
tf.Variable(tf.random_normal(w_shape), name=\"weights\") # not shown\n", + " b = tf.Variable(0.0, name=\"bias\") # not shown\n", + " z = tf.add(tf.matmul(X, w), b, name=\"z\") # not shown\n", + " return tf.maximum(z, threshold, name=\"max\")\n", "\n", "threshold = tf.Variable(0.0, name=\"threshold\")\n", "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", @@ -1301,7 +1720,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 79, "metadata": { "collapsed": false, "deletable": true, @@ -1315,12 +1734,21 @@ " with tf.name_scope(\"relu\"):\n", " if not hasattr(relu, \"threshold\"):\n", " relu.threshold = tf.Variable(0.0, name=\"threshold\")\n", - " w_shape = int(X.get_shape()[1]), 1\n", - " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", - " b = tf.Variable(0.0, name=\"bias\")\n", - " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", - " return tf.maximum(linear, relu.threshold, name=\"max\")\n", - "\n", + " w_shape = int(X.get_shape()[1]), 1 # not shown in the book\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\") # not shown\n", + " b = tf.Variable(0.0, name=\"bias\") # not shown\n", + " z = tf.add(tf.matmul(X, w), b, name=\"z\") # not shown\n", + " return tf.maximum(z, relu.threshold, name=\"max\")" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", "relus = [relu(X) for i in range(5)]\n", "output = tf.add_n(relus, name=\"output\")" @@ -1328,7 +1756,47 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 81, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "with tf.variable_scope(\"relu\"):\n", + " threshold = tf.get_variable(\"threshold\", shape=(),\n", + " initializer=tf.constant_initializer(0.0))" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "with tf.variable_scope(\"relu\", reuse=True):\n", + " threshold = tf.get_variable(\"threshold\")" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "with tf.variable_scope(\"relu\") as scope:\n", + " scope.reuse_variables()\n", + " threshold = tf.get_variable(\"threshold\")" + ] + }, + { + "cell_type": "code", + "execution_count": 84, "metadata": { "collapsed": false, "deletable": true, @@ -1340,26 +1808,36 @@ "\n", "def relu(X):\n", " with tf.variable_scope(\"relu\", reuse=True):\n", - " threshold = tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n", - " w_shape = int(X.get_shape()[1]), 1\n", - " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", - " b = tf.Variable(0.0, name=\"bias\")\n", - " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", - " return tf.maximum(linear, threshold, name=\"max\")\n", + " threshold = tf.get_variable(\"threshold\")\n", + " w_shape = int(X.get_shape()[1]), 1 # not shown\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\") # not shown\n", + " b = tf.Variable(0.0, name=\"bias\") # not shown\n", + " z = tf.add(tf.matmul(X, w), b, name=\"z\") # not shown\n", + " return tf.maximum(z, threshold, name=\"max\")\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", "with tf.variable_scope(\"relu\"):\n", - " threshold = 
tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n", - "relus = [relu(X) for i in range(5)]\n", - "output = tf.add_n(relus, name=\"output\")\n", - "\n", - "summary_writer = tf.summary.FileWriter(\"logs/relu6\", tf.get_default_graph())\n", - "summary_writer.close()" + " threshold = tf.get_variable(\"threshold\", shape=(),\n", + " initializer=tf.constant_initializer(0.0))\n", + "relus = [relu(X) for relu_index in range(5)]\n", + "output = tf.add_n(relus, name=\"output\")" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 85, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "file_writer = tf.summary.FileWriter(\"logs/relu6\", tf.get_default_graph())\n", + "file_writer.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 86, "metadata": { "collapsed": false, "deletable": true, @@ -1372,11 +1850,11 @@ "def relu(X):\n", " with tf.variable_scope(\"relu\"):\n", " threshold = tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n", - " w_shape = int(X.get_shape()[1]), 1\n", + " w_shape = (int(X.get_shape()[1]), 1)\n", " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", " b = tf.Variable(0.0, name=\"bias\")\n", - " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", - " return tf.maximum(linear, threshold, name=\"max\")\n", + " z = tf.add(tf.matmul(X, w), b, name=\"z\")\n", + " return tf.maximum(z, threshold, name=\"max\")\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", "with tf.variable_scope(\"\", default_name=\"\") as scope:\n", @@ -1385,35 +1863,47 @@ " relus = [first_relu] + [relu(X) for i in range(4)]\n", "output = tf.add_n(relus, name=\"output\")\n", "\n", - "summary_writer = tf.summary.FileWriter(\"logs/relu8\", tf.get_default_graph())\n", - "summary_writer.close()" + "file_writer = tf.summary.FileWriter(\"logs/relu8\", tf.get_default_graph())\n", + "file_writer.close()" ] }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 87, "metadata": { - "collapsed": false, - "deletable": true, - "editable": true + "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", - "with tf.variable_scope(\"param\"):\n", - " x = tf.get_variable(\"x\", shape=(), initializer=tf.constant_initializer(0.))\n", - " #x = tf.Variable(0., name=\"x\")\n", - "with tf.variable_scope(\"param\", reuse=True):\n", - " y = tf.get_variable(\"x\")\n", + "def relu(X):\n", + " threshold = tf.get_variable(\"threshold\", shape=(),\n", + " initializer=tf.constant_initializer(0.0))\n", + " w_shape = (int(X.get_shape()[1]), 1) # not shown in the book\n", + " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\") # not shown\n", + " b = tf.Variable(0.0, name=\"bias\") # not shown\n", + " z = tf.add(tf.matmul(X, w), b, name=\"z\") # not shown\n", + " return tf.maximum(z, threshold, name=\"max\")\n", "\n", - "with tf.variable_scope(\"\", default_name=\"\", reuse=True):\n", - " z = tf.get_variable(\"param/x\", shape=(), initializer=tf.constant_initializer(0.))\n", - "\n", - "print(x is y)\n", - "print(x.op.name)\n", - "print(y.op.name)\n", - "print(z.op.name)" + "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", + "relus = []\n", + "for relu_index in range(5):\n", + " with tf.variable_scope(\"relu\", reuse=(relu_index >= 1)) as scope:\n", + " relus.append(relu(X))\n", + "output = tf.add_n(relus, name=\"output\")" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + 
"metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "file_writer = tf.summary.FileWriter(\"logs/relu9\", tf.get_default_graph())\n", + "file_writer.close()" ] }, { @@ -1426,6 +1916,50 @@ "# Extra material" ] }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "\n", + "with tf.variable_scope(\"my_scope\"):\n", + " x0 = tf.get_variable(\"x\", shape=(), initializer=tf.constant_initializer(0.))\n", + " x1 = tf.Variable(0., name=\"x\")\n", + " x2 = tf.Variable(0., name=\"x\")\n", + "\n", + "with tf.variable_scope(\"my_scope\", reuse=True):\n", + " x3 = tf.get_variable(\"x\")\n", + " x4 = tf.Variable(0., name=\"x\")\n", + "\n", + "with tf.variable_scope(\"\", default_name=\"\", reuse=True):\n", + " x5 = tf.get_variable(\"my_scope/x\")\n", + "\n", + "print(\"x0:\", x0.op.name)\n", + "print(\"x1:\", x1.op.name)\n", + "print(\"x2:\", x2.op.name)\n", + "print(\"x3:\", x3.op.name)\n", + "print(\"x4:\", x4.op.name)\n", + "print(\"x5:\", x5.op.name)\n", + "print(x0 is x3 and x3 is x5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The first `variable_scope()` block first creates the shared variable `x0`, named `my_scope/x`. For all operations other than shared variables (including non-shared variables), the variable scope acts like a regular name scope, which is why the two variables `x1` and `x2` have a name with a prefix `my_scope/`. Note however that TensorFlow makes their names unique by adding an index: `my_scope/x_1` and `my_scope/x_2`.\n", + "\n", + "The second `variable_scope()` block reuses the shared variables in scope `my_scope`, which is why `x0 is x3`. Once again, for all operations other than shared variables it acts as a named scope, and since it's a separate block from the first one, the name of the scope is made unique by TensorFlow (`my_scope_1`) and thus the variable `x4` is named `my_scope_1/x`.\n", + "\n", + "The third block shows another way to get a handle on the shared variable `my_scope/x` by creating a `variable_scope()` at the root scope (whose name is an empty string), then calling `get_variable()` with the full name of the shared variable (i.e. `\"my_scope/x\"`)." 
+ ] + }, { "cell_type": "markdown", "metadata": { @@ -1438,7 +1972,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 90, "metadata": { "collapsed": false, "deletable": true, @@ -1462,54 +1996,12 @@ "editable": true }, "source": [ - "## Distributed TensorFlow" + "## Implementing a Home-Made Computation Graph" ] }, { "cell_type": "code", - "execution_count": 48, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "server = tf.train.Server.create_local_server()" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "x = tf.constant(2) + tf.constant(3)\n", - "with tf.Session(server.target) as sess:\n", - " print(sess.run(x))" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, - "scrolled": true - }, - "outputs": [], - "source": [ - "server.target" - ] - }, - { - "cell_type": "code", - "execution_count": 51, + "execution_count": 91, "metadata": { "collapsed": false, "deletable": true, @@ -1520,16 +2012,17 @@ "class Const(object):\n", " def __init__(self, value):\n", " self.value = value\n", - " def evaluate(self, **variables):\n", + " def evaluate(self):\n", " return self.value\n", " def __str__(self):\n", " return str(self.value)\n", "\n", "class Var(object):\n", - " def __init__(self, name):\n", + " def __init__(self, init_value, name):\n", + " self.value = init_value\n", " self.name = name\n", - " def evaluate(self, **variables):\n", - " return variables[self.name]\n", + " def evaluate(self):\n", + " return self.value\n", " def __str__(self):\n", " return self.name\n", "\n", @@ -1539,22 +2032,22 @@ " self.b = b\n", "\n", "class Add(BinaryOperator):\n", - " def evaluate(self, **variables):\n", - " return self.a.evaluate(**variables) + self.b.evaluate(**variables)\n", + " def evaluate(self):\n", + " return self.a.evaluate() + self.b.evaluate()\n", " def __str__(self):\n", " return \"{} + {}\".format(self.a, self.b)\n", "\n", "class Mul(BinaryOperator):\n", - " def evaluate(self, **variables):\n", - " return self.a.evaluate(**variables) * self.b.evaluate(**variables)\n", + " def evaluate(self):\n", + " return self.a.evaluate() * self.b.evaluate()\n", " def __str__(self):\n", " return \"({}) * ({})\".format(self.a, self.b)\n", "\n", - "x = Var(\"x\")\n", - "y = Var(\"y\")\n", + "x = Var(3, name=\"x\")\n", + "y = Var(4, name=\"y\")\n", "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n", "print(\"f(x,y) =\", f)\n", - "print(\"f(3,4) =\", f.evaluate(x=3, y=4))" + "print(\"f(3,4) =\", f.evaluate())" ] }, { @@ -1570,7 +2063,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 92, "metadata": { "collapsed": false, "deletable": true, @@ -1578,10 +2071,10 @@ }, "outputs": [], "source": [ - "df_dx = Mul(Const(2), Mul(Var(\"x\"), Var(\"y\"))) # df/dx = 2xy\n", - "df_dy = Add(Mul(Var(\"x\"), Var(\"x\")), Const(1)) # df/dy = x² + 1\n", - "print(\"df/dx(3,4) =\", df_dx.evaluate(x=3, y=4))\n", - "print(\"df/dy(3,4) =\", df_dy.evaluate(x=3, y=4))" + "df_dx = Mul(Const(2), Mul(x, y)) # df/dx = 2xy\n", + "df_dy = Add(Mul(x, x), Const(1)) # df/dy = x² + 1\n", + "print(\"df/dx(3,4) =\", df_dx.evaluate())\n", + "print(\"df/dy(3,4) =\", df_dy.evaluate())" ] }, { @@ -1596,7 +2089,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 93, "metadata": { 
"collapsed": false, "deletable": true, @@ -1604,47 +2097,21 @@ }, "outputs": [], "source": [ - "def derivative(f, x, y, x_eps, y_eps):\n", - " return (f.evaluate(x = x + x_eps, y = y + y_eps) - f.evaluate(x = x, y = y)) / (x_eps + y_eps)\n", + "def gradients(func, vars_list, eps=0.0001):\n", + " partial_derivatives = []\n", + " base_func_eval = func.evaluate()\n", + " for var in vars_list:\n", + " original_value = var.value\n", + " var.value = var.value + eps\n", + " tweaked_func_eval = func.evaluate()\n", + " var.value = original_value\n", + " derivative = (tweaked_func_eval - base_func_eval) / eps\n", + " partial_derivatives.append(derivative)\n", + " return partial_derivatives\n", "\n", - "df_dx_34 = derivative(f, x=3, y=4, x_eps=0.0001, y_eps=0)\n", - "df_dy_34 = derivative(f, x=3, y=4, x_eps=0, y_eps=0.0001)\n", - "print(\"df/dx(3,4) =\", df_dx_34)\n", - "print(\"df/dy(3,4) =\", df_dy_34)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "def f(x, y):\n", - " return x**2*y + y + 2\n", - "\n", - "def derivative(f, x, y, x_eps, y_eps):\n", - " return (f(x + x_eps, y + y_eps) - f(x, y)) / (x_eps + y_eps)\n", - "\n", - "df_dx = derivative(f, 3, 4, 0.00001, 0)\n", - "df_dy = derivative(f, 3, 4, 0, 0.00001)" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "print(df_dx)\n", - "print(df_dy)" + "df_dx, df_dy = gradients(f, [x, y])\n", + "print(\"df/dx(3,4) =\", df_dx)\n", + "print(\"df/dy(3,4) =\", df_dy)" ] }, { @@ -1659,7 +2126,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 94, "metadata": { "collapsed": false, "deletable": true, @@ -1668,18 +2135,18 @@ "outputs": [], "source": [ "Const.derive = lambda self, var: Const(0)\n", - "Var.derive = lambda self, var: Const(1) if self.name==var else Const(0)\n", + "Var.derive = lambda self, var: Const(1) if self is var else Const(0)\n", "Add.derive = lambda self, var: Add(self.a.derive(var), self.b.derive(var))\n", "Mul.derive = lambda self, var: Add(Mul(self.a, self.b.derive(var)), Mul(self.a.derive(var), self.b))\n", "\n", - "x = Var(\"x\")\n", - "y = Var(\"y\")\n", + "x = Var(3.0, name=\"x\")\n", + "y = Var(4.0, name=\"y\")\n", "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n", "\n", - "df_dx = f.derive(\"x\") # 2xy\n", - "df_dy = f.derive(\"y\") # x² + 1\n", - "print(\"df/dx(3,4) =\", df_dx.evaluate(x=3, y=4))\n", - "print(\"df/dy(3,4) =\", df_dy.evaluate(x=3, y=4))" + "df_dx = f.derive(x) # 2xy\n", + "df_dy = f.derive(y) # x² + 1\n", + "print(\"df/dx(3,4) =\", df_dx.evaluate())\n", + "print(\"df/dy(3,4) =\", df_dy.evaluate())" ] }, { @@ -1694,59 +2161,130 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 95, "metadata": { - "collapsed": false, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ - "class Const(object):\n", - " def __init__(self, value):\n", + "class DualNumber(object):\n", + " def __init__(self, value=0.0, eps=0.0):\n", " self.value = value\n", - " def evaluate(self, derive, **variables):\n", - " return self.value, 0\n", + " self.eps = eps\n", + " def __add__(self, b):\n", + " return DualNumber(self.value + self.to_dual(b).value,\n", + " self.eps + self.to_dual(b).eps)\n", + " def __radd__(self, a):\n", + " return self.to_dual(a).__add__(self)\n", + " def 
__mul__(self, b):\n", + " return DualNumber(self.value * self.to_dual(b).value,\n", + " self.eps * self.to_dual(b).value + self.value * self.to_dual(b).eps)\n", + " def __rmul__(self, a):\n", + " return self.to_dual(a).__mul__(self)\n", " def __str__(self):\n", - " return str(self.value)\n", + " if self.eps:\n", + " return \"{:.1f} + {:.1f}ε\".format(self.value, self.eps)\n", + " else:\n", + " return \"{:.1f}\".format(self.value)\n", + " def __repr__(self):\n", + " return str(self)\n", + " @classmethod\n", + " def to_dual(cls, n):\n", + " if hasattr(n, \"value\"):\n", + " return n\n", + " else:\n", + " return cls(n)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$3 + (3 + 4 \\epsilon) = 6 + 4\\epsilon$" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "3 + DualNumber(3, 4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$(3 + 4ε)\\times(5 + 7ε) = 3 \\times 5 + 3 \\times 7ε + 4ε \\times 5 + 4ε \\times 7ε = 15 + 21ε + 20ε + 28ε^2 = 15 + 41ε + 28 \\times 0 = 15 + 41ε$" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "DualNumber(3, 4) * DualNumber(5, 7)" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "x.value = DualNumber(3.0)\n", + "y.value = DualNumber(4.0)\n", "\n", - "class Var(object):\n", - " def __init__(self, name):\n", - " self.name = name\n", - " def evaluate(self, derive, **variables):\n", - " return variables[self.name], (1 if derive == self.name else 0)\n", - " def __str__(self):\n", - " return self.name\n", + "f.evaluate()" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "x.value = DualNumber(3.0, 1.0) # 3 + ε\n", + "y.value = DualNumber(4.0) # 4\n", "\n", - "class BinaryOperator(object):\n", - " def __init__(self, a, b):\n", - " self.a = a\n", - " self.b = b\n", + "df_dx = f.evaluate().eps\n", "\n", - "class Add(BinaryOperator):\n", - " def evaluate(self, derive, **variables):\n", - " a, da = self.a.evaluate(derive, **variables)\n", - " b, db = self.b.evaluate(derive, **variables)\n", - " return a + b, da + db\n", - " def __str__(self):\n", - " return \"{} + {}\".format(self.a, self.b)\n", + "x.value = DualNumber(3.0) # 3\n", + "y.value = DualNumber(4.0, 1.0) # 4 + ε\n", "\n", - "class Mul(BinaryOperator):\n", - " def evaluate(self, derive, **variables):\n", - " a, da = self.a.evaluate(derive, **variables)\n", - " b, db = self.b.evaluate(derive, **variables)\n", - " return a * b, a * db + da * b\n", - " def __str__(self):\n", - " return \"({}) * ({})\".format(self.a, self.b)\n", - "\n", - "x = Var(\"x\")\n", - "y = Var(\"y\")\n", - "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n", - "f34, df_dx_34 = f.evaluate(x=3, y=4, derive=\"x\")\n", - "f34, df_dy_34 = f.evaluate(x=3, y=4, derive=\"y\")\n", - "print(\"f(3,4) =\", f34)\n", - "print(\"df/dx(3,4) =\", df_dx_34)\n", - "print(\"df/dy(3,4) =\", df_dy_34)" + "df_dy = f.evaluate().eps" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df_dx" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df_dy" ] }, { @@ -1761,34 +2299,31 @@ }, { 
"cell_type": "code", - "execution_count": 58, + "execution_count": 102, "metadata": { - "collapsed": false, - "deletable": true, - "editable": true + "collapsed": false }, "outputs": [], "source": [ "class Const(object):\n", " def __init__(self, value):\n", - " self.derivative = 0\n", " self.value = value\n", - " def evaluate(self, **variables):\n", + " def evaluate(self):\n", " return self.value\n", - " def backpropagate(self, derivative):\n", + " def backpropagate(self, gradient):\n", " pass\n", " def __str__(self):\n", " return str(self.value)\n", "\n", "class Var(object):\n", - " def __init__(self, name):\n", + " def __init__(self, init_value, name):\n", + " self.value = init_value\n", " self.name = name\n", - " def evaluate(self, **variables):\n", - " self.derivative = 0\n", - " self.value = variables[self.name]\n", + " self.gradient = 0\n", + " def evaluate(self):\n", " return self.value\n", - " def backpropagate(self, derivative):\n", - " self.derivative += derivative\n", + " def backpropagate(self, gradient):\n", + " self.gradient += gradient\n", " def __str__(self):\n", " return self.name\n", "\n", @@ -1798,48 +2333,36 @@ " self.b = b\n", "\n", "class Add(BinaryOperator):\n", - " def evaluate(self, **variables):\n", - " self.derivative = 0\n", - " self.value = self.a.evaluate(**variables) + self.b.evaluate(**variables)\n", + " def evaluate(self):\n", + " self.value = self.a.evaluate() + self.b.evaluate()\n", " return self.value\n", - " def backpropagate(self, derivative):\n", - " self.derivative += derivative\n", - " self.a.backpropagate(derivative)\n", - " self.b.backpropagate(derivative)\n", + " def backpropagate(self, gradient):\n", + " self.a.backpropagate(gradient)\n", + " self.b.backpropagate(gradient)\n", " def __str__(self):\n", " return \"{} + {}\".format(self.a, self.b)\n", "\n", "class Mul(BinaryOperator):\n", - " def evaluate(self, **variables):\n", - " self.derivative = 0\n", - " self.value = self.a.evaluate(**variables) * self.b.evaluate(**variables)\n", + " def evaluate(self):\n", + " self.value = self.a.evaluate() * self.b.evaluate()\n", " return self.value\n", - " def backpropagate(self, derivative):\n", - " self.derivative += derivative\n", - " self.a.backpropagate(derivative * self.b.value)\n", - " self.b.backpropagate(derivative * self.a.value)\n", + " def backpropagate(self, gradient):\n", + " self.a.backpropagate(gradient * self.b.value)\n", + " self.b.backpropagate(gradient * self.a.value)\n", " def __str__(self):\n", - " return \"({}) * ({})\".format(self.a, self.b)" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "x = Var(\"x\")\n", - "y = Var(\"y\")\n", + " return \"({}) * ({})\".format(self.a, self.b)\n", + "\n", + "x = Var(3, name=\"x\")\n", + "y = Var(4, name=\"y\")\n", "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n", - "f34 = f.evaluate(x=3, y=4)\n", - "f.backpropagate(1)\n", - "print(\"f(3,4) =\", f34)\n", - "print(\"df/dx(3,4) =\", x.derivative)\n", - "print(\"df/dy(3,4) =\", y.derivative)" + "\n", + "result = f.evaluate()\n", + "f.backpropagate(1.0)\n", + "\n", + "print(\"f(x,y) =\", f)\n", + "print(\"f(3,4) =\", result)\n", + "print(\"df_dx =\", x.gradient)\n", + "print(\"df_dy =\", y.gradient)" ] }, { @@ -1854,7 +2377,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 103, "metadata": { "collapsed": false, "deletable": true, @@ -1865,7 +2388,7 @@ 
"tf.reset_default_graph()\n", "\n", "x = tf.Variable(3., name=\"x\")\n", - "y = tf.Variable(4., name=\"x\")\n", + "y = tf.Variable(4., name=\"y\")\n", "f = x*x*y + y + 2\n", "\n", "gradients = tf.gradients(f, [x, y])\n", @@ -1895,6 +2418,543 @@ "deletable": true, "editable": true }, + "source": [ + "## 1. to 11." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "source": [ + "See appendix A." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 12. Logistic Regression with Mini-Batch Gradient Descent using TensorFlow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's create the moons dataset using Scikit-Learn's `make_moons()` function:" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.datasets import make_moons\n", + "\n", + "m = 1000\n", + "X_moons, y_moons = make_moons(m, noise=0.1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a peek at the dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.plot(X_moons[y_moons == 1, 0], X_moons[y_moons == 1, 1], 'go', label=\"Positive\")\n", + "plt.plot(X_moons[y_moons == 0, 0], X_moons[y_moons == 0, 1], 'r^', label=\"Negative\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We must not forget to add an extra bias feature ($x_0 = 1$) to every instance. For this, we just need to add a column full of 1s on the left of the input matrix $\\mathbf{X}$:" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_moons_with_bias = np.c_[np.ones((m, 1)), X_moons]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's check:" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_moons_with_bias[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Looks good. Now let's reshape `y_train` to make it a column vector (i.e. a 2D array with a single column):" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "y_moons_column_vector = y_moons.reshape(-1, 1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's split the data into a training set and a test set:" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "test_ratio = 0.2\n", + "test_size = int(m * test_ratio)\n", + "X_train = X_moons_with_bias[:-test_size]\n", + "X_test = X_moons_with_bias[-test_size:]\n", + "y_train = y_moons_column_vector[:-test_size]\n", + "y_test = y_moons_column_vector[-test_size:]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ok, now let's create a small function to generate training batches. In this implementation we will just pick random instances from the training set for each batch. This means that a single batch may contain the same instance multiple times, and also a single epoch may not cover all the training instances (in fact it will generally cover only about two thirds of the instances). 
However, in practice this is not an issue and it simplifies the code:" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def random_batch(X_train, y_train, batch_size):\n", + " rnd_indices = np.random.randint(0, len(X_train), batch_size)\n", + " X_batch = X_train[rnd_indices]\n", + " y_batch = y_train[rnd_indices]\n", + " return X_batch, y_batch" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's look at a small batch:" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X_batch, y_batch = random_batch(X_train, y_train, 5)\n", + "X_batch" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "y_batch" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Great! Now that the data is ready to be fed to the model, we need to build that model. Let's start with a simple implementation, then we will add all the bells and whistles." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First let's reset the default graph." + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The _moons_ dataset has two input features, since each instance is a point on a plane (i.e., 2-Dimensional):" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "n_inputs = 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's build the Logistic Regression model. As we saw in chapter 4, this model first computes a weighted sum of the inputs (just like the Linear Regression model), and then it applies the sigmoid function to the result, which gives us the estimated probability for the positive class:\n", + "\n", + "$\\hat{p} = h_\\mathbf{\\theta}(\\mathbf{x}) = \\sigma(\\mathbf{\\theta}^T \\cdot \\mathbf{x})$\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Recall that $\\mathbf{\\theta}$ is the parameter vector, containing the bias term $\\theta_0$ and the weights $\\theta_1, \\theta_2, \\dots, \\theta_n$. The input vector $\\mathbf{x}$ contains a constant term $x_0 = 1$, as well as all the input features $x_1, x_2, \\dots, x_n$.\n", + "\n", + "Since we want to be able to make predictions for multiple instances at a time, we will use an input matrix $\\mathbf{X}$ rather than a single input vector. The $i^{th}$ row will contain the transpose of the $i^{th}$ input vector $(\\mathbf{x}^{(i)})^T$. 
It is then possible to estimate the probability that each instance belongs to the positive class using the following equation:\n", + "\n", + "$ \\hat{\\mathbf{p}} = \\sigma(\\mathbf{X} \\cdot \\mathbf{\\theta})$\n", + "\n", + "That's all we need to build the model:" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "X = tf.placeholder(tf.float32, shape=(None, n_inputs + 1), name=\"X\")\n", + "y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n", + "theta = tf.Variable(tf.random_uniform([n_inputs + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", + "logits = tf.matmul(X, theta, name=\"logits\")\n", + "y_proba = 1 / (1 + tf.exp(-logits))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In fact, TensorFlow has a nice function `tf.sigmoid()` that we can use to simplify the last line of the previous code:" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "y_proba = tf.sigmoid(logits)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we saw in chapter 4, the log loss is a good cost function to use for Logistic Regression:\n", + "\n", + "$J(\\mathbf{\\theta}) = -\\dfrac{1}{m} \\sum\\limits_{i=1}^{m}{\\left[ y^{(i)} log\\left(\\hat{p}^{(i)}\\right) + (1 - y^{(i)}) log\\left(1 - \\hat{p}^{(i)}\\right)\\right]}$\n", + "\n", + "One option is to implement it ourselves:" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "epsilon = 1e-7 # to avoid an overflow when computing the log\n", + "loss = -tf.reduce_mean(y * tf.log(y_proba + epsilon) + (1 - y) * tf.log(1 - y_proba + epsilon))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "But we might as well use TensorFlow's `tf.losses.log_loss()` function:" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "loss = tf.losses.log_loss(y, y_proba) # uses epsilon = 1e-7 by default" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The rest is pretty standard: let's create the optimizer and tell it to minimize the cost function:" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "learning_rate = 0.01\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", + "training_op = optimizer.minimize(loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All we need now (in this minimal version) is the variable initializer:" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "init = tf.global_variables_initializer()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And we are ready to train the model and use it for predictions!" 
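+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick sanity check (this cell is just an illustration, not part of the original exercise), we can verify that our manual implementation and `tf.losses.log_loss()` compute almost exactly the same value on a random batch:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Illustration only: compare the two loss definitions on the same random batch.\n",
+ "manual_loss = -tf.reduce_mean(y * tf.log(y_proba + epsilon) + (1 - y) * tf.log(1 - y_proba + epsilon))\n",
+ "\n",
+ "with tf.Session() as sess:\n",
+ "    sess.run(tf.global_variables_initializer())\n",
+ "    X_batch, y_batch = random_batch(X_train, y_train, 100)\n",
+ "    print(sess.run([manual_loss, loss], feed_dict={X: X_batch, y: y_batch}))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The rest is pretty standard: let's create the optimizer and tell it to minimize the cost function:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 119,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "learning_rate = 0.01\n",
+ "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n",
+ "training_op = optimizer.minimize(loss)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "All we need now (in this minimal version) is the variable initializer:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 120,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "init = tf.global_variables_initializer()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "And we are ready to train the model and use it for predictions!"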
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "There's really nothing special about this code; it's virtually the same as the one we used earlier for Linear Regression:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 121,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "n_epochs = 500\n",
+ "batch_size = 50\n",
+ "n_batches = int(np.ceil(m / batch_size))\n",
+ "\n",
+ "with tf.Session() as sess:\n",
+ "    sess.run(init)\n",
+ "\n",
+ "    for epoch in range(n_epochs):\n",
+ "        for batch_index in range(n_batches):\n",
+ "            X_batch, y_batch = random_batch(X_train, y_train, batch_size)\n",
+ "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
+ "        loss_val = loss.eval({X: X_test, y: y_test})\n",
+ "        if epoch % 100 == 0:\n",
+ "            print(\"Epoch:\", epoch, \"\\tLoss:\", loss_val)\n",
+ "\n",
+ "    y_proba_val = y_proba.eval(feed_dict={X: X_test, y: y_test})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Note: we don't use the epoch number when generating batches, so we could just have a single `for` loop rather than two nested `for` loops, but it's convenient to think of training time in terms of number of epochs (i.e., roughly the number of times the algorithm went through the training set)."
+ ]
+ },
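+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For illustration, here is a sketch of what that single-loop variant could look like (it performs the same total number of parameter updates, it just doesn't display the loss along the way):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Sketch of the equivalent single-loop version (illustration only).\n",
+ "n_iterations = n_epochs * n_batches  # same total number of updates\n",
+ "\n",
+ "with tf.Session() as sess:\n",
+ "    sess.run(init)\n",
+ "    for iteration in range(n_iterations):\n",
+ "        X_batch, y_batch = random_batch(X_train, y_train, batch_size)\n",
+ "        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For each instance in the test set, `y_proba_val` contains the estimated probability that it belongs to the positive class, according to the model. For example, here are the first 5 estimated probabilities:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 122,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "y_proba_val[:5]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To classify each instance, we can go for maximum likelihood: classify as positive any instance whose estimated probability is greater than or equal to 0.5:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 123,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "y_pred = (y_proba_val >= 0.5)\n",
+ "y_pred[:5]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Depending on the use case, you may want to choose a different threshold than 0.5: make it higher if you want high precision (but lower recall), and make it lower if you want high recall (but lower precision). See chapter 3 for more details. For example, here is what the first few predictions would look like with a stricter threshold of 0.7 (an arbitrary value, purely for illustration):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Illustration only: a stricter threshold usually trades recall for precision.\n",
+ "y_pred_strict = (y_proba_val >= 0.7)\n",
+ "y_pred_strict[:5]"
+ ]
+ },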
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's compute the model's precision and recall:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 124,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "from sklearn.metrics import precision_score, recall_score\n",
+ "\n",
+ "precision_score(y_test, y_pred)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 125,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "recall_score(y_test, y_pred)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's plot these predictions to see what they look like:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 126,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "y_pred_idx = y_pred.reshape(-1) # a 1D array rather than a column vector\n",
+ "plt.plot(X_test[y_pred_idx, 1], X_test[y_pred_idx, 2], 'go', label=\"Positive\")\n",
+ "plt.plot(X_test[~y_pred_idx, 1], X_test[~y_pred_idx, 2], 'r^', label=\"Negative\")\n",
+ "plt.legend()\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Well, that looks pretty bad, doesn't it? But let's not forget that the Logistic Regression model has a linear decision boundary, so this is actually close to the best we can do with this model (unless we add more features, such as ${x_1}^2$, ${x_2}^2$ and $x_1 x_2$)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let's just add all the bells and whistles, as listed in the exercise (a rough sketch of the `logistic_regression()` function follows this list):\n",
+ "* Define the graph within a `logistic_regression()` function that can be reused easily.\n",
+ "* Save checkpoints using a `Saver` at regular intervals during training, and save the final model at the end of training.\n",
+ "* Restore the last checkpoint upon startup if training was interrupted.\n",
+ "* Define the graph using nice scopes so the graph looks good in TensorBoard.\n",
+ "* Add summaries to visualize the learning curves in TensorBoard.\n",
+ "* Try tweaking some hyperparameters such as the learning rate or the mini-batch size and look at the shape of the learning curve."
+ ]
+ },
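+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To get started on the first and fourth points, here is a rough sketch of what such a `logistic_regression()` function could look like (just a skeleton assembled from the code above; the `Saver`, checkpoint restoration and summaries still need to be wired in):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Rough skeleton (illustration only): builds the same model as above inside name scopes.\n",
+ "def logistic_regression(X, y, initializer=None, seed=42, learning_rate=0.01):\n",
+ "    n_inputs_including_bias = int(X.get_shape()[1])\n",
+ "    with tf.name_scope(\"logistic_regression\"):\n",
+ "        with tf.name_scope(\"model\"):\n",
+ "            if initializer is None:\n",
+ "                initializer = tf.random_uniform([n_inputs_including_bias, 1], -1.0, 1.0, seed=seed)\n",
+ "            theta = tf.Variable(initializer, name=\"theta\")\n",
+ "            logits = tf.matmul(X, theta, name=\"logits\")\n",
+ "            y_proba = tf.sigmoid(logits)\n",
+ "        with tf.name_scope(\"train\"):\n",
+ "            loss = tf.losses.log_loss(y, y_proba, scope=\"loss\")\n",
+ "            optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n",
+ "            training_op = optimizer.minimize(loss)\n",
+ "    return y_proba, loss, training_op"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
"source": [
"**Coming soon**"
]
},
@@ -1903,9 +2963,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "collapsed": true,
- "deletable": true,
- "editable": true
+ "collapsed": true
},
"outputs": [],
"source": []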