{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "**Chapter 9 – Up and running with TensorFlow**" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "_This notebook contains all the sample code and solutions to the exercises in chapter 9._" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Setup" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure Matplotlib plots figures inline and prepare a function to save the figures:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# To support both python 2 and python 3\n", "from __future__ import division, print_function, unicode_literals\n", "\n", "# Common imports\n", "import numpy as np\n", "import numpy.random as rnd\n", "import os\n", "\n", "# to make this notebook's output stable across runs\n", "rnd.seed(42)\n", "\n", "# To plot pretty figures\n", "%matplotlib inline\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "plt.rcParams['axes.labelsize'] = 14\n", "plt.rcParams['xtick.labelsize'] = 12\n", "plt.rcParams['ytick.labelsize'] = 12\n", "\n", "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", "CHAPTER_ID = \"tensorflow\"\n", "\n", "def save_fig(fig_id, tight_layout=True):\n", " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", " print(\"Saving figure\", fig_id)\n", " if tight_layout:\n", " plt.tight_layout()\n", " plt.savefig(path, format='png', dpi=300)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Creating and running a graph" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import tensorflow as tf" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "x = 
tf.Variable(3, name=\"x\")\n", "y = tf.Variable(4, name=\"y\")\n", "f = x*x*y + y + 2\n", "\n", "f" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [ "sess = tf.Session()\n", "sess.run(x.initializer)\n", "sess.run(y.initializer)\n", "print(sess.run(f))\n", "sess.close()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "with tf.Session() as sess:\n", " x.initializer.run()\n", " y.initializer.run()\n", " result = f.eval()\n", "\n", "result" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "init = tf.initialize_all_variables()\n", "\n", "with tf.Session():\n", " init.run()\n", " result = f.eval()\n", "\n", "result" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "init = tf.initialize_all_variables()\n", "\n", "sess = tf.InteractiveSession()\n", "init.run()\n", "result = f.eval()\n", "sess.close()\n", "\n", "result" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Managing graphs" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "x1 = tf.Variable(1)\n", "x1.graph is tf.get_default_graph()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "graph = tf.Graph()\n", "with graph.as_default():\n", " x2 = tf.Variable(2)\n", "\n", "x2.graph is tf.get_default_graph()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "x2.graph is graph" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [ "w = tf.constant(3)\n", "x = w + 2\n", "y = x + 5\n", "z = x * 3\n", "\n", "with tf.Session() as sess:\n", " print(y.eval()) 
# 10\n", " print(z.eval()) # 15" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [], "source": [ "with tf.Session() as sess:\n", "    # Fetch both tensors in a single sess.run() call, then print the fetched\n", "    # values (printing y / z themselves would only show the Tensor objects).\n", "    y_val, z_val = sess.run([y, z])\n", "    print(y_val) # 10\n", "    print(z_val) # 15" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Linear Regression" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Using the Normal Equation" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.datasets import fetch_california_housing\n", "\n", "housing = fetch_california_housing()\n", "m, n = housing.data.shape\n", "housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "X = tf.constant(housing_data_plus_bias, dtype=tf.float64, name=\"X\")\n", "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float64, name=\"y\")\n", "XT = tf.transpose(X)\n", "theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)\n", "\n", "with tf.Session() as sess:\n", " result = theta.eval()\n", "\n", "print(result)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Compare with pure NumPy" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [], "source": [ "X = housing_data_plus_bias\n", "y = housing.target.reshape(-1, 1)\n", "theta_numpy = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)\n", "\n", "print(theta_numpy)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Compare with Scikit-Learn" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.linear_model import LinearRegression\n", "lin_reg = LinearRegression()\n", "lin_reg.fit(housing.data, housing.target.reshape(-1, 1))\n", "\n", 
"print(np.r_[lin_reg.intercept_.reshape(-1, 1), lin_reg.coef_.T])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Using Batch Gradient Descent" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Gradient Descent requires scaling the feature vectors first. We could do this using TF, but let's just use Scikit-Learn for now." ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn.preprocessing import StandardScaler\n", "scaler = StandardScaler()\n", "scaled_housing_data = scaler.fit_transform(housing.data)\n", "scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(scaled_housing_data_plus_bias.mean(axis=0))\n", "print(scaled_housing_data_plus_bias.mean(axis=1))\n", "print(scaled_housing_data_plus_bias.mean())\n", "print(scaled_housing_data_plus_bias.shape)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Manually computing the gradients" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_epochs = 1000\n", "learning_rate = 0.01\n", "\n", "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", "error = y_pred - y\n", "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", "gradients = 2/m * tf.matmul(tf.transpose(X), error)\n", "training_op = tf.assign(theta, theta - learning_rate * gradients)\n", "\n", "init = tf.initialize_all_variables()\n", "\n", "with tf.Session() as sess:\n", " sess.run(init)\n", "\n", " for epoch in range(n_epochs):\n", " if epoch % 100 
== 0:\n", " print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n", " sess.run(training_op)\n", " \n", " best_theta = theta.eval()\n", "\n", "print(\"Best theta:\")\n", "print(best_theta)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using autodiff\n", "Same as above except for the `gradients = ...` line." ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_epochs = 1000\n", "learning_rate = 0.01\n", "\n", "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", "error = y_pred - y\n", "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", "gradients = tf.gradients(mse, [theta])[0]\n", "training_op = tf.assign(theta, theta - learning_rate * gradients)\n", "\n", "init = tf.initialize_all_variables()\n", "\n", "with tf.Session() as sess:\n", " sess.run(init)\n", "\n", " for epoch in range(n_epochs):\n", " if epoch % 100 == 0:\n", " print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n", " sess.run(training_op)\n", " \n", " best_theta = theta.eval()\n", "\n", "print(\"Best theta:\")\n", "print(best_theta)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using a `GradientDescentOptimizer`" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_epochs = 1000\n", "learning_rate = 0.01\n", "\n", "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", "y_pred = tf.matmul(X, theta, 
name=\"predictions\")\n", "error = y_pred - y\n", "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(mse)\n", "\n", "init = tf.initialize_all_variables()\n", "\n", "with tf.Session() as sess:\n", " sess.run(init)\n", "\n", " for epoch in range(n_epochs):\n", " if epoch % 100 == 0:\n", " print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n", " sess.run(training_op)\n", " \n", " best_theta = theta.eval()\n", "\n", "print(\"Best theta:\")\n", "print(best_theta)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using a momentum optimizer" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_epochs = 1000\n", "learning_rate = 0.01\n", "\n", "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", "error = y_pred - y\n", "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", "optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.25)\n", "training_op = optimizer.minimize(mse)\n", "\n", "init = tf.initialize_all_variables()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [], "source": [ "with tf.Session() as sess:\n", " sess.run(init)\n", "\n", " for epoch in range(n_epochs):\n", " sess.run(training_op)\n", " \n", " best_theta = theta.eval()\n", "\n", "print(\"Best theta:\")\n", "print(best_theta)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Feeding data to the training algorithm" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Placeholder nodes" ] }, { "cell_type": "code", 
"execution_count": 24, "metadata": { "collapsed": false }, "outputs": [], "source": [ ">>> tf.reset_default_graph()\n", "\n", ">>> A = tf.placeholder(tf.float32, shape=(None, 3))\n", ">>> B = A + 5\n", ">>> with tf.Session() as sess:\n", "... B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})\n", "... B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})\n", "...\n", ">>> print(B_val_1)\n", ">>> print(B_val_2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Mini-batch Gradient Descent" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_epochs = 1000\n", "learning_rate = 0.01\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n + 1), name=\"X\")\n", "y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n", "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", "error = y_pred - y\n", "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(mse)\n", "\n", "init = tf.initialize_all_variables()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def fetch_batch(epoch, batch_index, batch_size):\n", " rnd.seed(epoch * n_batches + batch_index)\n", " indices = rnd.randint(m, size=batch_size)\n", " X_batch = scaled_housing_data_plus_bias[indices]\n", " y_batch = housing.target.reshape(-1, 1)[indices]\n", " return X_batch, y_batch\n", "\n", "n_epochs = 10\n", "batch_size = 100\n", "n_batches = int(np.ceil(m / batch_size))\n", "\n", "with tf.Session() as sess:\n", " sess.run(init)\n", "\n", " for epoch in range(n_epochs):\n", " for batch_index in range(n_batches):\n", " X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)\n", " sess.run(training_op, feed_dict={X: X_batch, y: 
y_batch})\n", "\n", " best_theta = theta.eval()\n", " \n", "print(\"Best theta:\")\n", "print(best_theta)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Saving and restoring a model" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_epochs = 1000\n", "learning_rate = 0.01\n", "\n", "X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n", "y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n", "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", "error = y_pred - y\n", "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(mse)\n", "\n", "init = tf.initialize_all_variables()\n", "saver = tf.train.Saver()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [], "source": [ "with tf.Session() as sess:\n", " sess.run(init)\n", "\n", " for epoch in range(n_epochs):\n", " if epoch % 100 == 0:\n", " print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n", " save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n", " sess.run(training_op)\n", " \n", " best_theta = theta.eval()\n", " save_path = saver.save(sess, \"my_model_final.ckpt\")\n", "\n", "print(\"Best theta:\")\n", "print(best_theta)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Visualizing the graph\n", "## inside Jupyter" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": true }, "outputs": [], "source": [
"from IPython.display import clear_output, Image, display, HTML\n",
"\n",
"def strip_consts(graph_def, max_const_size=32):\n",
"    \"\"\"Strip large constant values from graph_def.\n",
"\n",
"    Returns a new GraphDef holding a copy of every node of `graph_def`. For\n",
"    each 'Const' node whose raw tensor content is longer than `max_const_size`\n",
"    bytes, the content is replaced by a short placeholder recording its size.\n",
"    The input `graph_def` itself is not modified.\n",
"    \"\"\"\n",
"    strip_def = tf.GraphDef()\n",
"    for n0 in graph_def.node:\n",
"        n = strip_def.node.add()\n",
"        n.MergeFrom(n0)\n",
"        if n.op == 'Const':\n",
"            tensor = n.attr['value'].tensor\n",
"            size = len(tensor.tensor_content)\n",
"            if size > max_const_size:\n",
"                # tensor_content is a bytes field, hence the bytes placeholder\n",
"                tensor.tensor_content = b\"<stripped %d bytes>\" % size\n",
"    return strip_def\n",
"\n",
"def show_graph(graph_def, max_const_size=32):\n",
"    \"\"\"Visualize TensorFlow graph.\n",
"\n",
"    `graph_def` is either a GraphDef or any object exposing `as_graph_def()`\n",
"    (such as a tf.Graph). Large constants are stripped with `strip_consts()`\n",
"    and the result is embedded in an <iframe> displayed in the notebook.\n",
"    NOTE(review): the widget markup below is loaded from an external URL, so\n",
"    rendering presumably needs network access - confirm the URL is still live.\n",
"    \"\"\"\n",
"    if hasattr(graph_def, 'as_graph_def'):\n",
"        graph_def = graph_def.as_graph_def()\n",
"    strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n",
"    # Literal braces are doubled because the template goes through str.format()\n",
"    code = \"\"\"\n",
"        <script>\n",
"          function load() {{\n",
"            document.getElementById(\"{id}\").pbtxt = {data};\n",
"          }}\n",
"        </script>\n",
"        <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n",
"        <div style=\"height:600px\">\n",
"          <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n",
"        </div>\n",
"    \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n",
"\n",
"    # The markup is placed inside a double-quoted srcdoc attribute, so every\n",
"    # double quote it contains must be HTML-escaped first.\n",
"    iframe = \"\"\"\n",
"        <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n",
"    \"\"\".format(code.replace('\"', '&quot;'))\n",
"    display(HTML(iframe))"
] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "show_graph(tf.get_default_graph())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Using TensorBoard" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "from datetime import datetime\n", "\n", "now = datetime.utcnow().strftime(\"%Y%m%d%H%M%S\")\n", "root_logdir = \"tf_logs\"\n", "logdir = \"{}/run-{}/\".format(root_logdir, now)\n", "\n", "n_epochs = 1000\n", "learning_rate = 0.01\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n + 1), name=\"X\")\n", "y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n", "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", "error = y_pred - y\n", "mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(mse)\n", "\n", "init = tf.initialize_all_variables()\n", "\n", "mse_summary = tf.scalar_summary('MSE', mse)\n", "summary_writer = tf.train.SummaryWriter(logdir, tf.get_default_graph())" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [], "source": [ "n_epochs = 10\n", "batch_size = 100\n", "n_batches = int(np.ceil(m / batch_size))\n", "\n", "with tf.Session() as sess:\n", " sess.run(init)\n", "\n", " for epoch in range(n_epochs):\n", " for batch_index in range(n_batches):\n", " X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)\n", " if batch_index % 10 == 0:\n", " summary_str = mse_summary.eval(feed_dict={X: X_batch, 
y: y_batch})\n", " step = epoch * n_batches + batch_index\n", " summary_writer.add_summary(summary_str, step)\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", "\n", " best_theta = theta.eval()\n", "\n", "summary_writer.flush()\n", "summary_writer.close()\n", "print(\"Best theta:\")\n", "print(best_theta)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Name scopes" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "now = datetime.utcnow().strftime(\"%Y%m%d%H%M%S\")\n", "root_logdir = \"tf_logs\"\n", "logdir = \"{}/run-{}/\".format(root_logdir, now)\n", "\n", "n_epochs = 1000\n", "learning_rate = 0.01\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n + 1), name=\"X\")\n", "y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n", "theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n", "y_pred = tf.matmul(X, theta, name=\"predictions\")\n", "with tf.name_scope('loss') as scope:\n", " error = y_pred - y\n", " mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n", "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", "training_op = optimizer.minimize(mse)\n", "\n", "init = tf.initialize_all_variables()\n", "\n", "mse_summary = tf.scalar_summary('MSE', mse)\n", "summary_writer = tf.train.SummaryWriter(logdir, tf.get_default_graph())" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false }, "outputs": [], "source": [ "n_epochs = 10\n", "batch_size = 100\n", "n_batches = int(np.ceil(m / batch_size))\n", "\n", "with tf.Session() as sess:\n", " sess.run(init)\n", "\n", " for epoch in range(n_epochs):\n", " for batch_index in range(n_batches):\n", " X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)\n", " if batch_index % 10 == 0:\n", " summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})\n", " step = epoch * 
n_batches + batch_index\n", " summary_writer.add_summary(summary_str, step)\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", "\n", " best_theta = theta.eval()\n", "\n", "summary_writer.flush()\n", "summary_writer.close()\n", "print(\"Best theta:\")\n", "print(best_theta)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(error.op.name)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(mse.op.name)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "a1 = tf.Variable(0, name=\"a\") # name == \"a\"\n", "a2 = tf.Variable(0, name=\"a\") # name == \"a_1\"\n", "\n", "with tf.name_scope(\"param\"): # name == \"param\"\n", " a3 = tf.Variable(0, name=\"a\") # name == \"param/a\"\n", "\n", "with tf.name_scope(\"param\"): # name == \"param_1\"\n", " a4 = tf.Variable(0, name=\"a\") # name == \"param_1/a\"\n", "\n", "for node in (a1, a2, a3, a4):\n", " print(node.op.name)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Modularity" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "An ugly flat code:" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_features = 3\n", "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", "\n", "w1 = tf.Variable(tf.random_normal((n_features, 1)), name=\"weights1\")\n", "w2 = tf.Variable(tf.random_normal((n_features, 1)), name=\"weights2\")\n", "b1 = tf.Variable(0.0, name=\"bias1\")\n", "b2 = tf.Variable(0.0, name=\"bias2\")\n", "\n", "linear1 = tf.add(tf.matmul(X, w1), b1, name=\"linear1\")\n", "linear2 = tf.add(tf.matmul(X, w2), b2, name=\"linear2\")\n", "\n", "relu1 = tf.maximum(linear1, 0, name=\"relu1\")\n", "relu2 = tf.maximum(linear1, 0, 
name=\"relu2\") # Oops, cut&paste error! Did you spot it?\n", "\n", "output = tf.add_n([relu1, relu2], name=\"output\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Much better, using a function to build the ReLUs:" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "def relu(X):\n", " w_shape = int(X.get_shape()[1]), 1\n", " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", " b = tf.Variable(0.0, name=\"bias\")\n", " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", " return tf.maximum(linear, 0, name=\"relu\")\n", "\n", "n_features = 3\n", "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", "relus = [relu(X) for i in range(5)]\n", "output = tf.add_n(relus, name=\"output\")\n", "summary_writer = tf.train.SummaryWriter(\"logs/relu1\", tf.get_default_graph())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Even better using name scopes:" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "def relu(X):\n", " with tf.name_scope(\"relu\"):\n", " w_shape = int(X.get_shape()[1]), 1\n", " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", " b = tf.Variable(0.0, name=\"bias\")\n", " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", " return tf.maximum(linear, 0, name=\"max\")\n", "\n", "n_features = 3\n", "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", "relus = [relu(X) for i in range(5)]\n", "output = tf.add_n(relus, name=\"output\")\n", "\n", "summary_writer = tf.train.SummaryWriter(\"logs/relu2\", tf.get_default_graph())" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": true }, "outputs": [], "source": [ "summary_writer.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Sharing a `threshold` variable the 
classic way, by defining it outside of the `relu()` function then passing it as a parameter:" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "def relu(X, threshold):\n", " with tf.name_scope(\"relu\"):\n", " w_shape = int(X.get_shape()[1]), 1\n", " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", " b = tf.Variable(0.0, name=\"bias\")\n", " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", " return tf.maximum(linear, threshold, name=\"max\")\n", "\n", "threshold = tf.Variable(0.0, name=\"threshold\")\n", "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", "relus = [relu(X, threshold) for i in range(5)]\n", "output = tf.add_n(relus, name=\"output\")" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "def relu(X):\n", " with tf.name_scope(\"relu\"):\n", " if not hasattr(relu, \"threshold\"):\n", " relu.threshold = tf.Variable(0.0, name=\"threshold\")\n", " w_shape = int(X.get_shape()[1]), 1\n", " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", " b = tf.Variable(0.0, name=\"bias\")\n", " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", " return tf.maximum(linear, relu.threshold, name=\"max\")\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", "relus = [relu(X) for i in range(5)]\n", "output = tf.add_n(relus, name=\"output\")" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "def relu(X):\n", " with tf.variable_scope(\"relu\", reuse=True):\n", " threshold = tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n", " w_shape = int(X.get_shape()[1]), 1\n", " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", " b = tf.Variable(0.0, 
name=\"bias\")\n", " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", " return tf.maximum(linear, threshold, name=\"max\")\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", "with tf.variable_scope(\"relu\"):\n", " threshold = tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n", "relus = [relu(X) for i in range(5)]\n", "output = tf.add_n(relus, name=\"output\")\n", "\n", "summary_writer = tf.train.SummaryWriter(\"logs/relu6\", tf.get_default_graph())\n", "summary_writer.close()" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "def relu(X):\n", " with tf.variable_scope(\"relu\"):\n", " threshold = tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n", " w_shape = int(X.get_shape()[1]), 1\n", " w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n", " b = tf.Variable(0.0, name=\"bias\")\n", " linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n", " return tf.maximum(linear, threshold, name=\"max\")\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n", "with tf.variable_scope(\"\") as scope:\n", " first_relu = relu(X) # create the shared variable\n", " scope.reuse_variables() # then reuse it\n", " relus = [first_relu] + [relu(X) for i in range(4)]\n", "output = tf.add_n(relus, name=\"output\")\n", "\n", "summary_writer = tf.train.SummaryWriter(\"logs/relu8\", tf.get_default_graph())\n", "summary_writer.close()" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "with tf.variable_scope(\"param\"):\n", " x = tf.get_variable(\"x\", shape=(), initializer=tf.constant_initializer(0.))\n", " #x = tf.Variable(0., name=\"x\")\n", "with tf.variable_scope(\"param\", reuse=True):\n", " y = tf.get_variable(\"x\")\n", "\n", "with 
tf.variable_scope(\"\", reuse=True):\n", " z = tf.get_variable(\"param/x\", shape=(), initializer=tf.constant_initializer(0.))\n", "\n", "print(x is y)\n", "print(x.op.name)\n", "print(y.op.name)\n", "print(z.op.name)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Extra material" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Strings" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "text = np.array(\"Do you want some café?\".split())\n", "text_tensor = tf.constant(text)\n", "\n", "with tf.Session() as sess:\n", " print(text_tensor.eval())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Distributed TensorFlow" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": true }, "outputs": [], "source": [ "server = tf.train.Server.create_local_server()" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false }, "outputs": [], "source": [ "x = tf.constant(2) + tf.constant(3)\n", "with tf.Session(server.target) as sess:\n", " print(sess.run(x))" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "server.target" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [], "source": [ "class Const(object):\n", " def __init__(self, value):\n", " self.value = value\n", " def evaluate(self, **variables):\n", " return self.value\n", " def __str__(self):\n", " return str(self.value)\n", "\n", "class Var(object):\n", " def __init__(self, name):\n", " self.name = name\n", " def evaluate(self, **variables):\n", " return variables[self.name]\n", " def __str__(self):\n", " return self.name\n", "\n", "class BinaryOperator(object):\n", " def __init__(self, a, b):\n", " self.a = a\n", " self.b = b\n", "\n", "class Add(BinaryOperator):\n", " def evaluate(self, 
**variables):\n", " return self.a.evaluate(**variables) + self.b.evaluate(**variables)\n", " def __str__(self):\n", " return \"{} + {}\".format(self.a, self.b)\n", "\n", "class Mul(BinaryOperator):\n", " def evaluate(self, **variables):\n", " return self.a.evaluate(**variables) * self.b.evaluate(**variables)\n", " def __str__(self):\n", " return \"({}) * ({})\".format(self.a, self.b)\n", "\n", "x = Var(\"x\")\n", "y = Var(\"y\")\n", "f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n", "print(\"f(x,y) =\", f)\n", "print(\"f(3,4) =\", f.evaluate(x=3, y=4))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Computing gradients\n", "### Mathematical differentiation" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df_dx = Mul(Const(2), Mul(Var(\"x\"), Var(\"y\"))) # df/dx = 2xy\n", "df_dy = Add(Mul(Var(\"x\"), Var(\"x\")), Const(1)) # df/dy = x² + 1\n", "print(\"df/dx(3,4) =\", df_dx.evaluate(x=3, y=4))\n", "print(\"df/dy(3,4) =\", df_dy.evaluate(x=3, y=4))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Numerical differentiation" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def derivative(f, x, y, x_eps, y_eps):\n", " return (f.evaluate(x = x + x_eps, y = y + y_eps) - f.evaluate(x = x, y = y)) / (x_eps + y_eps)\n", "\n", "df_dx_34 = derivative(f, x=3, y=4, x_eps=0.0001, y_eps=0)\n", "df_dy_34 = derivative(f, x=3, y=4, x_eps=0, y_eps=0.0001)\n", "print(\"df/dx(3,4) =\", df_dx_34)\n", "print(\"df/dy(3,4) =\", df_dy_34)" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def f(x, y):\n", " return x**2*y + y + 2\n", "\n", "def derivative(f, x, y, x_eps, y_eps):\n", " return (f(x + x_eps, y + y_eps) - f(x, y)) / (x_eps + y_eps)\n", "\n", "df_dx = derivative(f, 3, 4, 0.00001, 0)\n", "df_dy = derivative(f, 3, 4, 0, 0.00001)" ] 
}, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(df_dx)\n", "print(df_dy)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Symbolic differentiation" ] },
{ "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "# Attach a derive(var) method to each node class; it returns a new expression tree.\n",
 "# Constant rule: the derivative of a constant is 0.\n",
 "Const.derive = lambda self, var: Const(0)\n",
 "# A variable derives to 1 with respect to itself and to 0 otherwise.\n",
 "Var.derive = lambda self, var: Const(1) if self.name==var else Const(0)\n",
 "# Sum rule: (a + b)' = a' + b'.\n",
 "Add.derive = lambda self, var: Add(self.a.derive(var), self.b.derive(var))\n",
 "# Product rule: (a * b)' = a * b' + a' * b.\n",
 "Mul.derive = lambda self, var: Add(Mul(self.a, self.b.derive(var)), Mul(self.a.derive(var), self.b))\n",
 "\n",
 "x = Var(\"x\")\n",
 "y = Var(\"y\")\n",
 "f = Add(Mul(Mul(x, x), y), Add(y, Const(2)))  # f(x,y) = x²y + y + 2\n",
 "\n",
 "df_dx = f.derive(\"x\")  # 2xy\n",
 "df_dy = f.derive(\"y\")  # x² + 1\n",
 "print(\"df/dx(3,4) =\", df_dx.evaluate(x=3, y=4))\n",
 "print(\"df/dy(3,4) =\", df_dy.evaluate(x=3, y=4))" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Automatic differentiation (autodiff) – forward mode" ] },
{ "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "# Forward mode: evaluate() returns a (value, derivative) pair, where the\n",
 "# derivative is taken with respect to the variable named by `derive`.\n",
 "class Const(object):\n",
 "    \"\"\"Constant leaf: evaluates to (value, 0).\"\"\"\n",
 "    def __init__(self, value):\n",
 "        self.value = value\n",
 "    def evaluate(self, derive, **variables):\n",
 "        return self.value, 0\n",
 "    def __str__(self):\n",
 "        return str(self.value)\n",
 "\n",
 "class Var(object):\n",
 "    \"\"\"Variable leaf: derivative is 1 when `derive` is this variable's name, else 0.\"\"\"\n",
 "    def __init__(self, name):\n",
 "        self.name = name\n",
 "    def evaluate(self, derive, **variables):\n",
 "        return variables[self.name], (1 if derive == self.name else 0)\n",
 "    def __str__(self):\n",
 "        return self.name\n",
 "\n",
 "class BinaryOperator(object):\n",
 "    \"\"\"Base class for nodes that combine two sub-expressions, a and b.\"\"\"\n",
 "    def __init__(self, a, b):\n",
 "        self.a = a\n",
 "        self.b = b\n",
 "\n",
 "class Add(BinaryOperator):\n",
 "    \"\"\"Sum node: evaluates to (a + b, da + db).\"\"\"\n",
 "    def evaluate(self, derive, **variables):\n",
 "        a, da = self.a.evaluate(derive, **variables)\n",
 "        b, db = self.b.evaluate(derive, **variables)\n",
 "        return a + b, da + db\n",
 "    def __str__(self):\n",
 "        return \"{} + {}\".format(self.a, self.b)\n",
 "\n",
 "class Mul(BinaryOperator):\n",
 "    \"\"\"Product node: evaluates to (a * b, a * db + da * b), i.e. the product rule.\"\"\"\n",
 "    def evaluate(self, derive, **variables):\n",
 "        a, da = self.a.evaluate(derive, **variables)\n",
 "        b, db = self.b.evaluate(derive, **variables)\n",
 "        return a * b, a * db + da * b\n",
 "    def __str__(self):\n",
 "        return \"({}) * ({})\".format(self.a, self.b)\n",
 "\n",
 "x = Var(\"x\")\n",
 "y = Var(\"y\")\n",
 "f = Add(Mul(Mul(x, x), y), Add(y, Const(2)))  # f(x,y) = x²y + y + 2\n",
 "# One full pass over the tree is needed per variable we differentiate by.\n",
 "f34, df_dx_34 = f.evaluate(x=3, y=4, derive=\"x\")\n",
 "f34, df_dy_34 = f.evaluate(x=3, y=4, derive=\"y\")\n",
 "print(\"f(3,4) =\", f34)\n",
 "print(\"df/dx(3,4) =\", df_dx_34)\n",
 "print(\"df/dy(3,4) =\", df_dy_34)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Autodiff – Reverse mode" ] },
{ "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "# Reverse mode: evaluate() stores each node's value on the way up, then\n",
 "# backpropagate() pushes derivatives from the output back down to the leaves.\n",
 "class Const(object):\n",
 "    \"\"\"Constant leaf; backpropagation stops here.\"\"\"\n",
 "    def __init__(self, value):\n",
 "        self.derivative = 0\n",
 "        self.value = value\n",
 "    def evaluate(self, **variables):\n",
 "        return self.value\n",
 "    def backpropagate(self, derivative):\n",
 "        pass\n",
 "    def __str__(self):\n",
 "        return str(self.value)\n",
 "\n",
 "class Var(object):\n",
 "    \"\"\"Variable leaf that accumulates the derivatives flowing back into it.\"\"\"\n",
 "    def __init__(self, name):\n",
 "        self.name = name\n",
 "    def evaluate(self, **variables):\n",
 "        # Reset the accumulator so every evaluate/backpropagate pass starts from 0.\n",
 "        self.derivative = 0\n",
 "        self.value = variables[self.name]\n",
 "        return self.value\n",
 "    def backpropagate(self, derivative):\n",
 "        # += because the same Var object can be reached through several paths, e.g. Mul(x, x).\n",
 "        self.derivative += derivative\n",
 "    def __str__(self):\n",
 "        return self.name\n",
 "\n",
 "class BinaryOperator(object):\n",
 "    \"\"\"Base class for nodes that combine two sub-expressions, a and b.\"\"\"\n",
 "    def __init__(self, a, b):\n",
 "        self.a = a\n",
 "        self.b = b\n",
 "\n",
 "class Add(BinaryOperator):\n",
 "    \"\"\"Sum node: passes the incoming derivative unchanged to both operands.\"\"\"\n",
 "    def evaluate(self, **variables):\n",
 "        self.derivative = 0\n",
 "        self.value = self.a.evaluate(**variables) + self.b.evaluate(**variables)\n",
 "        return self.value\n",
 "    def backpropagate(self, derivative):\n",
 "        self.derivative += derivative\n",
 "        self.a.backpropagate(derivative)\n",
 "        self.b.backpropagate(derivative)\n",
 "    def __str__(self):\n",
 "        return \"{} + {}\".format(self.a, self.b)\n",
 "\n",
 "class Mul(BinaryOperator):\n",
 "    \"\"\"Product node: each operand receives the derivative scaled by the other operand's value.\"\"\"\n",
 "    def evaluate(self, **variables):\n",
 "        self.derivative = 0\n",
 "        self.value = self.a.evaluate(**variables) * self.b.evaluate(**variables)\n",
 "        return self.value\n",
 "    def backpropagate(self, derivative):\n",
 "        self.derivative += derivative\n",
 "        self.a.backpropagate(derivative * self.b.value)\n",
 "        self.b.backpropagate(derivative * self.a.value)\n",
 "    def __str__(self):\n",
 "        return \"({}) * ({})\".format(self.a, self.b)" ] },
{ "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "x = Var(\"x\")\n",
 "y = Var(\"y\")\n",
 "f = Add(Mul(Mul(x, x), y), Add(y, Const(2)))  # f(x,y) = x²y + y + 2\n",
 "f34 = f.evaluate(x=3, y=4)\n",
 "# Seed the output with df/df = 1; one backward pass fills in every variable's derivative.\n",
 "f.backpropagate(1)\n",
 "print(\"f(3,4) =\", f34)\n",
 "print(\"df/dx(3,4) =\", x.derivative)\n",
 "print(\"df/dy(3,4) =\", y.derivative)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Autodiff – reverse mode (using TensorFlow)" ] },
{ "cell_type": "code", "execution_count": 60, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "tf.reset_default_graph()\n",
 "\n",
 "x = tf.Variable(3., name=\"x\")\n",
 "y = tf.Variable(4., name=\"y\")\n",
 "f = x*x*y + y + 2\n",
 "\n",
 "# tf.gradients returns one gradient tensor per entry of [x, y].\n",
 "gradients = tf.gradients(f, [x, y])\n",
 "\n",
 "init = tf.initialize_all_variables()\n",
 "\n",
 "with tf.Session() as sess:\n",
 "    init.run()\n",
 "    f_val, gradients_val = sess.run([f, gradients])\n",
 "\n",
 "f_val, gradients_val" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Exercise solutions" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "**Coming soon**" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 },
"file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" }, "nav_menu": { "height": "603px", "width": "616px" }, "toc": { "navigate_menu": true, "number_sections": true, "sideBar": true, "threshold": 6, "toc_cell": false, "toc_section_display": "block", "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 0 }