1710 lines
46 KiB
Plaintext
1710 lines
46 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"**Chapter 9 – Up and running with TensorFlow**"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"_This notebook contains all the sample code and solutions to the exercises in chapter 9._"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Setup"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"First, let's make sure this notebook works well in both Python 2 and 3, import a few common modules, ensure Matplotlib plots figures inline, and prepare a function to save the figures:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# To support both python 2 and python 3\n",
|
||
"from __future__ import division, print_function, unicode_literals\n",
|
||
"\n",
|
||
"# Common imports\n",
|
||
"import numpy as np\n",
|
||
"import numpy.random as rnd\n",
|
||
"import os\n",
|
||
"\n",
|
||
"# to make this notebook's output stable across runs\n",
|
||
"rnd.seed(42)\n",
|
||
"\n",
|
||
"# To plot pretty figures\n",
|
||
"%matplotlib inline\n",
|
||
"import matplotlib\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"plt.rcParams['axes.labelsize'] = 14\n",
|
||
"plt.rcParams['xtick.labelsize'] = 12\n",
|
||
"plt.rcParams['ytick.labelsize'] = 12\n",
|
||
"\n",
|
||
"# Where to save the figures\n",
|
||
"PROJECT_ROOT_DIR = \".\"\n",
|
||
"CHAPTER_ID = \"tensorflow\"\n",
|
||
"\n",
|
||
"def save_fig(fig_id, tight_layout=True):\n",
|
||
"    \"\"\"Save the current figure as <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/<fig_id>.png.\"\"\"\n",
|
||
"    image_dir = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
|
||
"    # plt.savefig() does not create missing directories, so make sure the target exists\n",
|
||
"    if not os.path.isdir(image_dir):\n",
|
||
"        os.makedirs(image_dir)\n",
|
||
"    path = os.path.join(image_dir, fig_id + \".png\")\n",
|
||
"    print(\"Saving figure\", fig_id)\n",
|
||
"    if tight_layout:\n",
|
||
"        plt.tight_layout()\n",
|
||
"    plt.savefig(path, format='png', dpi=300)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Creating and running a graph"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import tensorflow as tf"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"x = tf.Variable(3, name=\"x\")\n",
|
||
"y = tf.Variable(4, name=\"y\")\n",
|
||
"f = x*x*y + y + 2\n",
|
||
"\n",
|
||
"f"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"sess = tf.Session()\n",
|
||
"sess.run(x.initializer)\n",
|
||
"sess.run(y.initializer)\n",
|
||
"print(sess.run(f))\n",
|
||
"sess.close()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.Session() as sess:\n",
|
||
" x.initializer.run()\n",
|
||
" y.initializer.run()\n",
|
||
" result = f.eval()\n",
|
||
"\n",
|
||
"result"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"init = tf.initialize_all_variables()\n",
|
||
"\n",
|
||
"with tf.Session():\n",
|
||
" init.run()\n",
|
||
" result = f.eval()\n",
|
||
"\n",
|
||
"result"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"init = tf.initialize_all_variables()\n",
|
||
"\n",
|
||
"sess = tf.InteractiveSession()\n",
|
||
"init.run()\n",
|
||
"result = f.eval()\n",
|
||
"sess.close()\n",
|
||
"\n",
|
||
"result"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Managing graphs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"x1 = tf.Variable(1)\n",
|
||
"x1.graph is tf.get_default_graph()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"graph = tf.Graph()\n",
|
||
"with graph.as_default():\n",
|
||
" x2 = tf.Variable(2)\n",
|
||
"\n",
|
||
"x2.graph is tf.get_default_graph()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"x2.graph is graph"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"w = tf.constant(3)\n",
|
||
"x = w + 2\n",
|
||
"y = x + 5\n",
|
||
"z = x * 3\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" print(y.eval()) # 10\n",
|
||
" print(z.eval()) # 15"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.Session() as sess:\n",
|
||
"    y_val, z_val = sess.run([y, z])\n",
|
||
"    # print the evaluated values, not the symbolic tensors\n",
|
||
"    print(y_val)  # 10\n",
|
||
"    print(z_val)  # 15"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Linear Regression"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Using the Normal Equation"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.datasets import fetch_california_housing\n",
|
||
"\n",
|
||
"housing = fetch_california_housing()\n",
|
||
"m, n = housing.data.shape\n",
|
||
"housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"X = tf.constant(housing_data_plus_bias, dtype=tf.float64, name=\"X\")\n",
|
||
"y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float64, name=\"y\")\n",
|
||
"XT = tf.transpose(X)\n",
|
||
"theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" result = theta.eval()\n",
|
||
"\n",
|
||
"print(result)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Compare with pure NumPy"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"X = housing_data_plus_bias\n",
|
||
"y = housing.target.reshape(-1, 1)\n",
|
||
"theta_numpy = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)\n",
|
||
"\n",
|
||
"print(theta_numpy)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Compare with Scikit-Learn"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.linear_model import LinearRegression\n",
|
||
"lin_reg = LinearRegression()\n",
|
||
"lin_reg.fit(housing.data, housing.target.reshape(-1, 1))\n",
|
||
"\n",
|
||
"print(np.r_[lin_reg.intercept_.reshape(-1, 1), lin_reg.coef_.T])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Using Batch Gradient Descent"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Gradient Descent requires scaling the feature vectors first. We could do this using TF, but let's just use Scikit-Learn for now."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.preprocessing import StandardScaler\n",
|
||
"scaler = StandardScaler()\n",
|
||
"scaled_housing_data = scaler.fit_transform(housing.data)\n",
|
||
"scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(scaled_housing_data_plus_bias.mean(axis=0))\n",
|
||
"print(scaled_housing_data_plus_bias.mean(axis=1))\n",
|
||
"print(scaled_housing_data_plus_bias.mean())\n",
|
||
"print(scaled_housing_data_plus_bias.shape)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Manually computing the gradients"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"n_epochs = 1000\n",
|
||
"learning_rate = 0.01\n",
|
||
"\n",
|
||
"X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n",
|
||
"y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n",
|
||
"theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n",
|
||
"y_pred = tf.matmul(X, theta, name=\"predictions\")\n",
|
||
"error = y_pred - y\n",
|
||
"mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n",
|
||
"gradients = 2/m * tf.matmul(tf.transpose(X), error)\n",
|
||
"training_op = tf.assign(theta, theta - learning_rate * gradients)\n",
|
||
"\n",
|
||
"init = tf.initialize_all_variables()\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" sess.run(init)\n",
|
||
"\n",
|
||
" for epoch in range(n_epochs):\n",
|
||
" if epoch % 100 == 0:\n",
|
||
" print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n",
|
||
" sess.run(training_op)\n",
|
||
" \n",
|
||
" best_theta = theta.eval()\n",
|
||
"\n",
|
||
"print(\"Best theta:\")\n",
|
||
"print(best_theta)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Using autodiff\n",
|
||
"Same as above except for the `gradients = ...` line."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"n_epochs = 1000\n",
|
||
"learning_rate = 0.01\n",
|
||
"\n",
|
||
"X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n",
|
||
"y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n",
|
||
"theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n",
|
||
"y_pred = tf.matmul(X, theta, name=\"predictions\")\n",
|
||
"error = y_pred - y\n",
|
||
"mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n",
|
||
"gradients = tf.gradients(mse, [theta])[0]\n",
|
||
"training_op = tf.assign(theta, theta - learning_rate * gradients)\n",
|
||
"\n",
|
||
"init = tf.initialize_all_variables()\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" sess.run(init)\n",
|
||
"\n",
|
||
" for epoch in range(n_epochs):\n",
|
||
" if epoch % 100 == 0:\n",
|
||
" print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n",
|
||
" sess.run(training_op)\n",
|
||
" \n",
|
||
" best_theta = theta.eval()\n",
|
||
"\n",
|
||
"print(\"Best theta:\")\n",
|
||
"print(best_theta)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Using a `GradientDescentOptimizer`"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"n_epochs = 1000\n",
|
||
"learning_rate = 0.01\n",
|
||
"\n",
|
||
"X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n",
|
||
"y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n",
|
||
"theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n",
|
||
"y_pred = tf.matmul(X, theta, name=\"predictions\")\n",
|
||
"error = y_pred - y\n",
|
||
"mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n",
|
||
"optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n",
|
||
"training_op = optimizer.minimize(mse)\n",
|
||
"\n",
|
||
"init = tf.initialize_all_variables()\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" sess.run(init)\n",
|
||
"\n",
|
||
" for epoch in range(n_epochs):\n",
|
||
" if epoch % 100 == 0:\n",
|
||
" print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n",
|
||
" sess.run(training_op)\n",
|
||
" \n",
|
||
" best_theta = theta.eval()\n",
|
||
"\n",
|
||
"print(\"Best theta:\")\n",
|
||
"print(best_theta)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Using a momentum optimizer"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"n_epochs = 1000\n",
|
||
"learning_rate = 0.01\n",
|
||
"\n",
|
||
"X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n",
|
||
"y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n",
|
||
"theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n",
|
||
"y_pred = tf.matmul(X, theta, name=\"predictions\")\n",
|
||
"error = y_pred - y\n",
|
||
"mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n",
|
||
"optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.25)\n",
|
||
"training_op = optimizer.minimize(mse)\n",
|
||
"\n",
|
||
"init = tf.initialize_all_variables()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.Session() as sess:\n",
|
||
" sess.run(init)\n",
|
||
"\n",
|
||
" for epoch in range(n_epochs):\n",
|
||
" sess.run(training_op)\n",
|
||
" \n",
|
||
" best_theta = theta.eval()\n",
|
||
"\n",
|
||
"print(\"Best theta:\")\n",
|
||
"print(best_theta)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Feeding data to the training algorithm"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Placeholder nodes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"A = tf.placeholder(tf.float32, shape=(None, 3))\n",
|
||
"B = A + 5\n",
|
||
"with tf.Session() as sess:\n",
|
||
"    B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})\n",
|
||
"    B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})\n",
|
||
"\n",
|
||
"print(B_val_1)\n",
|
||
"print(B_val_2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Mini-batch Gradient Descent"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"n_epochs = 1000\n",
|
||
"learning_rate = 0.01\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, shape=(None, n + 1), name=\"X\")\n",
|
||
"y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n",
|
||
"theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n",
|
||
"y_pred = tf.matmul(X, theta, name=\"predictions\")\n",
|
||
"error = y_pred - y\n",
|
||
"mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n",
|
||
"optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n",
|
||
"training_op = optimizer.minimize(mse)\n",
|
||
"\n",
|
||
"init = tf.initialize_all_variables()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"def fetch_batch(epoch, batch_index, batch_size):\n",
|
||
"    \"\"\"Return a random mini-batch (X_batch, y_batch) drawn from the scaled housing data.\"\"\"\n",
|
||
"    # A private generator yields the same indices as re-seeding the global one,\n",
|
||
"    # without resetting the notebook-wide random state on every batch\n",
|
||
"    rng = rnd.RandomState(epoch * n_batches + batch_index)\n",
|
||
"    indices = rng.randint(m, size=batch_size)\n",
|
||
"    X_batch = scaled_housing_data_plus_bias[indices]\n",
|
||
"    y_batch = housing.target.reshape(-1, 1)[indices]\n",
|
||
"    return X_batch, y_batch\n",
|
||
"\n",
|
||
"n_epochs = 10\n",
|
||
"batch_size = 100\n",
|
||
"n_batches = int(np.ceil(m / batch_size))\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" sess.run(init)\n",
|
||
"\n",
|
||
" for epoch in range(n_epochs):\n",
|
||
" for batch_index in range(n_batches):\n",
|
||
" X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)\n",
|
||
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
|
||
"\n",
|
||
" best_theta = theta.eval()\n",
|
||
" \n",
|
||
"print(\"Best theta:\")\n",
|
||
"print(best_theta)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Saving and restoring a model"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"n_epochs = 1000\n",
|
||
"learning_rate = 0.01\n",
|
||
"\n",
|
||
"X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name=\"X\")\n",
|
||
"y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name=\"y\")\n",
|
||
"theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n",
|
||
"y_pred = tf.matmul(X, theta, name=\"predictions\")\n",
|
||
"error = y_pred - y\n",
|
||
"mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n",
|
||
"optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n",
|
||
"training_op = optimizer.minimize(mse)\n",
|
||
"\n",
|
||
"init = tf.initialize_all_variables()\n",
|
||
"saver = tf.train.Saver()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.Session() as sess:\n",
|
||
" sess.run(init)\n",
|
||
"\n",
|
||
" for epoch in range(n_epochs):\n",
|
||
" if epoch % 100 == 0:\n",
|
||
" print(\"Epoch\", epoch, \"MSE =\", mse.eval())\n",
|
||
" save_path = saver.save(sess, \"/tmp/my_model.ckpt\")\n",
|
||
" sess.run(training_op)\n",
|
||
" \n",
|
||
" best_theta = theta.eval()\n",
|
||
" save_path = saver.save(sess, \"./my_model_final.ckpt\")\n",
|
||
"\n",
|
||
"print(\"Best theta:\")\n",
|
||
"print(best_theta)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Visualizing the graph\n",
|
||
"## Inside Jupyter"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from IPython.display import clear_output, Image, display, HTML\n",
|
||
"\n",
|
||
"def strip_consts(graph_def, max_const_size=32):\n",
|
||
" \"\"\"Strip large constant values from graph_def.\"\"\"\n",
|
||
" strip_def = tf.GraphDef()\n",
|
||
" for n0 in graph_def.node:\n",
|
||
" n = strip_def.node.add() \n",
|
||
" n.MergeFrom(n0)\n",
|
||
" if n.op == 'Const':\n",
|
||
" tensor = n.attr['value'].tensor\n",
|
||
" size = len(tensor.tensor_content)\n",
|
||
" if size > max_const_size:\n",
|
||
" tensor.tensor_content = b\"<stripped %d bytes>\"%size\n",
|
||
" return strip_def\n",
|
||
"\n",
|
||
"def show_graph(graph_def, max_const_size=32):\n",
|
||
" \"\"\"Visualize TensorFlow graph.\"\"\"\n",
|
||
" if hasattr(graph_def, 'as_graph_def'):\n",
|
||
" graph_def = graph_def.as_graph_def()\n",
|
||
" strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n",
|
||
" code = \"\"\"\n",
|
||
" <script>\n",
|
||
" function load() {{\n",
|
||
" document.getElementById(\"{id}\").pbtxt = {data};\n",
|
||
" }}\n",
|
||
" </script>\n",
|
||
" <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n",
|
||
" <div style=\"height:600px\">\n",
|
||
" <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n",
|
||
" </div>\n",
|
||
" \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n",
|
||
"\n",
|
||
" iframe = \"\"\"\n",
|
||
" <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n",
|
||
" \"\"\".format(code.replace('\"', '&quot;'))\n",
|
||
" display(HTML(iframe))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"show_graph(tf.get_default_graph())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Using TensorBoard"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"from datetime import datetime\n",
|
||
"\n",
|
||
"now = datetime.utcnow().strftime(\"%Y%m%d%H%M%S\")\n",
|
||
"root_logdir = \"tf_logs\"\n",
|
||
"logdir = \"{}/run-{}/\".format(root_logdir, now)\n",
|
||
"\n",
|
||
"n_epochs = 1000\n",
|
||
"learning_rate = 0.01\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, shape=(None, n + 1), name=\"X\")\n",
|
||
"y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n",
|
||
"theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n",
|
||
"y_pred = tf.matmul(X, theta, name=\"predictions\")\n",
|
||
"error = y_pred - y\n",
|
||
"mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n",
|
||
"optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n",
|
||
"training_op = optimizer.minimize(mse)\n",
|
||
"\n",
|
||
"init = tf.initialize_all_variables()\n",
|
||
"\n",
|
||
"mse_summary = tf.scalar_summary('MSE', mse)\n",
|
||
"summary_writer = tf.train.SummaryWriter(logdir, tf.get_default_graph())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_epochs = 10\n",
|
||
"batch_size = 100\n",
|
||
"n_batches = int(np.ceil(m / batch_size))\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" sess.run(init)\n",
|
||
"\n",
|
||
" for epoch in range(n_epochs):\n",
|
||
" for batch_index in range(n_batches):\n",
|
||
" X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)\n",
|
||
" if batch_index % 10 == 0:\n",
|
||
" summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})\n",
|
||
" step = epoch * n_batches + batch_index\n",
|
||
" summary_writer.add_summary(summary_str, step)\n",
|
||
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
|
||
"\n",
|
||
" best_theta = theta.eval()\n",
|
||
"\n",
|
||
"summary_writer.flush()\n",
|
||
"summary_writer.close()\n",
|
||
"print(\"Best theta:\")\n",
|
||
"print(best_theta)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Name scopes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"now = datetime.utcnow().strftime(\"%Y%m%d%H%M%S\")\n",
|
||
"root_logdir = \"tf_logs\"\n",
|
||
"logdir = \"{}/run-{}/\".format(root_logdir, now)\n",
|
||
"\n",
|
||
"n_epochs = 1000\n",
|
||
"learning_rate = 0.01\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, shape=(None, n + 1), name=\"X\")\n",
|
||
"y = tf.placeholder(tf.float32, shape=(None, 1), name=\"y\")\n",
|
||
"theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name=\"theta\")\n",
|
||
"y_pred = tf.matmul(X, theta, name=\"predictions\")\n",
|
||
"with tf.name_scope('loss') as scope:\n",
|
||
" error = y_pred - y\n",
|
||
" mse = tf.reduce_mean(tf.square(error), name=\"mse\")\n",
|
||
"optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n",
|
||
"training_op = optimizer.minimize(mse)\n",
|
||
"\n",
|
||
"init = tf.initialize_all_variables()\n",
|
||
"\n",
|
||
"mse_summary = tf.scalar_summary('MSE', mse)\n",
|
||
"summary_writer = tf.train.SummaryWriter(logdir, tf.get_default_graph())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_epochs = 10\n",
|
||
"batch_size = 100\n",
|
||
"n_batches = int(np.ceil(m / batch_size))\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" sess.run(init)\n",
|
||
"\n",
|
||
" for epoch in range(n_epochs):\n",
|
||
" for batch_index in range(n_batches):\n",
|
||
" X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)\n",
|
||
" if batch_index % 10 == 0:\n",
|
||
" summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})\n",
|
||
" step = epoch * n_batches + batch_index\n",
|
||
" summary_writer.add_summary(summary_str, step)\n",
|
||
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
|
||
"\n",
|
||
" best_theta = theta.eval()\n",
|
||
"\n",
|
||
"summary_writer.flush()\n",
|
||
"summary_writer.close()\n",
|
||
"print(\"Best theta:\")\n",
|
||
"print(best_theta)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(error.op.name)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(mse.op.name)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"a1 = tf.Variable(0, name=\"a\") # name == \"a\"\n",
|
||
"a2 = tf.Variable(0, name=\"a\") # name == \"a_1\"\n",
|
||
"\n",
|
||
"with tf.name_scope(\"param\"): # name == \"param\"\n",
|
||
" a3 = tf.Variable(0, name=\"a\") # name == \"param/a\"\n",
|
||
"\n",
|
||
"with tf.name_scope(\"param\"): # name == \"param_1\"\n",
|
||
" a4 = tf.Variable(0, name=\"a\") # name == \"param_1/a\"\n",
|
||
"\n",
|
||
"for node in (a1, a2, a3, a4):\n",
|
||
" print(node.op.name)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Modularity"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Ugly, flat code:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"n_features = 3\n",
|
||
"X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n",
|
||
"\n",
|
||
"w1 = tf.Variable(tf.random_normal((n_features, 1)), name=\"weights1\")\n",
|
||
"w2 = tf.Variable(tf.random_normal((n_features, 1)), name=\"weights2\")\n",
|
||
"b1 = tf.Variable(0.0, name=\"bias1\")\n",
|
||
"b2 = tf.Variable(0.0, name=\"bias2\")\n",
|
||
"\n",
|
||
"linear1 = tf.add(tf.matmul(X, w1), b1, name=\"linear1\")\n",
|
||
"linear2 = tf.add(tf.matmul(X, w2), b2, name=\"linear2\")\n",
|
||
"\n",
|
||
"relu1 = tf.maximum(linear1, 0, name=\"relu1\")\n",
|
||
"relu2 = tf.maximum(linear1, 0, name=\"relu2\") # Oops, cut&paste error! Did you spot it?\n",
|
||
"\n",
|
||
"output = tf.add_n([relu1, relu2], name=\"output\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Much better, using a function to build the ReLUs:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"def relu(X):\n",
|
||
" w_shape = int(X.get_shape()[1]), 1\n",
|
||
" w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n",
|
||
" b = tf.Variable(0.0, name=\"bias\")\n",
|
||
" linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n",
|
||
" return tf.maximum(linear, 0, name=\"relu\")\n",
|
||
"\n",
|
||
"n_features = 3\n",
|
||
"X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n",
|
||
"relus = [relu(X) for i in range(5)]\n",
|
||
"output = tf.add_n(relus, name=\"output\")\n",
|
||
"summary_writer = tf.train.SummaryWriter(\"logs/relu1\", tf.get_default_graph())\n",
|
||
"summary_writer.close()  # flush the graph to disk before the next cell rebinds this name"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Even better using name scopes:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"def relu(X):\n",
|
||
" with tf.name_scope(\"relu\"):\n",
|
||
" w_shape = int(X.get_shape()[1]), 1\n",
|
||
" w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n",
|
||
" b = tf.Variable(0.0, name=\"bias\")\n",
|
||
" linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n",
|
||
" return tf.maximum(linear, 0, name=\"max\")\n",
|
||
"\n",
|
||
"n_features = 3\n",
|
||
"X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n",
|
||
"relus = [relu(X) for i in range(5)]\n",
|
||
"output = tf.add_n(relus, name=\"output\")\n",
|
||
"\n",
|
||
"summary_writer = tf.train.SummaryWriter(\"logs/relu2\", tf.get_default_graph())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"summary_writer.close()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Sharing a `threshold` variable the classic way, by defining it outside of the `relu()` function then passing it as a parameter:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"def relu(X, threshold):\n",
|
||
" with tf.name_scope(\"relu\"):\n",
|
||
" w_shape = int(X.get_shape()[1]), 1\n",
|
||
" w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n",
|
||
" b = tf.Variable(0.0, name=\"bias\")\n",
|
||
" linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n",
|
||
" return tf.maximum(linear, threshold, name=\"max\")\n",
|
||
"\n",
|
||
"threshold = tf.Variable(0.0, name=\"threshold\")\n",
|
||
"X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n",
|
||
"relus = [relu(X, threshold) for i in range(5)]\n",
|
||
"output = tf.add_n(relus, name=\"output\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"def relu(X):\n",
|
||
" with tf.name_scope(\"relu\"):\n",
|
||
" if not hasattr(relu, \"threshold\"):\n",
|
||
" relu.threshold = tf.Variable(0.0, name=\"threshold\")\n",
|
||
" w_shape = int(X.get_shape()[1]), 1\n",
|
||
" w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n",
|
||
" b = tf.Variable(0.0, name=\"bias\")\n",
|
||
" linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n",
|
||
" return tf.maximum(linear, relu.threshold, name=\"max\")\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n",
|
||
"relus = [relu(X) for i in range(5)]\n",
|
||
"output = tf.add_n(relus, name=\"output\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"def relu(X):\n",
|
||
" with tf.variable_scope(\"relu\", reuse=True):\n",
|
||
" threshold = tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n",
|
||
" w_shape = int(X.get_shape()[1]), 1\n",
|
||
" w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n",
|
||
" b = tf.Variable(0.0, name=\"bias\")\n",
|
||
" linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n",
|
||
" return tf.maximum(linear, threshold, name=\"max\")\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n",
|
||
"with tf.variable_scope(\"relu\"):\n",
|
||
" threshold = tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n",
|
||
"relus = [relu(X) for i in range(5)]\n",
|
||
"output = tf.add_n(relus, name=\"output\")\n",
|
||
"\n",
|
||
"summary_writer = tf.train.SummaryWriter(\"logs/relu6\", tf.get_default_graph())\n",
|
||
"summary_writer.close()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"def relu(X):\n",
|
||
" with tf.variable_scope(\"relu\"):\n",
|
||
" threshold = tf.get_variable(\"threshold\", shape=(), initializer=tf.constant_initializer(0.0))\n",
|
||
" w_shape = int(X.get_shape()[1]), 1\n",
|
||
" w = tf.Variable(tf.random_normal(w_shape), name=\"weights\")\n",
|
||
" b = tf.Variable(0.0, name=\"bias\")\n",
|
||
" linear = tf.add(tf.matmul(X, w), b, name=\"linear\")\n",
|
||
" return tf.maximum(linear, threshold, name=\"max\")\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, shape=(None, n_features), name=\"X\")\n",
|
||
"with tf.variable_scope(\"\") as scope:\n",
|
||
" first_relu = relu(X) # create the shared variable\n",
|
||
" scope.reuse_variables() # then reuse it\n",
|
||
" relus = [first_relu] + [relu(X) for i in range(4)]\n",
|
||
"output = tf.add_n(relus, name=\"output\")\n",
|
||
"\n",
|
||
"summary_writer = tf.train.SummaryWriter(\"logs/relu8\", tf.get_default_graph())\n",
|
||
"summary_writer.close()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"with tf.variable_scope(\"param\"):\n",
|
||
" x = tf.get_variable(\"x\", shape=(), initializer=tf.constant_initializer(0.))\n",
|
||
" #x = tf.Variable(0., name=\"x\")\n",
|
||
"with tf.variable_scope(\"param\", reuse=True):\n",
|
||
" y = tf.get_variable(\"x\")\n",
|
||
"\n",
|
||
"with tf.variable_scope(\"\", reuse=True):\n",
|
||
" z = tf.get_variable(\"param/x\", shape=(), initializer=tf.constant_initializer(0.))\n",
|
||
"\n",
|
||
"print(x is y)\n",
|
||
"print(x.op.name)\n",
|
||
"print(y.op.name)\n",
|
||
"print(z.op.name)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Extra material"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Strings"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 47,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"text = np.array(\"Do you want some café?\".split())\n",
|
||
"text_tensor = tf.constant(text)\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" print(text_tensor.eval())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Distributed TensorFlow"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"server = tf.train.Server.create_local_server()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 49,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"x = tf.constant(2) + tf.constant(3)\n",
|
||
"with tf.Session(server.target) as sess:\n",
|
||
" print(sess.run(x))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 50,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"server.target"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 51,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"class Const(object):\n",
|
||
" def __init__(self, value):\n",
|
||
" self.value = value\n",
|
||
" def evaluate(self, **variables):\n",
|
||
" return self.value\n",
|
||
" def __str__(self):\n",
|
||
" return str(self.value)\n",
|
||
"\n",
|
||
"class Var(object):\n",
|
||
" def __init__(self, name):\n",
|
||
" self.name = name\n",
|
||
" def evaluate(self, **variables):\n",
|
||
" return variables[self.name]\n",
|
||
" def __str__(self):\n",
|
||
" return self.name\n",
|
||
"\n",
|
||
"class BinaryOperator(object):\n",
|
||
" def __init__(self, a, b):\n",
|
||
" self.a = a\n",
|
||
" self.b = b\n",
|
||
"\n",
|
||
"class Add(BinaryOperator):\n",
|
||
" def evaluate(self, **variables):\n",
|
||
" return self.a.evaluate(**variables) + self.b.evaluate(**variables)\n",
|
||
" def __str__(self):\n",
|
||
" return \"{} + {}\".format(self.a, self.b)\n",
|
||
"\n",
|
||
"class Mul(BinaryOperator):\n",
|
||
" def evaluate(self, **variables):\n",
|
||
" return self.a.evaluate(**variables) * self.b.evaluate(**variables)\n",
|
||
" def __str__(self):\n",
|
||
" return \"({}) * ({})\".format(self.a, self.b)\n",
|
||
"\n",
|
||
"x = Var(\"x\")\n",
|
||
"y = Var(\"y\")\n",
|
||
"f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n",
|
||
"print(\"f(x,y) =\", f)\n",
|
||
"print(\"f(3,4) =\", f.evaluate(x=3, y=4))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Computing gradients\n",
|
||
"### Mathematical differentiation"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 52,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"df_dx = Mul(Const(2), Mul(Var(\"x\"), Var(\"y\"))) # df/dx = 2xy\n",
|
||
"df_dy = Add(Mul(Var(\"x\"), Var(\"x\")), Const(1)) # df/dy = x² + 1\n",
|
||
"print(\"df/dx(3,4) =\", df_dx.evaluate(x=3, y=4))\n",
|
||
"print(\"df/dy(3,4) =\", df_dy.evaluate(x=3, y=4))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Numerical differentiation"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 53,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"def derivative(f, x, y, x_eps, y_eps):\n",
|
||
" return (f.evaluate(x = x + x_eps, y = y + y_eps) - f.evaluate(x = x, y = y)) / (x_eps + y_eps)\n",
|
||
"\n",
|
||
"df_dx_34 = derivative(f, x=3, y=4, x_eps=0.0001, y_eps=0)\n",
|
||
"df_dy_34 = derivative(f, x=3, y=4, x_eps=0, y_eps=0.0001)\n",
|
||
"print(\"df/dx(3,4) =\", df_dx_34)\n",
|
||
"print(\"df/dy(3,4) =\", df_dy_34)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 54,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"def f(x, y):\n",
|
||
" return x**2*y + y + 2\n",
|
||
"\n",
|
||
"def derivative(f, x, y, x_eps, y_eps):\n",
|
||
" return (f(x + x_eps, y + y_eps) - f(x, y)) / (x_eps + y_eps)\n",
|
||
"\n",
|
||
"df_dx = derivative(f, 3, 4, 0.00001, 0)\n",
|
||
"df_dy = derivative(f, 3, 4, 0, 0.00001)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 55,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(df_dx)\n",
|
||
"print(df_dy)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Symbolic differentiation"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 56,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"Const.derive = lambda self, var: Const(0)\n",
|
||
"Var.derive = lambda self, var: Const(1) if self.name==var else Const(0)\n",
|
||
"Add.derive = lambda self, var: Add(self.a.derive(var), self.b.derive(var))\n",
|
||
"Mul.derive = lambda self, var: Add(Mul(self.a, self.b.derive(var)), Mul(self.a.derive(var), self.b))\n",
|
||
"\n",
|
||
"x = Var(\"x\")\n",
|
||
"y = Var(\"y\")\n",
|
||
"f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n",
|
||
"\n",
|
||
"df_dx = f.derive(\"x\") # 2xy\n",
|
||
"df_dy = f.derive(\"y\") # x² + 1\n",
|
||
"print(\"df/dx(3,4) =\", df_dx.evaluate(x=3, y=4))\n",
|
||
"print(\"df/dy(3,4) =\", df_dy.evaluate(x=3, y=4))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Automatic differentiation (autodiff) – forward mode"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"class Const(object):\n",
|
||
" def __init__(self, value):\n",
|
||
" self.value = value\n",
|
||
" def evaluate(self, derive, **variables):\n",
|
||
" return self.value, 0\n",
|
||
" def __str__(self):\n",
|
||
" return str(self.value)\n",
|
||
"\n",
|
||
"class Var(object):\n",
|
||
" def __init__(self, name):\n",
|
||
" self.name = name\n",
|
||
" def evaluate(self, derive, **variables):\n",
|
||
" return variables[self.name], (1 if derive == self.name else 0)\n",
|
||
" def __str__(self):\n",
|
||
" return self.name\n",
|
||
"\n",
|
||
"class BinaryOperator(object):\n",
|
||
" def __init__(self, a, b):\n",
|
||
" self.a = a\n",
|
||
" self.b = b\n",
|
||
"\n",
|
||
"class Add(BinaryOperator):\n",
|
||
" def evaluate(self, derive, **variables):\n",
|
||
" a, da = self.a.evaluate(derive, **variables)\n",
|
||
" b, db = self.b.evaluate(derive, **variables)\n",
|
||
" return a + b, da + db\n",
|
||
" def __str__(self):\n",
|
||
" return \"{} + {}\".format(self.a, self.b)\n",
|
||
"\n",
|
||
"class Mul(BinaryOperator):\n",
|
||
" def evaluate(self, derive, **variables):\n",
|
||
" a, da = self.a.evaluate(derive, **variables)\n",
|
||
" b, db = self.b.evaluate(derive, **variables)\n",
|
||
" return a * b, a * db + da * b\n",
|
||
" def __str__(self):\n",
|
||
" return \"({}) * ({})\".format(self.a, self.b)\n",
|
||
"\n",
|
||
"x = Var(\"x\")\n",
|
||
"y = Var(\"y\")\n",
|
||
"f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n",
|
||
"f34, df_dx_34 = f.evaluate(x=3, y=4, derive=\"x\")\n",
|
||
"f34, df_dy_34 = f.evaluate(x=3, y=4, derive=\"y\")\n",
|
||
"print(\"f(3,4) =\", f34)\n",
|
||
"print(\"df/dx(3,4) =\", df_dx_34)\n",
|
||
"print(\"df/dy(3,4) =\", df_dy_34)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Autodiff – Reverse mode"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 58,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"class Const(object):\n",
|
||
" def __init__(self, value):\n",
|
||
" self.derivative = 0\n",
|
||
" self.value = value\n",
|
||
" def evaluate(self, **variables):\n",
|
||
" return self.value\n",
|
||
" def backpropagate(self, derivative):\n",
|
||
" pass\n",
|
||
" def __str__(self):\n",
|
||
" return str(self.value)\n",
|
||
"\n",
|
||
"class Var(object):\n",
|
||
" def __init__(self, name):\n",
|
||
" self.name = name\n",
|
||
" def evaluate(self, **variables):\n",
|
||
" self.derivative = 0\n",
|
||
" self.value = variables[self.name]\n",
|
||
" return self.value\n",
|
||
" def backpropagate(self, derivative):\n",
|
||
" self.derivative += derivative\n",
|
||
" def __str__(self):\n",
|
||
" return self.name\n",
|
||
"\n",
|
||
"class BinaryOperator(object):\n",
|
||
" def __init__(self, a, b):\n",
|
||
" self.a = a\n",
|
||
" self.b = b\n",
|
||
"\n",
|
||
"class Add(BinaryOperator):\n",
|
||
" def evaluate(self, **variables):\n",
|
||
" self.derivative = 0\n",
|
||
" self.value = self.a.evaluate(**variables) + self.b.evaluate(**variables)\n",
|
||
" return self.value\n",
|
||
" def backpropagate(self, derivative):\n",
|
||
" self.derivative += derivative\n",
|
||
" self.a.backpropagate(derivative)\n",
|
||
" self.b.backpropagate(derivative)\n",
|
||
" def __str__(self):\n",
|
||
" return \"{} + {}\".format(self.a, self.b)\n",
|
||
"\n",
|
||
"class Mul(BinaryOperator):\n",
|
||
" def evaluate(self, **variables):\n",
|
||
" self.derivative = 0\n",
|
||
" self.value = self.a.evaluate(**variables) * self.b.evaluate(**variables)\n",
|
||
" return self.value\n",
|
||
" def backpropagate(self, derivative):\n",
|
||
" self.derivative += derivative\n",
|
||
" self.a.backpropagate(derivative * self.b.value)\n",
|
||
" self.b.backpropagate(derivative * self.a.value)\n",
|
||
" def __str__(self):\n",
|
||
" return \"({}) * ({})\".format(self.a, self.b)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 59,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"x = Var(\"x\")\n",
|
||
"y = Var(\"y\")\n",
|
||
"f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n",
|
||
"f34 = f.evaluate(x=3, y=4)\n",
|
||
"f.backpropagate(1)\n",
|
||
"print(\"f(3,4) =\", f34)\n",
|
||
"print(\"df/dx(3,4) =\", x.derivative)\n",
|
||
"print(\"df/dy(3,4) =\", y.derivative)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Autodiff – reverse mode (using TensorFlow)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 60,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"x = tf.Variable(3., name=\"x\")\n",
|
||
"y = tf.Variable(4., name=\"x\")\n",
|
||
"f = x*x*y + y + 2\n",
|
||
"\n",
|
||
"gradients = tf.gradients(f, [x, y])\n",
|
||
"\n",
|
||
"init = tf.initialize_all_variables()\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" init.run()\n",
|
||
" f_val, gradients_val = sess.run([f, gradients])\n",
|
||
"\n",
|
||
"f_val, gradients_val"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Exercise solutions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"**Coming soon**"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.5.1"
|
||
},
|
||
"nav_menu": {
|
||
"height": "603px",
|
||
"width": "616px"
|
||
},
|
||
"toc": {
|
||
"navigate_menu": true,
|
||
"number_sections": true,
|
||
"sideBar": true,
|
||
"threshold": 6,
|
||
"toc_cell": false,
|
||
"toc_section_display": "block",
|
||
"toc_window_display": true
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 0
|
||
}
|