handson-ml/10_introduction_to_artifici...

1278 lines
33 KiB
Plaintext
Raw Normal View History

2016-09-27 23:31:21 +02:00
{
"cells": [
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"**Chapter 10 Introduction to Artificial Neural Networks**"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
2017-08-19 17:01:55 +02:00
"_This notebook contains all the sample code and solutions to the exercises in chapter 10._"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"# Setup"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"# To support both python 2 and python 3\n",
"from __future__ import division, print_function, unicode_literals\n",
"\n",
"# Common imports\n",
"import numpy as np\n",
"import os\n",
"\n",
"# to make this notebook's output stable across runs\n",
"def reset_graph(seed=42):\n",
" tf.reset_default_graph()\n",
" tf.set_random_seed(seed)\n",
" np.random.seed(seed)\n",
2016-09-27 23:31:21 +02:00
"\n",
"# To plot pretty figures\n",
"%matplotlib inline\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"plt.rcParams['axes.labelsize'] = 14\n",
"plt.rcParams['xtick.labelsize'] = 12\n",
"plt.rcParams['ytick.labelsize'] = 12\n",
"\n",
"# Where to save the figures\n",
"PROJECT_ROOT_DIR = \".\"\n",
"CHAPTER_ID = \"ann\"\n",
"\n",
"def save_fig(fig_id, tight_layout=True):\n",
" path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n",
" print(\"Saving figure\", fig_id)\n",
" if tight_layout:\n",
" plt.tight_layout()\n",
" plt.savefig(path, format='png', dpi=300)"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"# Perceptrons"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"import numpy as np\n",
2016-09-27 23:31:21 +02:00
"from sklearn.datasets import load_iris\n",
"from sklearn.linear_model import Perceptron\n",
"\n",
2016-09-27 23:31:21 +02:00
"iris = load_iris()\n",
"X = iris.data[:, (2, 3)] # petal length, petal width\n",
"y = (iris.target == 0).astype(np.int)\n",
"\n",
"per_clf = Perceptron(random_state=42)\n",
"per_clf.fit(X, y)\n",
"\n",
"y_pred = per_clf.predict([[2, 0.5]])"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"y_pred"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"a = -per_clf.coef_[0][0] / per_clf.coef_[0][1]\n",
"b = -per_clf.intercept_ / per_clf.coef_[0][1]\n",
"\n",
"axes = [0, 5, 0, 2]\n",
"\n",
"x0, x1 = np.meshgrid(\n",
" np.linspace(axes[0], axes[1], 500).reshape(-1, 1),\n",
" np.linspace(axes[2], axes[3], 200).reshape(-1, 1),\n",
" )\n",
"X_new = np.c_[x0.ravel(), x1.ravel()]\n",
"y_predict = per_clf.predict(X_new)\n",
"zz = y_predict.reshape(x0.shape)\n",
"\n",
"plt.figure(figsize=(10, 4))\n",
"plt.plot(X[y==0, 0], X[y==0, 1], \"bs\", label=\"Not Iris-Setosa\")\n",
"plt.plot(X[y==1, 0], X[y==1, 1], \"yo\", label=\"Iris-Setosa\")\n",
"\n",
"plt.plot([axes[0], axes[1]], [a * axes[0] + b, a * axes[1] + b], \"k-\", linewidth=3)\n",
"from matplotlib.colors import ListedColormap\n",
"custom_cmap = ListedColormap(['#9898ff', '#fafab0'])\n",
"\n",
"plt.contourf(x0, x1, zz, cmap=custom_cmap, linewidth=5)\n",
"plt.xlabel(\"Petal length\", fontsize=14)\n",
"plt.ylabel(\"Petal width\", fontsize=14)\n",
"plt.legend(loc=\"lower right\", fontsize=14)\n",
"plt.axis(axes)\n",
"\n",
"save_fig(\"perceptron_iris_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"# Activation functions"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"def logit(z):\n",
" return 1 / (1 + np.exp(-z))\n",
"\n",
"def relu(z):\n",
" return np.maximum(0, z)\n",
"\n",
"def derivative(f, z, eps=0.000001):\n",
" return (f(z + eps) - f(z - eps))/(2 * eps)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"z = np.linspace(-5, 5, 200)\n",
"\n",
"plt.figure(figsize=(11,4))\n",
"\n",
"plt.subplot(121)\n",
"plt.plot(z, np.sign(z), \"r-\", linewidth=2, label=\"Step\")\n",
"plt.plot(z, logit(z), \"g--\", linewidth=2, label=\"Logit\")\n",
"plt.plot(z, np.tanh(z), \"b-\", linewidth=2, label=\"Tanh\")\n",
"plt.plot(z, relu(z), \"m-.\", linewidth=2, label=\"ReLU\")\n",
"plt.grid(True)\n",
"plt.legend(loc=\"center right\", fontsize=14)\n",
"plt.title(\"Activation functions\", fontsize=14)\n",
"plt.axis([-5, 5, -1.2, 1.2])\n",
"\n",
"plt.subplot(122)\n",
"plt.plot(z, derivative(np.sign, z), \"r-\", linewidth=2, label=\"Step\")\n",
"plt.plot(0, 0, \"ro\", markersize=5)\n",
"plt.plot(0, 0, \"rx\", markersize=10)\n",
"plt.plot(z, derivative(logit, z), \"g--\", linewidth=2, label=\"Logit\")\n",
"plt.plot(z, derivative(np.tanh, z), \"b-\", linewidth=2, label=\"Tanh\")\n",
"plt.plot(z, derivative(relu, z), \"m-.\", linewidth=2, label=\"ReLU\")\n",
"plt.grid(True)\n",
"#plt.legend(loc=\"center right\", fontsize=14)\n",
"plt.title(\"Derivatives\", fontsize=14)\n",
"plt.axis([-5, 5, -0.2, 1.2])\n",
"\n",
"save_fig(\"activation_functions_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"def heaviside(z):\n",
" return (z >= 0).astype(z.dtype)\n",
"\n",
"def sigmoid(z):\n",
" return 1/(1+np.exp(-z))\n",
"\n",
"def mlp_xor(x1, x2, activation=heaviside):\n",
" return activation(-activation(x1 + x2 - 1.5) + activation(x1 + x2 - 0.5) - 0.5)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"x1s = np.linspace(-0.2, 1.2, 100)\n",
"x2s = np.linspace(-0.2, 1.2, 100)\n",
"x1, x2 = np.meshgrid(x1s, x2s)\n",
"\n",
"z1 = mlp_xor(x1, x2, activation=heaviside)\n",
"z2 = mlp_xor(x1, x2, activation=sigmoid)\n",
"\n",
"plt.figure(figsize=(10,4))\n",
"\n",
"plt.subplot(121)\n",
"plt.contourf(x1, x2, z1)\n",
"plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n",
"plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n",
"plt.title(\"Activation function: heaviside\", fontsize=14)\n",
"plt.grid(True)\n",
"\n",
"plt.subplot(122)\n",
"plt.contourf(x1, x2, z2)\n",
"plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n",
"plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n",
"plt.title(\"Activation function: sigmoid\", fontsize=14)\n",
"plt.grid(True)"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"# FNN for MNIST"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"## using tf.learn"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"from tensorflow.examples.tutorials.mnist import input_data\n",
"\n",
"mnist = input_data.read_data_sets(\"/tmp/data/\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"X_train = mnist.train.images\n",
"X_test = mnist.test.images\n",
"y_train = mnist.train.labels.astype(\"int\")\n",
"y_test = mnist.test.labels.astype(\"int\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"config = tf.contrib.learn.RunConfig(tf_random_seed=42) # not shown in the config\n",
"\n",
"feature_cols = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)\n",
"dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[300,100], n_classes=10,\n",
" feature_columns=feature_cols, config=config)\n",
"dnn_clf = tf.contrib.learn.SKCompat(dnn_clf) # if TensorFlow >= 1.1\n",
"dnn_clf.fit(X_train, y_train, batch_size=50, steps=40000)"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 12,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score\n",
"\n",
"y_pred = dnn_clf.predict(X_test)\n",
"accuracy_score(y_test, y_pred['classes'])"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 13,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"from sklearn.metrics import log_loss\n",
"\n",
"y_pred_proba = y_pred['probabilities']\n",
2016-09-27 23:31:21 +02:00
"log_loss(y_test, y_pred_proba)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"source": [
"## Using plain TensorFlow"
]
},
2016-09-27 23:31:21 +02:00
{
"cell_type": "code",
"execution_count": 14,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"n_inputs = 28*28 # MNIST\n",
"n_hidden1 = 300\n",
"n_hidden2 = 100\n",
"n_outputs = 10"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 15,
2016-09-27 23:31:21 +02:00
"metadata": {
"collapsed": false,
2017-02-17 11:51:26 +01:00
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
2016-09-27 23:31:21 +02:00
"source": [
"reset_graph()\n",
"\n",
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 16,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"def neuron_layer(X, n_neurons, name, activation=None):\n",
" with tf.name_scope(name):\n",
" n_inputs = int(X.get_shape()[1])\n",
" stddev = 2 / np.sqrt(n_inputs)\n",
2016-09-27 23:31:21 +02:00
" init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)\n",
" W = tf.Variable(init, name=\"kernel\")\n",
" b = tf.Variable(tf.zeros([n_neurons]), name=\"bias\")\n",
2016-09-27 23:31:21 +02:00
" Z = tf.matmul(X, W) + b\n",
" if activation is not None:\n",
" return activation(Z)\n",
2016-09-27 23:31:21 +02:00
" else:\n",
" return Z"
]
},
{
"cell_type": "code",
"execution_count": 17,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"with tf.name_scope(\"dnn\"):\n",
" hidden1 = neuron_layer(X, n_hidden1, name=\"hidden1\",\n",
" activation=tf.nn.relu)\n",
" hidden2 = neuron_layer(hidden1, n_hidden2, name=\"hidden2\",\n",
" activation=tf.nn.relu)\n",
" logits = neuron_layer(hidden2, n_outputs, name=\"outputs\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"with tf.name_scope(\"loss\"):\n",
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,\n",
" logits=logits)\n",
" loss = tf.reduce_mean(xentropy, name=\"loss\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"learning_rate = 0.01\n",
2016-09-27 23:31:21 +02:00
"\n",
"with tf.name_scope(\"train\"):\n",
" optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n",
" training_op = optimizer.minimize(loss)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"with tf.name_scope(\"eval\"):\n",
" correct = tf.nn.in_top_k(logits, y, 1)\n",
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()\n",
2016-09-27 23:31:21 +02:00
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"n_epochs = 40\n",
"batch_size = 50"
]
},
{
"cell_type": "code",
"execution_count": 23,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
2016-10-06 14:09:17 +02:00
" for iteration in range(mnist.train.num_examples // batch_size):\n",
2016-09-27 23:31:21 +02:00
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={X: mnist.test.images,\n",
" y: mnist.test.labels})\n",
2016-09-27 23:31:21 +02:00
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
"\n",
2017-02-17 11:51:26 +01:00
" save_path = saver.save(sess, \"./my_model_final.ckpt\")"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 24,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
" saver.restore(sess, \"./my_model_final.ckpt\") # or better, use save_path\n",
2016-09-27 23:31:21 +02:00
" X_new_scaled = mnist.test.images[:20]\n",
" Z = logits.eval(feed_dict={X: X_new_scaled})\n",
" y_pred = np.argmax(Z, axis=1)"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"print(\"Predicted classes:\", y_pred)\n",
"print(\"Actual classes: \", mnist.test.labels[:20])"
]
},
{
"cell_type": "code",
"execution_count": 26,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"from IPython.display import clear_output, Image, display, HTML\n",
"\n",
"def strip_consts(graph_def, max_const_size=32):\n",
" \"\"\"Strip large constant values from graph_def.\"\"\"\n",
" strip_def = tf.GraphDef()\n",
" for n0 in graph_def.node:\n",
" n = strip_def.node.add() \n",
" n.MergeFrom(n0)\n",
" if n.op == 'Const':\n",
" tensor = n.attr['value'].tensor\n",
" size = len(tensor.tensor_content)\n",
" if size > max_const_size:\n",
" tensor.tensor_content = b\"<stripped %d bytes>\"%size\n",
" return strip_def\n",
"\n",
"def show_graph(graph_def, max_const_size=32):\n",
" \"\"\"Visualize TensorFlow graph.\"\"\"\n",
" if hasattr(graph_def, 'as_graph_def'):\n",
" graph_def = graph_def.as_graph_def()\n",
" strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n",
" code = \"\"\"\n",
" <script>\n",
" function load() {{\n",
" document.getElementById(\"{id}\").pbtxt = {data};\n",
" }}\n",
" </script>\n",
" <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n",
" <div style=\"height:600px\">\n",
" <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n",
" </div>\n",
" \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n",
"\n",
" iframe = \"\"\"\n",
" <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n",
" \"\"\".format(code.replace('\"', '&quot;'))\n",
" display(HTML(iframe))"
]
},
{
"cell_type": "code",
"execution_count": 27,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"show_graph(tf.get_default_graph())"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"## Using `dense()` instead of `neuron_layer()`"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function, except for a few minor differences:\n",
"* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n",
"* the default `activation` is now `None` rather than `tf.nn.relu`.\n",
"* a few more differences are presented in chapter 11."
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 28,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"n_inputs = 28*28 # MNIST\n",
"n_hidden1 = 300\n",
"n_hidden2 = 100\n",
"n_outputs = 10"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"reset_graph()\n",
2016-09-27 23:31:21 +02:00
"\n",
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\") "
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"with tf.name_scope(\"dnn\"):\n",
" hidden1 = tf.layers.dense(X, n_hidden1, name=\"hidden1\",\n",
" activation=tf.nn.relu)\n",
" hidden2 = tf.layers.dense(hidden1, n_hidden2, name=\"hidden2\",\n",
" activation=tf.nn.relu)\n",
" logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"with tf.name_scope(\"loss\"):\n",
2017-02-17 11:51:26 +01:00
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
" loss = tf.reduce_mean(xentropy, name=\"loss\")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"learning_rate = 0.01\n",
2016-09-27 23:31:21 +02:00
"\n",
"with tf.name_scope(\"train\"):\n",
" optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n",
" training_op = optimizer.minimize(loss)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2016-09-27 23:31:21 +02:00
"with tf.name_scope(\"eval\"):\n",
" correct = tf.nn.in_top_k(logits, y, 1)\n",
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
2017-02-17 11:51:26 +01:00
"init = tf.global_variables_initializer()\n",
2016-09-27 23:31:21 +02:00
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 35,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"n_epochs = 20\n",
"n_batches = 50\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
2016-10-06 14:09:17 +02:00
" for iteration in range(mnist.train.num_examples // batch_size):\n",
2016-09-27 23:31:21 +02:00
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n",
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
"\n",
2017-02-17 11:51:26 +01:00
" save_path = saver.save(sess, \"./my_model_final.ckpt\")"
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 36,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": false,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"show_graph(tf.get_default_graph())"
]
},
{
"cell_type": "markdown",
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"source": [
"# Exercise solutions"
]
},
{
"cell_type": "markdown",
2017-02-17 11:51:26 +01:00
"metadata": {
"deletable": true,
"editable": true
},
2016-09-27 23:31:21 +02:00
"source": [
"## 1. to 8."
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"source": [
"See appendix A."
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"## 9."
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"_Train a deep MLP on the MNIST dataset and see if you can get over 98% precision. Just like in the last exercise of chapter 9, try adding all the bells and whistles (i.e., save checkpoints, restore the last checkpoint in case of an interruption, add summaries, plot learning curves using TensorBoard, and so on)._"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"First let's create the deep net. It's exactly the same as earlier, with just one addition: we add a `tf.summary.scalar()` to track the loss and the accuracy during training, so we can view nice learning curves using TensorBoard."
2016-09-27 23:31:21 +02:00
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"n_inputs = 28*28 # MNIST\n",
"n_hidden1 = 300\n",
"n_hidden2 = 100\n",
"n_outputs = 10"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"reset_graph()\n",
"\n",
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\") "
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.name_scope(\"dnn\"):\n",
" hidden1 = tf.layers.dense(X, n_hidden1, name=\"hidden1\",\n",
" activation=tf.nn.relu)\n",
" hidden2 = tf.layers.dense(hidden1, n_hidden2, name=\"hidden2\",\n",
" activation=tf.nn.relu)\n",
" logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.name_scope(\"loss\"):\n",
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
" loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
" loss_summary = tf.summary.scalar('log_loss', loss)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"learning_rate = 0.01\n",
"\n",
"with tf.name_scope(\"train\"):\n",
" optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n",
" training_op = optimizer.minimize(loss)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.name_scope(\"eval\"):\n",
" correct = tf.nn.in_top_k(logits, y, 1)\n",
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
" accuracy_summary = tf.summary.scalar('accuracy', accuracy)"
]
},
{
"cell_type": "code",
"execution_count": 43,
2016-09-27 23:31:21 +02:00
"metadata": {
2017-02-17 11:51:26 +01:00
"collapsed": true,
"deletable": true,
"editable": true
2016-09-27 23:31:21 +02:00
},
"outputs": [],
"source": [
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Now we need to define the directory to write the TensorBoard logs to:"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from datetime import datetime\n",
"\n",
"def log_dir(prefix=\"\"):\n",
" now = datetime.utcnow().strftime(\"%Y%m%d%H%M%S\")\n",
" root_logdir = \"tf_logs\"\n",
" if prefix:\n",
" prefix += \"-\"\n",
" name = prefix + \"run-\" + now\n",
" return \"{}/{}/\".format(root_logdir, name)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"logdir = log_dir(\"mnist_dnn\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Now we can create the `FileWriter` that we will use to write the TensorBoard logs:"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Hey! Why don't we implement early stopping? For this, we are going to need a validation set. Luckily, the dataset returned by TensorFlow's `input_data()` function (see above) is already split into a training set (60,000 instances, already shuffled for us), a validation set (5,000 instances) and a test set (5,000 instances). So we can easily define `X_valid` and `y_valid`:"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"X_valid = mnist.validation.images\n",
"y_valid = mnist.validation.labels"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"m, n = X_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"n_epochs = 10001\n",
"batch_size = 50\n",
"n_batches = int(np.ceil(m / batch_size))\n",
"\n",
"checkpoint_path = \"/tmp/my_deep_mnist_model.ckpt\"\n",
"checkpoint_epoch_path = checkpoint_path + \".epoch\"\n",
"final_model_path = \"./my_deep_mnist_model\"\n",
"\n",
"best_loss = np.infty\n",
"epochs_without_progress = 0\n",
"max_epochs_without_progress = 50\n",
"\n",
"with tf.Session() as sess:\n",
" if os.path.isfile(checkpoint_epoch_path):\n",
" # if the checkpoint file exists, restore the model and load the epoch number\n",
" with open(checkpoint_epoch_path, \"rb\") as f:\n",
" start_epoch = int(f.read())\n",
" print(\"Training was interrupted. Continuing at epoch\", start_epoch)\n",
" saver.restore(sess, checkpoint_path)\n",
" else:\n",
" start_epoch = 0\n",
" sess.run(init)\n",
"\n",
" for epoch in range(start_epoch, n_epochs):\n",
" for iteration in range(mnist.train.num_examples // batch_size):\n",
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], feed_dict={X: X_valid, y: y_valid})\n",
" file_writer.add_summary(accuracy_summary_str, epoch)\n",
" file_writer.add_summary(loss_summary_str, epoch)\n",
" if epoch % 5 == 0:\n",
" print(\"Epoch:\", epoch,\n",
" \"\\tValidation accuracy: {:.3f}%\".format(accuracy_val * 100),\n",
" \"\\tLoss: {:.5f}\".format(loss_val))\n",
" saver.save(sess, checkpoint_path)\n",
" with open(checkpoint_epoch_path, \"wb\") as f:\n",
" f.write(b\"%d\" % (epoch + 1))\n",
" if loss_val < best_loss:\n",
" saver.save(sess, final_model_path)\n",
" best_loss = loss_val\n",
" else:\n",
" epochs_without_progress += 5\n",
" if epochs_without_progress > max_epochs_without_progress:\n",
" print(\"Early stopping\")\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"os.remove(checkpoint_epoch_path)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
" saver.restore(sess, final_model_path)\n",
" accuracy_val = accuracy.eval(feed_dict={X: X_test, y: y_test})"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"accuracy_val"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
2016-09-27 23:31:21 +02:00
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
2016-09-27 23:31:21 +02:00
},
"nav_menu": {
"height": "264px",
"width": "369px"
},
"toc": {
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 6,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 0
}