{ "cells": [ { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "**Chapter 10 – Introduction to Artificial Neural Networks**" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "_This notebook contains all the sample code and solutions to the exercices in chapter 10._" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Setup" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "# To support both python 2 and python 3\n", "from __future__ import division, print_function, unicode_literals\n", "\n", "# Common imports\n", "import numpy as np\n", "import numpy.random as rnd\n", "import os\n", "\n", "# to make this notebook's output stable across runs\n", "rnd.seed(42)\n", "\n", "# To plot pretty figures\n", "%matplotlib inline\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "plt.rcParams['axes.labelsize'] = 14\n", "plt.rcParams['xtick.labelsize'] = 12\n", "plt.rcParams['ytick.labelsize'] = 12\n", "\n", "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", "CHAPTER_ID = \"ann\"\n", "\n", "def save_fig(fig_id, tight_layout=True):\n", " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", " print(\"Saving figure\", fig_id)\n", " if tight_layout:\n", " plt.tight_layout()\n", " plt.savefig(path, format='png', dpi=300)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Perceptrons" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "from sklearn.datasets import load_iris\n", "iris = load_iris()\n", "X = iris.data[:, (2, 3)] # petal length, petal width\n", "y = (iris.target == 0).astype(np.int)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from sklearn.linear_model import Perceptron\n", "\n", "per_clf = Perceptron(random_state=42)\n", "per_clf.fit(X, y)\n", "\n", "y_pred = per_clf.predict([[2, 0.5]])\n", "y_pred" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "a = -per_clf.coef_[0][0] / per_clf.coef_[0][1]\n", "b = -per_clf.intercept_ / per_clf.coef_[0][1]\n", "\n", "axes = [0, 5, 0, 2]\n", "\n", "x0, x1 = np.meshgrid(\n", " np.linspace(axes[0], axes[1], 500).reshape(-1, 1),\n", " np.linspace(axes[2], axes[3], 200).reshape(-1, 1),\n", " )\n", "X_new = np.c_[x0.ravel(), x1.ravel()]\n", "y_predict = per_clf.predict(X_new)\n", "zz = y_predict.reshape(x0.shape)\n", "\n", "plt.figure(figsize=(10, 4))\n", "plt.plot(X[y==0, 0], X[y==0, 1], \"bs\", label=\"Not Iris-Setosa\")\n", "plt.plot(X[y==1, 0], X[y==1, 1], \"yo\", label=\"Iris-Setosa\")\n", "\n", "plt.plot([axes[0], axes[1]], [a * axes[0] + b, a * axes[1] + b], \"k-\", linewidth=3)\n", "from matplotlib.colors import ListedColormap\n", "custom_cmap = ListedColormap(['#9898ff', '#fafab0'])\n", "\n", "plt.contourf(x0, x1, zz, cmap=custom_cmap, linewidth=5)\n", "plt.xlabel(\"Petal length\", fontsize=14)\n", "plt.ylabel(\"Petal width\", fontsize=14)\n", "plt.legend(loc=\"lower right\", fontsize=14)\n", "plt.axis(axes)\n", "\n", "save_fig(\"perceptron_iris_plot\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Activation functions" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "def logit(z):\n", " return 1 / (1 + np.exp(-z))\n", "\n", "def relu(z):\n", " return np.maximum(0, z)\n", "\n", "def derivative(f, z, eps=0.000001):\n", " return (f(z + eps) - f(z - eps))/(2 * eps)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "z = np.linspace(-5, 5, 200)\n", "\n", "plt.figure(figsize=(11,4))\n", "\n", "plt.subplot(121)\n", "plt.plot(z, np.sign(z), \"r-\", linewidth=2, label=\"Step\")\n", "plt.plot(z, logit(z), \"g--\", linewidth=2, label=\"Logit\")\n", "plt.plot(z, np.tanh(z), \"b-\", linewidth=2, label=\"Tanh\")\n", "plt.plot(z, relu(z), \"m-.\", linewidth=2, label=\"ReLU\")\n", "plt.grid(True)\n", "plt.legend(loc=\"center right\", fontsize=14)\n", "plt.title(\"Activation functions\", fontsize=14)\n", "plt.axis([-5, 5, -1.2, 1.2])\n", "\n", "plt.subplot(122)\n", "plt.plot(z, derivative(np.sign, z), \"r-\", linewidth=2, label=\"Step\")\n", "plt.plot(0, 0, \"ro\", markersize=5)\n", "plt.plot(0, 0, \"rx\", markersize=10)\n", "plt.plot(z, derivative(logit, z), \"g--\", linewidth=2, label=\"Logit\")\n", "plt.plot(z, derivative(np.tanh, z), \"b-\", linewidth=2, label=\"Tanh\")\n", "plt.plot(z, derivative(relu, z), \"m-.\", linewidth=2, label=\"ReLU\")\n", "plt.grid(True)\n", "#plt.legend(loc=\"center right\", fontsize=14)\n", "plt.title(\"Derivatives\", fontsize=14)\n", "plt.axis([-5, 5, -0.2, 1.2])\n", "\n", "save_fig(\"activation_functions_plot\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "def heaviside(z):\n", " return (z >= 0).astype(z.dtype)\n", "\n", "def sigmoid(z):\n", " return 1/(1+np.exp(-z))\n", "\n", "def mlp_xor(x1, x2, activation=heaviside):\n", " return activation(-activation(x1 + x2 - 1.5) + activation(x1 + x2 - 0.5) - 0.5)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "x1s = np.linspace(-0.2, 1.2, 100)\n", "x2s = np.linspace(-0.2, 1.2, 100)\n", "x1, x2 = np.meshgrid(x1s, x2s)\n", "\n", "z1 = mlp_xor(x1, x2, activation=heaviside)\n", "z2 = mlp_xor(x1, x2, activation=sigmoid)\n", "\n", "plt.figure(figsize=(10,4))\n", "\n", "plt.subplot(121)\n", "plt.contourf(x1, x2, z1)\n", "plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n", "plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n", "plt.title(\"Activation function: heaviside\", fontsize=14)\n", "plt.grid(True)\n", "\n", "plt.subplot(122)\n", "plt.contourf(x1, x2, z2)\n", "plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n", "plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n", "plt.title(\"Activation function: sigmoid\", fontsize=14)\n", "plt.grid(True)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# FNN for MNIST" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## using tf.learn" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from tensorflow.examples.tutorials.mnist import input_data\n", "mnist = input_data.read_data_sets(\"/tmp/data/\")\n", "X_train = mnist.train.images\n", "X_test = mnist.test.images\n", "y_train = mnist.train.labels.astype(\"int\")\n", "y_test = mnist.test.labels.astype(\"int\")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import tensorflow as tf\n", "\n", "feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)\n", "dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[300, 100], n_classes=10,\n", " feature_columns=feature_columns)\n", "dnn_clf.fit(x=X_train, y=y_train, batch_size=50, steps=40000)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from sklearn.metrics import accuracy_score\n", "\n", "y_pred = list(dnn_clf.predict(X_test))\n", "accuracy = accuracy_score(y_test, y_pred)\n", "accuracy" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from sklearn.metrics import log_loss\n", "\n", "y_pred_proba = list(dnn_clf.predict_proba(X_test))\n", "log_loss(y_test, y_pred_proba)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "dnn_clf.evaluate(X_test, y_test)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true, "deletable": true, "editable": true }, "source": [ "## Using plain TensorFlow" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "import tensorflow as tf\n", "\n", "def neuron_layer(X, n_neurons, name, activation=None):\n", " with tf.name_scope(name):\n", " n_inputs = int(X.get_shape()[1])\n", " stddev = 1 / np.sqrt(n_inputs)\n", " init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)\n", " W = tf.Variable(init, name=\"weights\")\n", " b = tf.Variable(tf.zeros([n_neurons]), name=\"biases\")\n", " Z = tf.matmul(X, W) + b\n", " if activation==\"relu\":\n", " return tf.nn.relu(Z)\n", " else:\n", " return Z" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_inputs = 28*28 # MNIST\n", "n_hidden1 = 300\n", "n_hidden2 = 100\n", "n_outputs = 10\n", "learning_rate = 0.01\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "\n", "with tf.name_scope(\"dnn\"):\n", " hidden1 = neuron_layer(X, n_hidden1, \"hidden1\", activation=\"relu\")\n", " hidden2 = neuron_layer(hidden1, n_hidden2, \"hidden2\", activation=\"relu\")\n", " logits = neuron_layer(hidden2, n_outputs, \"output\")\n", "\n", "with tf.name_scope(\"loss\"):\n", " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", "\n", "with tf.name_scope(\"train\"):\n", " optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n", " training_op = optimizer.minimize(loss)\n", "\n", "with tf.name_scope(\"eval\"):\n", " correct = tf.nn.in_top_k(logits, y, 1)\n", " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", " \n", "init = tf.global_variables_initializer()\n", "saver = tf.train.Saver()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "n_epochs = 20\n", "batch_size = 50\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for epoch in range(n_epochs):\n", " for iteration in range(mnist.train.num_examples // batch_size):\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", " acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n", " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", "\n", " save_path = saver.save(sess, \"./my_model_final.ckpt\")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "with tf.Session() as sess:\n", " saver.restore(sess, save_path) #\"my_model_final.ckpt\")\n", " X_new_scaled = mnist.test.images[:20]\n", " Z = logits.eval(feed_dict={X: X_new_scaled})\n", " print(np.argmax(Z, axis=1))\n", " print(mnist.test.labels[:20])" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [ "from IPython.display import clear_output, Image, display, HTML\n", "\n", "def strip_consts(graph_def, max_const_size=32):\n", " \"\"\"Strip large constant values from graph_def.\"\"\"\n", " strip_def = tf.GraphDef()\n", " for n0 in graph_def.node:\n", " n = strip_def.node.add() \n", " n.MergeFrom(n0)\n", " if n.op == 'Const':\n", " tensor = n.attr['value'].tensor\n", " size = len(tensor.tensor_content)\n", " if size > max_const_size:\n", " tensor.tensor_content = b\"\"%size\n", " return strip_def\n", "\n", "def show_graph(graph_def, max_const_size=32):\n", " \"\"\"Visualize TensorFlow graph.\"\"\"\n", " if hasattr(graph_def, 'as_graph_def'):\n", " graph_def = graph_def.as_graph_def()\n", " strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n", " code = \"\"\"\n", " \n", " \n", "
\n", " \n", "
\n", " \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n", "\n", " iframe = \"\"\"\n", " \n", " \"\"\".format(code.replace('\"', '"'))\n", " display(HTML(iframe))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "show_graph(tf.get_default_graph())" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Using `dense()` instead of `neuron_layer()`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function, except for a few minor differences:\n", "* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n", "* the default `activation` is now `None` rather than `tf.nn.relu`.\n", "* a few more differences are presented in chapter 11." ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tf.reset_default_graph()\n", "\n", "n_inputs = 28*28 # MNIST\n", "n_hidden1 = 300\n", "n_hidden2 = 100\n", "n_outputs = 10\n", "learning_rate = 0.01\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n", "\n", "with tf.name_scope(\"dnn\"):\n", " hidden1 = tf.layers.dense(X, n_hidden1, name=\"hidden1\", activation=tf.nn.relu)\n", " hidden2 = tf.layers.dense(hidden1, n_hidden2, name=\"hidden2\", activation=tf.nn.relu)\n", " logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")\n", "\n", "with tf.name_scope(\"loss\"):\n", " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n", " loss = tf.reduce_mean(xentropy, name=\"loss\")\n", "\n", "with tf.name_scope(\"train\"):\n", " optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n", " training_op = optimizer.minimize(loss)\n", "\n", "with tf.name_scope(\"eval\"):\n", " correct = tf.nn.in_top_k(logits, y, 1)\n", " accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", " \n", "init = tf.global_variables_initializer()\n", "saver = tf.train.Saver()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "n_epochs = 20\n", "n_batches = 50\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for epoch in range(n_epochs):\n", " for iteration in range(mnist.train.num_examples // batch_size):\n", " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", " acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n", " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", "\n", " save_path = saver.save(sess, \"./my_model_final.ckpt\")" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "show_graph(tf.get_default_graph())" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true, "deletable": true, "editable": true }, "source": [ "# Exercise solutions" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "**Coming soon**" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true, "deletable": true, "editable": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.3" }, "nav_menu": { "height": "264px", "width": "369px" }, "toc": { "navigate_menu": true, "number_sections": true, "sideBar": true, "threshold": 6, "toc_cell": false, "toc_section_display": "block", "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 0 }