handson-ml/10_introduction_to_artifici...

661 lines
18 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Chapter 10 Introduction to Artificial Neural Networks**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"_This notebook contains all the sample code and solutions to the exercices in chapter 10._"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# To support both python 2 and python 3\n",
"from __future__ import division, print_function, unicode_literals\n",
"\n",
"# Common imports\n",
"import numpy as np\n",
"import numpy.random as rnd\n",
"import os\n",
"\n",
"# to make this notebook's output stable across runs\n",
"rnd.seed(42)\n",
"\n",
"# To plot pretty figures\n",
"%matplotlib inline\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"plt.rcParams['axes.labelsize'] = 14\n",
"plt.rcParams['xtick.labelsize'] = 12\n",
"plt.rcParams['ytick.labelsize'] = 12\n",
"\n",
"# Where to save the figures\n",
"PROJECT_ROOT_DIR = \".\"\n",
"CHAPTER_ID = \"ann\"\n",
"\n",
"def save_fig(fig_id, tight_layout=True):\n",
" path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n",
" print(\"Saving figure\", fig_id)\n",
" if tight_layout:\n",
" plt.tight_layout()\n",
" plt.savefig(path, format='png', dpi=300)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Perceptrons"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.datasets import load_iris\n",
"iris = load_iris()\n",
"X = iris.data[:, (2, 3)] # petal length, petal width\n",
"y = (iris.target == 0).astype(np.int)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.linear_model import Perceptron\n",
"\n",
"per_clf = Perceptron(random_state=42)\n",
"per_clf.fit(X, y)\n",
"\n",
"y_pred = per_clf.predict([[2, 0.5]])\n",
"y_pred"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"a = -per_clf.coef_[0][0] / per_clf.coef_[0][1]\n",
"b = -per_clf.intercept_ / per_clf.coef_[0][1]\n",
"\n",
"axes = [0, 5, 0, 2]\n",
"\n",
"x0, x1 = np.meshgrid(\n",
" np.linspace(axes[0], axes[1], 500).reshape(-1, 1),\n",
" np.linspace(axes[2], axes[3], 200).reshape(-1, 1),\n",
" )\n",
"X_new = np.c_[x0.ravel(), x1.ravel()]\n",
"y_predict = per_clf.predict(X_new)\n",
"zz = y_predict.reshape(x0.shape)\n",
"\n",
"plt.figure(figsize=(10, 4))\n",
"plt.plot(X[y==0, 0], X[y==0, 1], \"bs\", label=\"Not Iris-Setosa\")\n",
"plt.plot(X[y==1, 0], X[y==1, 1], \"yo\", label=\"Iris-Setosa\")\n",
"\n",
"plt.plot([axes[0], axes[1]], [a * axes[0] + b, a * axes[1] + b], \"k-\", linewidth=3)\n",
"from matplotlib.colors import ListedColormap\n",
"custom_cmap = ListedColormap(['#9898ff', '#fafab0'])\n",
"\n",
"plt.contourf(x0, x1, zz, cmap=custom_cmap, linewidth=5)\n",
"plt.xlabel(\"Petal length\", fontsize=14)\n",
"plt.ylabel(\"Petal width\", fontsize=14)\n",
"plt.legend(loc=\"lower right\", fontsize=14)\n",
"plt.axis(axes)\n",
"\n",
"save_fig(\"perceptron_iris_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Activation functions"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def logit(z):\n",
" return 1 / (1 + np.exp(-z))\n",
"\n",
"def relu(z):\n",
" return np.maximum(0, z)\n",
"\n",
"def derivative(f, z, eps=0.000001):\n",
" return (f(z + eps) - f(z - eps))/(2 * eps)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"z = np.linspace(-5, 5, 200)\n",
"\n",
"plt.figure(figsize=(11,4))\n",
"\n",
"plt.subplot(121)\n",
"plt.plot(z, np.sign(z), \"r-\", linewidth=2, label=\"Step\")\n",
"plt.plot(z, logit(z), \"g--\", linewidth=2, label=\"Logit\")\n",
"plt.plot(z, np.tanh(z), \"b-\", linewidth=2, label=\"Tanh\")\n",
"plt.plot(z, relu(z), \"m-.\", linewidth=2, label=\"ReLU\")\n",
"plt.grid(True)\n",
"plt.legend(loc=\"center right\", fontsize=14)\n",
"plt.title(\"Activation functions\", fontsize=14)\n",
"plt.axis([-5, 5, -1.2, 1.2])\n",
"\n",
"plt.subplot(122)\n",
"plt.plot(z, derivative(np.sign, z), \"r-\", linewidth=2, label=\"Step\")\n",
"plt.plot(0, 0, \"ro\", markersize=5)\n",
"plt.plot(0, 0, \"rx\", markersize=10)\n",
"plt.plot(z, derivative(logit, z), \"g--\", linewidth=2, label=\"Logit\")\n",
"plt.plot(z, derivative(np.tanh, z), \"b-\", linewidth=2, label=\"Tanh\")\n",
"plt.plot(z, derivative(relu, z), \"m-.\", linewidth=2, label=\"ReLU\")\n",
"plt.grid(True)\n",
"#plt.legend(loc=\"center right\", fontsize=14)\n",
"plt.title(\"Derivatives\", fontsize=14)\n",
"plt.axis([-5, 5, -0.2, 1.2])\n",
"\n",
"save_fig(\"activation_functions_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def heaviside(z):\n",
" return (z >= 0).astype(z.dtype)\n",
"\n",
"def sigmoid(z):\n",
" return 1/(1+np.exp(-z))\n",
"\n",
"def mlp_xor(x1, x2, activation=heaviside):\n",
" return activation(-activation(x1 + x2 - 1.5) + activation(x1 + x2 - 0.5) - 0.5)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"x1s = np.linspace(-0.2, 1.2, 100)\n",
"x2s = np.linspace(-0.2, 1.2, 100)\n",
"x1, x2 = np.meshgrid(x1s, x2s)\n",
"\n",
"z1 = mlp_xor(x1, x2, activation=heaviside)\n",
"z2 = mlp_xor(x1, x2, activation=sigmoid)\n",
"\n",
"plt.figure(figsize=(10,4))\n",
"\n",
"plt.subplot(121)\n",
"plt.contourf(x1, x2, z1)\n",
"plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n",
"plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n",
"plt.title(\"Activation function: heaviside\", fontsize=14)\n",
"plt.grid(True)\n",
"\n",
"plt.subplot(122)\n",
"plt.contourf(x1, x2, z2)\n",
"plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n",
"plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n",
"plt.title(\"Activation function: sigmoid\", fontsize=14)\n",
"plt.grid(True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# FNN for MNIST"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## using tf.learn"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from tensorflow.examples.tutorials.mnist import input_data\n",
"mnist = input_data.read_data_sets(\"/tmp/data/\")\n",
"X_train = mnist.train.images\n",
"X_test = mnist.test.images\n",
"y_train = mnist.train.labels.astype(\"int\")\n",
"y_test = mnist.test.labels.astype(\"int\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)\n",
"dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[300, 100], n_classes=10,\n",
" feature_columns=feature_columns)\n",
"dnn_clf.fit(x=X_train, y=y_train, batch_size=50, steps=40000)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score\n",
"\n",
"y_pred = dnn_clf.predict(X_test)\n",
"accuracy = accuracy_score(y_test, y_pred)\n",
"accuracy"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.metrics import log_loss\n",
"\n",
"y_pred_proba = dnn_clf.predict_proba(X_test)\n",
"log_loss(y_test, y_pred_proba)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"dnn_clf.evaluate(X_test, y_test)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"## Using plain TensorFlow"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"def neuron_layer(X, n_neurons, name, activation=None):\n",
" with tf.name_scope(name):\n",
" n_inputs = int(X.get_shape()[1])\n",
" stddev = 1 / np.sqrt(n_inputs)\n",
" init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)\n",
" W = tf.Variable(init, name=\"weights\")\n",
" b = tf.Variable(tf.zeros([n_neurons]), name=\"biases\")\n",
" Z = tf.matmul(X, W) + b\n",
" if activation==\"relu\":\n",
" return tf.nn.relu(Z)\n",
" else:\n",
" return Z"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"n_inputs = 28*28 # MNIST\n",
"n_hidden1 = 300\n",
"n_hidden2 = 100\n",
"n_outputs = 10\n",
"learning_rate = 0.01\n",
"\n",
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
"\n",
"with tf.name_scope(\"dnn\"):\n",
" hidden1 = neuron_layer(X, n_hidden1, \"hidden1\", activation=\"relu\")\n",
" hidden2 = neuron_layer(hidden1, n_hidden2, \"hidden2\", activation=\"relu\")\n",
" logits = neuron_layer(hidden2, n_outputs, \"output\")\n",
"\n",
"with tf.name_scope(\"loss\"):\n",
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n",
" loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
"\n",
"with tf.name_scope(\"train\"):\n",
" optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n",
" training_op = optimizer.minimize(loss)\n",
"\n",
"with tf.name_scope(\"eval\"):\n",
" correct = tf.nn.in_top_k(logits, y, 1)\n",
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
" \n",
"init = tf.initialize_all_variables()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n_epochs = 20\n",
"batch_size = 50\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for iteration in range(mnist.train.num_examples // batch_size):\n",
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n",
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
"\n",
" save_path = saver.save(sess, \"my_model_final.ckpt\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with tf.Session() as sess:\n",
" saver.restore(sess, \"my_model_final.ckpt\")\n",
" X_new_scaled = mnist.test.images[:20]\n",
" Z = logits.eval(feed_dict={X: X_new_scaled})\n",
" print(np.argmax(Z, axis=1))\n",
" print(mnist.test.labels[:20])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from IPython.display import clear_output, Image, display, HTML\n",
"\n",
"def strip_consts(graph_def, max_const_size=32):\n",
" \"\"\"Strip large constant values from graph_def.\"\"\"\n",
" strip_def = tf.GraphDef()\n",
" for n0 in graph_def.node:\n",
" n = strip_def.node.add() \n",
" n.MergeFrom(n0)\n",
" if n.op == 'Const':\n",
" tensor = n.attr['value'].tensor\n",
" size = len(tensor.tensor_content)\n",
" if size > max_const_size:\n",
" tensor.tensor_content = b\"<stripped %d bytes>\"%size\n",
" return strip_def\n",
"\n",
"def show_graph(graph_def, max_const_size=32):\n",
" \"\"\"Visualize TensorFlow graph.\"\"\"\n",
" if hasattr(graph_def, 'as_graph_def'):\n",
" graph_def = graph_def.as_graph_def()\n",
" strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n",
" code = \"\"\"\n",
" <script>\n",
" function load() {{\n",
" document.getElementById(\"{id}\").pbtxt = {data};\n",
" }}\n",
" </script>\n",
" <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n",
" <div style=\"height:600px\">\n",
" <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n",
" </div>\n",
" \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n",
"\n",
" iframe = \"\"\"\n",
" <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n",
" \"\"\".format(code.replace('\"', '&quot;'))\n",
" display(HTML(iframe))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"show_graph(tf.get_default_graph())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using `fully_connected` instead of `neuron_layer()`"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"\n",
"from tensorflow.contrib.layers import fully_connected\n",
"\n",
"n_inputs = 28*28 # MNIST\n",
"n_hidden1 = 300\n",
"n_hidden2 = 100\n",
"n_outputs = 10\n",
"learning_rate = 0.01\n",
"\n",
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
"\n",
"with tf.name_scope(\"dnn\"):\n",
" hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n",
" hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n",
" logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n",
"\n",
"with tf.name_scope(\"loss\"):\n",
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)\n",
" loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
"\n",
"with tf.name_scope(\"train\"):\n",
" optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n",
" training_op = optimizer.minimize(loss)\n",
"\n",
"with tf.name_scope(\"eval\"):\n",
" correct = tf.nn.in_top_k(logits, y, 1)\n",
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
" \n",
"init = tf.initialize_all_variables()\n",
"saver = tf.train.Saver()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n_epochs = 20\n",
"n_batches = 50\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" for epoch in range(n_epochs):\n",
" for iteration in range(mnist.train.num_examples // batch_size):\n",
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
" acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n",
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
"\n",
" save_path = saver.save(sess, \"my_model_final.ckpt\")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"show_graph(tf.get_default_graph())"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Exercise solutions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Coming soon**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
},
"nav_menu": {
"height": "264px",
"width": "369px"
},
"toc": {
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 6,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 0
}