{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "**Chapter 13 – Convolutional Neural Networks**" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "_This notebook contains all the sample code and solutions to the exercises in chapter 13._" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Setup" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "First, let's make sure this notebook works well in both Python 2 and 3, import a few common modules, ensure Matplotlib plots figures inline and prepare a function to save the figures:" ] },
{ "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [
 "# To support both python 2 and python 3\n",
 "from __future__ import division, print_function, unicode_literals\n",
 "\n",
 "# Common imports\n",
 "import numpy as np\n",
 "import numpy.random as rnd\n",
 "import os\n",
 "\n",
 "# to make this notebook's output stable across runs\n",
 "rnd.seed(42)\n",
 "\n",
 "# To plot pretty figures\n",
 "%matplotlib inline\n",
 "import matplotlib\n",
 "import matplotlib.pyplot as plt\n",
 "plt.rcParams['axes.labelsize'] = 14\n",
 "plt.rcParams['xtick.labelsize'] = 12\n",
 "plt.rcParams['ytick.labelsize'] = 12\n",
 "\n",
 "# Where to save the figures\n",
 "PROJECT_ROOT_DIR = \".\"\n",
 "CHAPTER_ID = \"cnn\"\n",
 "\n",
 "def save_fig(fig_id, tight_layout=True):\n",
 "    \"\"\"Save the current Matplotlib figure as a 300 dpi PNG.\n",
 "\n",
 "    The file is written to PROJECT_ROOT_DIR/images/CHAPTER_ID/<fig_id>.png and\n",
 "    the directory is created first when it does not exist yet. When\n",
 "    tight_layout is true, plt.tight_layout() is applied before saving.\n",
 "    \"\"\"\n",
 "    images_dir = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
 "    # Not os.makedirs(..., exist_ok=True): that keyword only exists in Python 3.\n",
 "    if not os.path.isdir(images_dir):\n",
 "        os.makedirs(images_dir)\n",
 "    path = os.path.join(images_dir, fig_id + \".png\")\n",
 "    print(\"Saving figure\", fig_id)\n",
 "    if tight_layout:\n",
 "        plt.tight_layout()\n",
 "    plt.savefig(path, format='png', dpi=300)"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "A couple utility functions to plot grayscale and RGB images:" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [
 "def plot_image(image):\n",
 "    \"\"\"Display `image` with a gray colormap, no interpolation and no axes.\"\"\"\n",
 "    plt.imshow(image, cmap=\"gray\", interpolation=\"nearest\")\n",
 "    plt.axis(\"off\")\n",
 "\n",
 "def plot_color_image(image):\n",
 "    \"\"\"Display `image` cast to uint8, with no interpolation and no axes.\"\"\"\n",
 "    plt.imshow(image.astype(np.uint8),interpolation=\"nearest\")\n",
 "    plt.axis(\"off\")"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "And of course we will need TensorFlow:" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [
 "import tensorflow as tf"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Convolutional layer" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [
 "from sklearn.datasets import load_sample_image\n",
 "china = load_sample_image(\"china.jpg\")\n",
 "flower = load_sample_image(\"flower.jpg\")\n",
 "image = china[150:220, 130:250]\n",
 "height, width, channels = image.shape\n",
 "image_grayscale = image.mean(axis=2).astype(np.float32)\n",
 "images = image_grayscale.reshape(1, height, width, 1)"
] },
{ "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "# Two 7x7 single-channel filters: all zeros except one line of ones.\n",
 "fmap = np.zeros(shape=(7, 7, 1, 2), dtype=np.float32)\n",
 "fmap[:, 3, 0, 0] = 1  # filter 0: middle column set to 1\n",
 "fmap[3, :, 0, 1] = 1  # filter 1: middle row set to 1\n",
 "plot_image(fmap[:, :, 0, 0])\n",
 "plt.show()\n",
 "plot_image(fmap[:, :, 0, 1])\n",
 "plt.show()"
] },
{ "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "tf.reset_default_graph()\n",
 "\n",
 "X = tf.placeholder(tf.float32, shape=(None, height, width, 1))\n",
 "feature_maps = tf.constant(fmap)\n",
 "convolution = tf.nn.conv2d(X, feature_maps, strides=[1,1,1,1], padding=\"SAME\", use_cudnn_on_gpu=False)"
] },
{ "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [
 "with tf.Session() as sess:\n",
 "    output = convolution.eval(feed_dict={X: images})"
] },
{ "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "plot_image(images[0, :, :, 0])\n",
 "save_fig(\"china_original\", tight_layout=False)\n",
 "plt.show()"
] },
{
"cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "plot_image(output[0, :, :, 0])\n",
 "save_fig(\"china_vertical\", tight_layout=False)\n",
 "plt.show()"
] },
{ "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "plot_image(output[0, :, :, 1])\n",
 "save_fig(\"china_horizontal\", tight_layout=False)\n",
 "plt.show()"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Simple example" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "dataset = np.array([china, flower], dtype=np.float32)\n",
 "batch_size, height, width, channels = dataset.shape\n",
 "\n",
 "filters = np.zeros(shape=(7, 7, channels, 2), dtype=np.float32)\n",
 "filters[:, 3, :, 0] = 1 # vertical line\n",
 "filters[3, :, :, 1] = 1 # horizontal line\n",
 "\n",
 "# Start from an empty graph, like the other graph-building cells, so that\n",
 "# re-running this cell does not keep adding nodes to the previous graph.\n",
 "tf.reset_default_graph()\n",
 "\n",
 "X = tf.placeholder(tf.float32, shape=(None, height, width, channels))\n",
 "convolution = tf.nn.conv2d(X, filters, strides=[1,2,2,1], padding=\"SAME\")\n",
 "\n",
 "with tf.Session() as sess:\n",
 "    output = sess.run(convolution, feed_dict={X: dataset})\n",
 "\n",
 "# One plot per (image, feature map) pair.\n",
 "for image_index in (0, 1):\n",
 "    for feature_map_index in (0, 1):\n",
 "        plot_image(output[image_index, :, :, feature_map_index])\n",
 "        plt.show()"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## VALID vs SAME padding" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "tf.reset_default_graph()\n",
 "\n",
 "filter_primes = np.array([2., 3., 5., 7., 11., 13.], dtype=np.float32)\n",
 "x = tf.constant(np.arange(1, 13+1, dtype=np.float32).reshape([1, 1, 13, 1]))\n",
 "filters = tf.constant(filter_primes.reshape(1, 6, 1, 1))\n",
 "\n",
 "valid_conv = tf.nn.conv2d(x, filters, strides=[1, 1, 5, 1], padding='VALID')\n",
 "same_conv = tf.nn.conv2d(x, filters, strides=[1, 1, 5, 1], padding='SAME')\n",
 "\n",
 "with tf.Session() as sess:\n",
 "    print(\"VALID:\\n\", valid_conv.eval())\n",
 "    print(\"SAME:\\n\", same_conv.eval())"
] },
{ "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "# Same dot products computed by hand with NumPy, one per input window.\n",
 "print(\"VALID:\")\n",
 "print(np.array([1,2,3,4,5,6]).dot(filter_primes))\n",
 "print(np.array([6,7,8,9,10,11]).dot(filter_primes))\n",
 "print(\"SAME:\")\n",
 "print(np.array([0,1,2,3,4,5]).dot(filter_primes))\n",
 "print(np.array([5,6,7,8,9,10]).dot(filter_primes))\n",
 "print(np.array([10,11,12,13,0,0]).dot(filter_primes))"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Pooling layer" ] },
{ "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "batch_size, height, width, channels = dataset.shape\n",
 "\n",
 "# Start from an empty graph so re-running this cell does not accumulate nodes.\n",
 "tf.reset_default_graph()\n",
 "\n",
 "X = tf.placeholder(tf.float32, shape=(None, height, width, channels))\n",
 "max_pool = tf.nn.max_pool(X, ksize=[1, 2, 2, 1], strides=[1,2,2,1], padding=\"VALID\")\n",
 "\n",
 "with tf.Session() as sess:\n",
 "    output = sess.run(max_pool, feed_dict={X: dataset})\n",
 "\n",
 "plot_color_image(dataset[0])\n",
 "# NOTE(review): \"china_original\" is also the figure id used in the convolution\n",
 "# section, so this call overwrites that PNG - confirm this is intended.\n",
 "save_fig(\"china_original\")\n",
 "plt.show()\n",
 "\n",
 "plot_color_image(output[0])\n",
 "save_fig(\"china_max_pool\")\n",
 "plt.show()"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# MNIST" ] },
{ "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": true }, "outputs": [], "source": [
 "# NOTE(review): fetch_mldata is no longer available in recent scikit-learn\n",
 "# releases; fetch_openml(\"mnist_784\") is the usual replacement, but check its\n",
 "# row order against the 60000/10000 split below before switching.\n",
 "from sklearn.datasets import fetch_mldata\n",
 "\n",
 "mnist = fetch_mldata('MNIST original')\n",
 "X_train, X_test = mnist[\"data\"][:60000].astype(np.float64), mnist[\"data\"][60000:].astype(np.float64)\n",
 "y_train, y_test = mnist[\"target\"][:60000].astype(np.int64), mnist[\"target\"][60000:].astype(np.int64)"
] },
{ "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [],
"source": [
 "height, width = 28, 28\n",
 "images = X_test[5000].reshape(1, height, width, 1)\n",
 "plot_image(images[0, :, :, 0])"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Inception v3" ] },
{ "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true }, "outputs": [], "source": [
 "import os\n",
 "import sys\n",
 "import tarfile\n",
 "\n",
 "# urlretrieve lives in different modules in Python 2 and Python 3.\n",
 "try:\n",
 "    from urllib.request import urlretrieve\n",
 "except ImportError:\n",
 "    from urllib import urlretrieve\n",
 "\n",
 "TF_MODELS_URL = \"http://download.tensorflow.org/models\"\n",
 "INCEPTION_V3_URL = TF_MODELS_URL + \"/inception_v3_2016_08_28.tar.gz\"\n",
 "INCEPTION_PATH = os.path.join(\"datasets\", \"inception\")\n",
 "INCEPTION_V3_CHECKPOINT_PATH = os.path.join(INCEPTION_PATH, \"inception_v3.ckpt\")\n",
 "\n",
 "def download_progress(count, block_size, total_size):\n",
 "    \"\"\"Report hook for urlretrieve: rewrite the download progress on one line.\n",
 "\n",
 "    count is the number of blocks transferred so far, block_size the size of\n",
 "    one block in bytes and total_size the size of the whole file (it may be\n",
 "    zero or negative when the server does not report it).\n",
 "    \"\"\"\n",
 "    if total_size > 0:\n",
 "        # The last block is usually partial, so cap the value at 100.\n",
 "        percent = min(count * block_size * 100 // total_size, 100)\n",
 "        message = \"\\rDownloading: {}%\".format(percent)\n",
 "    else:\n",
 "        # Unknown size: no division by zero and no negative percentage.\n",
 "        message = \"\\rDownloading: {} bytes\".format(count * block_size)\n",
 "    sys.stdout.write(message)\n",
 "    sys.stdout.flush()\n",
 "\n",
 "def fetch_pretrained_inception_v3(url=INCEPTION_V3_URL, path=INCEPTION_PATH):\n",
 "    \"\"\"Download the archive at url and extract it into the directory path.\n",
 "\n",
 "    Nothing is done when path already contains inception_v3.ckpt. The\n",
 "    downloaded archive is deleted once it has been extracted.\n",
 "    \"\"\"\n",
 "    # Look for the checkpoint inside `path` rather than the default directory,\n",
 "    # so that a custom `path` is honoured.\n",
 "    checkpoint_path = os.path.join(path, \"inception_v3.ckpt\")\n",
 "    if os.path.exists(checkpoint_path):\n",
 "        return\n",
 "    # Not os.makedirs(..., exist_ok=True): that keyword only exists in Python 3.\n",
 "    if not os.path.isdir(path):\n",
 "        os.makedirs(path)\n",
 "    tgz_path = os.path.join(path, \"inception_v3.tgz\")\n",
 "    urlretrieve(url, tgz_path, reporthook=download_progress)\n",
 "    # The context manager closes the archive even when extraction fails.\n",
 "    with tarfile.open(tgz_path) as inception_tgz:\n",
 "        inception_tgz.extractall(path=path)\n",
 "    os.remove(tgz_path)"
] },
{ "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "fetch_pretrained_inception_v3()"
] },
{ "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": true }, "outputs": [], "source": [
 "import re\n",
 "\n",
 "# One class per line: an id made of \"n\" and digits, whitespace, then the description.\n",
 "CLASS_NAME_REGEX = re.compile(r\"^n\\d+\\s+(.*)\\s*$\", re.M | re.U)\n",
 "\n",
 "def load_class_names():\n",
 "    \"\"\"Return the class descriptions found in imagenet_class_names.txt.\n",
 "\n",
 "    The file is read from INCEPTION_PATH as UTF-8 and the text captured by\n",
 "    CLASS_NAME_REGEX on each matching line is returned as a list, in file order.\n",
 "    \"\"\"\n",
 "    class_names_path = os.path.join(INCEPTION_PATH, \"imagenet_class_names.txt\")\n",
 "    with open(class_names_path, \"rb\") as f:\n",
 "        content = f.read().decode(\"utf-8\")\n",
 "        return CLASS_NAME_REGEX.findall(content)"
] },
{ "cell_type": "code", "execution_count":
20, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "# The network below is built with num_classes=1001: the pretrained checkpoint\n",
 "# has an extra \"background\" class at index 0, so the names read from the file\n",
 "# must be shifted by one to line up with the prediction indices.\n",
 "class_names = [\"background\"] + load_class_names()"
] },
{ "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": true }, "outputs": [], "source": [
 "width = 299\n",
 "height = 299\n",
 "channels = 3"
] },
{ "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "import matplotlib.image as mpimg\n",
 "# Keep only the first `channels` channels of the PNG.\n",
 "test_image = mpimg.imread(os.path.join(\"images\",\"cnn\",\"test_image.png\"))[:, :, :channels]\n",
 "plt.imshow(test_image)\n",
 "plt.axis(\"off\")\n",
 "plt.show()"
] },
{ "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "import tensorflow as tf\n",
 "from nets.inception_v3 import inception_v3, inception_v3_arg_scope\n",
 "import tensorflow.contrib.slim as slim\n",
 "\n",
 "tf.reset_default_graph()\n",
 "\n",
 "X = tf.placeholder(tf.float32, shape=[None, height, width, channels], name=\"X\")\n",
 "with slim.arg_scope(inception_v3_arg_scope()):\n",
 "    logits, end_points = inception_v3(X, num_classes=1001, is_training=False)\n",
 "predictions = end_points[\"Predictions\"]\n",
 "saver = tf.train.Saver()"
] },
{ "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "# mpimg.imread returns PNG pixels as floats in [0, 1], whereas the pretrained\n",
 "# Inception model expects inputs in [-1, 1]: rescale before feeding the image.\n",
 "X_test = 2 * test_image.reshape(-1, height, width, channels) - 1\n",
 "\n",
 "with tf.Session() as sess:\n",
 "    saver.restore(sess, INCEPTION_V3_CHECKPOINT_PATH)\n",
 "    predictions_val = predictions.eval(feed_dict={X: X_test})"
] },
{ "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "class_names[np.argmax(predictions_val[0])]"
] },
{ "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "np.argmax(predictions_val, axis=1)"
] },
{ "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "# Indices of the 5 highest scores (argpartition leaves them unordered) ...\n",
 "top_5 = np.argpartition(predictions_val[0], -5)[-5:]\n",
 "# ... then ordered by increasing score, so the best guess is printed last.\n",
 "top_5 = top_5[np.argsort(predictions_val[0][top_5])]\n",
 "for i in top_5:\n",
 "    print(\"{0}: {1:.2f}%\".format(class_names[i], 100*predictions_val[0][i]))"
] },
{ "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Exercise solutions" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "**Coming soon**" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }
], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" }, "nav_menu": {}, "toc": { "navigate_menu": true, "number_sections": true, "sideBar": true, "threshold": 6, "toc_cell": false, "toc_section_display": "block", "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 0 }