614 lines
15 KiB
Plaintext
614 lines
15 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"**Chapter 13 – Convolutional Neural Networks**"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"_This notebook contains all the sample code and solutions to the exercises in chapter 13._"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Setup"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"First, let's make sure this notebook works well in both Python 2 and 3, import a few common modules, ensure Matplotlib plots figures inline, and prepare a function to save the figures:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# To support both python 2 and python 3\n",
|
||
"from __future__ import division, print_function, unicode_literals\n",
|
||
"\n",
|
||
"# Common imports\n",
|
||
"import numpy as np\n",
|
||
"import numpy.random as rnd\n",
|
||
"import os\n",
|
||
"\n",
|
||
"# to make this notebook's output stable across runs\n",
|
||
"rnd.seed(42)\n",
|
||
"\n",
|
||
"# To plot pretty figures\n",
|
||
"%matplotlib inline\n",
|
||
"import matplotlib\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"plt.rcParams['axes.labelsize'] = 14\n",
|
||
"plt.rcParams['xtick.labelsize'] = 12\n",
|
||
"plt.rcParams['ytick.labelsize'] = 12\n",
|
||
"\n",
|
||
"# Where to save the figures\n",
|
||
"PROJECT_ROOT_DIR = \".\"\n",
|
||
"CHAPTER_ID = \"cnn\"\n",
|
||
"\n",
|
||
"def save_fig(fig_id, tight_layout=True):\n",
|
||
"    \"\"\"Save the current Matplotlib figure as images/CHAPTER_ID/fig_id.png under PROJECT_ROOT_DIR.\n",
|
||
"\n",
|
||
"    The target directory is created on first use, so saving also works on a\n",
|
||
"    fresh checkout. When `tight_layout` is true, plt.tight_layout() is applied\n",
|
||
"    before the figure is written.\n",
|
||
"    \"\"\"\n",
|
||
"    image_dir = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
|
||
"    # makedirs(exist_ok=True) is Python 3 only and this notebook also targets Python 2.\n",
|
||
"    if not os.path.isdir(image_dir):\n",
|
||
"        os.makedirs(image_dir)\n",
|
||
"    path = os.path.join(image_dir, fig_id + \".png\")\n",
|
||
"    print(\"Saving figure\", fig_id)\n",
|
||
"    if tight_layout:\n",
|
||
"        plt.tight_layout()\n",
|
||
"    plt.savefig(path, format='png', dpi=300)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"A couple of utility functions to plot grayscale and RGB images:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"def plot_image(image):\n",
|
||
"    \"\"\"Draw `image` with the gray colormap, nearest-neighbour interpolation and hidden axes.\"\"\"\n",
|
||
"    plt.imshow(image, interpolation=\"nearest\", cmap=\"gray\")\n",
|
||
"    plt.axis(\"off\")\n",
|
||
"\n",
|
||
"def plot_color_image(image):\n",
|
||
"    \"\"\"Draw `image` cast to uint8, with nearest-neighbour interpolation and hidden axes.\"\"\"\n",
|
||
"    pixels = image.astype(np.uint8)\n",
|
||
"    plt.imshow(pixels, interpolation=\"nearest\")\n",
|
||
"    plt.axis(\"off\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"And of course we will need TensorFlow:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import tensorflow as tf"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Convolutional layer"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.datasets import load_sample_images\n",
|
||
"dataset = load_sample_images()\n",
|
||
"china, flower = dataset.images\n",
|
||
"image = china[150:220, 130:250]\n",
|
||
"height, width, channels = image.shape\n",
|
||
"image_grayscale = image.mean(axis=2).astype(np.float32)\n",
|
||
"images = image_grayscale.reshape(1, height, width, 1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"fmap = np.zeros(shape=(7, 7, 1, 2), dtype=np.float32)\n",
|
||
"fmap[:, 3, 0, 0] = 1  # first filter: vertical line\n",
|
||
"fmap[3, :, 0, 1] = 1  # second filter: horizontal line\n",
|
||
"plot_image(fmap[:, :, 0, 0])\n",
|
||
"plt.show()\n",
|
||
"plot_image(fmap[:, :, 0, 1])\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, shape=(None, height, width, 1))\n",
|
||
"feature_maps = tf.constant(fmap)\n",
|
||
"convolution = tf.nn.conv2d(X, feature_maps, strides=[1,1,1,1], padding=\"SAME\", use_cudnn_on_gpu=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"with tf.Session() as sess:\n",
|
||
" output = convolution.eval(feed_dict={X: images})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"plot_image(images[0, :, :, 0])\n",
|
||
"save_fig(\"china_original\", tight_layout=False)\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"plot_image(output[0, :, :, 0])\n",
|
||
"save_fig(\"china_vertical\", tight_layout=False)\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"plot_image(output[0, :, :, 1])\n",
|
||
"save_fig(\"china_horizontal\", tight_layout=False)\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Simple example"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.datasets import load_sample_images\n",
|
||
"dataset = np.array(load_sample_images().images, dtype=np.float32)\n",
|
||
"batch_size, height, width, channels = dataset.shape\n",
|
||
"\n",
|
||
"filters = np.zeros(shape=(7, 7, channels, 2), dtype=np.float32)\n",
|
||
"filters[:, 3, :, 0] = 1 # vertical line\n",
|
||
"filters[3, :, :, 1] = 1 # horizontal line\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, shape=(None, height, width, channels))\n",
|
||
"convolution = tf.nn.conv2d(X, filters, strides=[1,2,2,1], padding=\"SAME\")\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" output = sess.run(convolution, feed_dict={X: dataset})\n",
|
||
"\n",
|
||
"for image_index in (0, 1):\n",
|
||
" for feature_map_index in (0, 1):\n",
|
||
" plot_image(output[image_index, :, :, feature_map_index])\n",
|
||
" plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## VALID vs SAME padding"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"filter_primes = np.array([2., 3., 5., 7., 11., 13.], dtype=np.float32)\n",
|
||
"x = tf.constant(np.arange(1, 13+1, dtype=np.float32).reshape([1, 1, 13, 1]))\n",
|
||
"filters = tf.constant(filter_primes.reshape(1, 6, 1, 1))\n",
|
||
"\n",
|
||
"valid_conv = tf.nn.conv2d(x, filters, strides=[1, 1, 5, 1], padding='VALID')\n",
|
||
"same_conv = tf.nn.conv2d(x, filters, strides=[1, 1, 5, 1], padding='SAME')\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
" print(\"VALID:\\n\", valid_conv.eval())\n",
|
||
" print(\"SAME:\\n\", same_conv.eval())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(\"VALID:\")\n",
|
||
"print(np.array([1,2,3,4,5,6]).T.dot(filter_primes))\n",
|
||
"print(np.array([6,7,8,9,10,11]).T.dot(filter_primes))\n",
|
||
"print(\"SAME:\")\n",
|
||
"print(np.array([0,1,2,3,4,5]).T.dot(filter_primes))\n",
|
||
"print(np.array([5,6,7,8,9,10]).T.dot(filter_primes))\n",
|
||
"print(np.array([10,11,12,13,0,0]).T.dot(filter_primes))\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Pooling layer"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.datasets import load_sample_images\n",
|
||
"dataset = np.array(load_sample_images().images, dtype=np.float32)\n",
|
||
"batch_size, height, width, channels = dataset.shape\n",
|
||
"\n",
|
||
"# Max pooling has no trainable weights, so no filters are needed here.\n",
|
||
"X = tf.placeholder(tf.float32, shape=(None, height, width, channels))\n",
|
||
"max_pool = tf.nn.max_pool(X, ksize=[1, 2, 2, 1], strides=[1,2,2,1], padding=\"VALID\")\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
"    output = sess.run(max_pool, feed_dict={X: dataset})\n",
|
||
"\n",
|
||
"plot_color_image(dataset[0])\n",
|
||
"save_fig(\"china_original\")\n",
|
||
"plt.show()\n",
|
||
"\n",
|
||
"plot_color_image(output[0])\n",
|
||
"save_fig(\"china_max_pool\")\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# MNIST"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.datasets import fetch_mldata\n",
|
||
"\n",
|
||
"mnist = fetch_mldata('MNIST original')\n",
|
||
"X_train, X_test = mnist[\"data\"][:60000].astype(np.float64), mnist[\"data\"][60000:].astype(np.float64)\n",
|
||
"y_train, y_test = mnist[\"target\"][:60000].astype(np.int64), mnist[\"target\"][60000:].astype(np.int64)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"height, width = 28, 28\n",
|
||
"images = X_test[5000].reshape(1, height, width, 1)\n",
|
||
"plot_image(images[0, :, :, 0])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Inception v3"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import os\n",
|
||
"import sys\n",
|
||
"import tarfile\n",
|
||
"import urllib.request\n",
|
||
"\n",
|
||
"TF_MODELS_URL = \"http://download.tensorflow.org/models\"\n",
|
||
"INCEPTION_V3_URL = TF_MODELS_URL + \"/inception_v3_2016_08_28.tar.gz\"\n",
|
||
"INCEPTION_PATH = os.path.join(\"datasets\", \"inception\")\n",
|
||
"INCEPTION_V3_CHECKPOINT_PATH = os.path.join(INCEPTION_PATH, \"inception_v3.ckpt\")\n",
|
||
"\n",
|
||
"def download_progress(count, block_size, total_size):\n",
|
||
"    \"\"\"Report hook for urlretrieve: rewrite the current line with the download progress.\n",
|
||
"\n",
|
||
"    Args:\n",
|
||
"        count: number of blocks transferred so far.\n",
|
||
"        block_size: size of one block, in bytes.\n",
|
||
"        total_size: total size in bytes; zero or negative when it is unknown.\n",
|
||
"    \"\"\"\n",
|
||
"    downloaded = count * block_size\n",
|
||
"    if total_size > 0:\n",
|
||
"        # The final block is usually partial, so cap the figure at 100%.\n",
|
||
"        percent = min(downloaded * 100 // total_size, 100)\n",
|
||
"        sys.stdout.write(\"\\rDownloading: {}%\".format(percent))\n",
|
||
"    else:\n",
|
||
"        # No usable total (urlretrieve may pass -1): report bytes instead of dividing by it.\n",
|
||
"        sys.stdout.write(\"\\rDownloading: {} bytes\".format(downloaded))\n",
|
||
"    sys.stdout.flush()\n",
|
||
"\n",
|
||
"def fetch_pretrained_inception_v3(url=INCEPTION_V3_URL, path=INCEPTION_PATH):\n",
|
||
"    \"\"\"Download and unpack the pretrained Inception v3 archive into `path`.\n",
|
||
"\n",
|
||
"    Does nothing when `path` already contains inception_v3.ckpt.\n",
|
||
"\n",
|
||
"    Args:\n",
|
||
"        url: location of the archive to download.\n",
|
||
"        path: directory that receives the extracted files (created if needed).\n",
|
||
"    \"\"\"\n",
|
||
"    # Check inside `path` itself so that a custom directory is honoured; with the\n",
|
||
"    # default `path` this is the same file as INCEPTION_V3_CHECKPOINT_PATH.\n",
|
||
"    checkpoint_path = os.path.join(path, \"inception_v3.ckpt\")\n",
|
||
"    if os.path.exists(checkpoint_path):\n",
|
||
"        return\n",
|
||
"    os.makedirs(path, exist_ok=True)\n",
|
||
"    tgz_path = os.path.join(path, \"inception_v3.tgz\")\n",
|
||
"    try:\n",
|
||
"        urllib.request.urlretrieve(url, tgz_path, reporthook=download_progress)\n",
|
||
"        # NOTE(review): extractall() trusts the member paths stored in the archive;\n",
|
||
"        # only point `url` at a trusted source.\n",
|
||
"        with tarfile.open(tgz_path) as inception_tgz:\n",
|
||
"            inception_tgz.extractall(path=path)\n",
|
||
"    finally:\n",
|
||
"        # Remove the archive whether or not the download and extraction succeeded.\n",
|
||
"        if os.path.exists(tgz_path):\n",
|
||
"            os.remove(tgz_path)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"fetch_pretrained_inception_v3()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import re\n",
|
||
"\n",
|
||
"# One class per line: an id made of the letter n and digits, whitespace, then the name.\n",
|
||
"# The lazy group keeps trailing whitespace (such as the carriage return of CRLF files) out of the name.\n",
|
||
"CLASS_NAME_REGEX = re.compile(r\"^n\\d+\\s+(.*?)\\s*$\", re.M | re.U)\n",
|
||
"\n",
|
||
"def load_class_names(path=None):\n",
|
||
"    \"\"\"Return the class names found in the class-name file, in file order.\n",
|
||
"\n",
|
||
"    Args:\n",
|
||
"        path: file to read; defaults to datasets/inception/imagenet_class_names.txt.\n",
|
||
"    \"\"\"\n",
|
||
"    if path is None:\n",
|
||
"        path = os.path.join(\"datasets\", \"inception\", \"imagenet_class_names.txt\")\n",
|
||
"    # Read bytes and decode explicitly so the result is the same on Python 2 and 3.\n",
|
||
"    with open(path, \"rb\") as f:\n",
|
||
"        content = f.read().decode(\"utf-8\")\n",
|
||
"    return CLASS_NAME_REGEX.findall(content)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# The model below is built with num_classes=1001: index 0 is a background class,\n",
|
||
"# so shift the names by one to line them up with the prediction indices.\n",
|
||
"# NOTE(review): assumes imagenet_class_names.txt lists only the 1000 ImageNet classes - confirm.\n",
|
||
"class_names = [\"background\"] + load_class_names()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"width = 299\n",
|
||
"height = 299\n",
|
||
"channels = 3"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import matplotlib.image as mpimg\n",
|
||
"test_image = mpimg.imread(os.path.join(\"images\",\"cnn\",\"test_image.png\"))[:, :, :channels]\n",
|
||
"plt.imshow(test_image)\n",
|
||
"plt.axis(\"off\")\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import tensorflow as tf\n",
|
||
"from nets.inception_v3 import inception_v3, inception_v3_arg_scope\n",
|
||
"import tensorflow.contrib.slim as slim\n",
|
||
"\n",
|
||
"tf.reset_default_graph()\n",
|
||
"\n",
|
||
"X = tf.placeholder(tf.float32, shape=[None, height, width, channels], name=\"X\")\n",
|
||
"with slim.arg_scope(inception_v3_arg_scope()):\n",
|
||
" logits, end_points = inception_v3(X, num_classes=1001, is_training=False)\n",
|
||
"predictions = end_points[\"Predictions\"]\n",
|
||
"saver = tf.train.Saver()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# mpimg.imread() returns PNG pixels as floats in [0, 1]; the pretrained Inception\n",
|
||
"# weights expect inputs scaled to [-1, 1], so rescale a copy before feeding it.\n",
|
||
"X_test = (2 * test_image - 1).reshape(-1, height, width, channels)\n",
|
||
"\n",
|
||
"with tf.Session() as sess:\n",
|
||
"    saver.restore(sess, INCEPTION_V3_CHECKPOINT_PATH)\n",
|
||
"    predictions_val = predictions.eval(feed_dict={X: X_test})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"class_names[np.argmax(predictions_val[0])]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.argmax(predictions_val, axis=1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"top_5 = np.argpartition(predictions_val[0], -5)[-5:]\n",
|
||
"top_5 = top_5[np.argsort(predictions_val[0][top_5])]\n",
|
||
"for i in top_5:\n",
|
||
" print(\"{0}: {1:.2f}%\".format(class_names[i], 100*predictions_val[0][i]))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"source": [
|
||
"# Exercise solutions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"**Coming soon**"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.5.1"
|
||
},
|
||
"nav_menu": {},
|
||
"toc": {
|
||
"navigate_menu": true,
|
||
"number_sections": true,
|
||
"sideBar": true,
|
||
"threshold": 6,
|
||
"toc_cell": false,
|
||
"toc_section_display": "block",
|
||
"toc_window_display": false
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 0
|
||
}
|