diff --git a/13_convolutional_neural_networks.ipynb b/13_convolutional_neural_networks.ipynb index 872356b..db1f5b5 100644 --- a/13_convolutional_neural_networks.ipynb +++ b/13_convolutional_neural_networks.ipynb @@ -278,25 +278,103 @@ }, "outputs": [], "source": [ + "import numpy as np\n", + "from sklearn.datasets import load_sample_image\n", + "\n", + "# Load sample images\n", + "china = load_sample_image(\"china.jpg\")\n", + "flower = load_sample_image(\"flower.jpg\")\n", "dataset = np.array([china, flower], dtype=np.float32)\n", "batch_size, height, width, channels = dataset.shape\n", "\n", + "# Create 2 filters\n", "filters = np.zeros(shape=(7, 7, channels, 2), dtype=np.float32)\n", "filters[:, 3, :, 0] = 1 # vertical line\n", "filters[3, :, :, 1] = 1 # horizontal line\n", "\n", + "# Create a graph with input X plus a convolutional layer applying the 2 filters\n", "X = tf.placeholder(tf.float32, shape=(None, height, width, channels))\n", "convolution = tf.nn.conv2d(X, filters, strides=[1,2,2,1], padding=\"SAME\")\n", "\n", "with tf.Session() as sess:\n", " output = sess.run(convolution, feed_dict={X: dataset})\n", "\n", + "plt.imshow(output[0, :, :, 1], cmap=\"gray\") # plot 1st image's 2nd feature map\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ "for image_index in (0, 1):\n", " for feature_map_index in (0, 1):\n", " plot_image(output[image_index, :, :, feature_map_index])\n", " plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using `tf.layers.conv2d()`:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "X = tf.placeholder(shape=(None, height, width, channels), 
dtype=tf.float32)\n", + "conv = tf.layers.conv2d(X, filters=2, kernel_size=7, strides=[2,2],\n", + " padding=\"SAME\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "init = tf.global_variables_initializer()\n", + "\n", + "with tf.Session() as sess:\n", + " init.run()\n", + " output = sess.run(conv, feed_dict={X: dataset})" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.imshow(output[0, :, :, 1], cmap=\"gray\") # plot 1st image's 2nd feature map\n", + "plt.show()" + ] + }, { "cell_type": "markdown", "metadata": { @@ -309,7 +387,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "metadata": { "collapsed": false, "deletable": true, @@ -333,7 +411,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "metadata": { "collapsed": false, "deletable": true, @@ -362,11 +440,9 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 19, "metadata": { - "collapsed": false, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -374,14 +450,37 @@ "\n", "filters = np.zeros(shape=(7, 7, channels, 2), dtype=np.float32)\n", "filters[:, 3, :, 0] = 1 # vertical line\n", - "filters[3, :, :, 1] = 1 # horizontal line\n", - "\n", + "filters[3, :, :, 1] = 1 # horizontal line" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ "X = tf.placeholder(tf.float32, shape=(None, height, width, channels))\n", - "max_pool = tf.nn.max_pool(X, ksize=[1, 2, 2, 1], strides=[1,2,2,1], padding=\"VALID\")\n", + "max_pool = tf.nn.max_pool(X, ksize=[1,2,2,1], strides=[1,2,2,1],padding=\"VALID\")\n", "\n", "with tf.Session() as sess:\n", " output = sess.run(max_pool, feed_dict={X: dataset})\n", 
"\n", + "plt.imshow(output[0].astype(np.uint8)) # plot the output for the 1st image\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ "plot_color_image(dataset[0])\n", "save_fig(\"china_original\")\n", "plt.show()\n", @@ -429,7 +528,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 22, "metadata": { "collapsed": false, "deletable": true, @@ -495,7 +594,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 23, "metadata": { "collapsed": false, "deletable": true, @@ -509,7 +608,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 24, "metadata": { "collapsed": false, "deletable": true, @@ -587,7 +686,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 25, "metadata": { "collapsed": true, "deletable": true, @@ -671,7 +770,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 26, "metadata": { "collapsed": false, "deletable": true, @@ -695,7 +794,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 27, "metadata": { "collapsed": true, "deletable": true, @@ -728,7 +827,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 28, "metadata": { "collapsed": true, "deletable": true, @@ -758,7 +857,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 29, "metadata": { "collapsed": false, "deletable": true, @@ -819,7 +918,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 30, "metadata": { "collapsed": true, "deletable": true, @@ -834,7 +933,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 31, "metadata": { "collapsed": false, "deletable": true, @@ -862,7 +961,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 32, "metadata": { "collapsed": true, "deletable": true, @@ -898,7 +997,7 @@ }, { "cell_type": 
"code", - "execution_count": 26, + "execution_count": 33, "metadata": { "collapsed": false, "deletable": true, @@ -911,7 +1010,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 34, "metadata": { "collapsed": true, "deletable": true, @@ -931,7 +1030,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 35, "metadata": { "collapsed": false, "deletable": true, @@ -944,7 +1043,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 36, "metadata": { "collapsed": false, "deletable": true, @@ -968,7 +1067,18 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 37, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, "metadata": { "collapsed": false, "deletable": true, @@ -979,11 +1089,10 @@ "from tensorflow.contrib.slim.nets import inception\n", "import tensorflow.contrib.slim as slim\n", "\n", - "tf.reset_default_graph()\n", - "\n", - "X = tf.placeholder(tf.float32, shape=[None, height, width, channels], name=\"X\")\n", + "X = tf.placeholder(tf.float32, shape=[None, 299, 299, 3], name=\"X\")\n", "with slim.arg_scope(inception.inception_v3_arg_scope()):\n", - " logits, end_points = inception.inception_v3(X, num_classes=1001, is_training=False)\n", + " logits, end_points = inception.inception_v3(\n", + " X, num_classes=1001, is_training=False)\n", "predictions = end_points[\"Predictions\"]\n", "saver = tf.train.Saver()" ] @@ -1001,9 +1110,9 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 39, "metadata": { - "collapsed": true, + "collapsed": false, "deletable": true, "editable": true }, @@ -1027,7 +1136,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 40, "metadata": { "collapsed": false, "deletable": true, @@ -1044,7 +1153,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 41, "metadata": { 
"collapsed": false, "deletable": true, @@ -1058,7 +1167,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 42, "metadata": { "collapsed": false, "deletable": true, @@ -1071,7 +1180,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 43, "metadata": { "collapsed": false, "deletable": true, @@ -1121,7 +1230,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 44, "metadata": { "collapsed": false, "deletable": true, @@ -1150,7 +1259,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 45, "metadata": { "collapsed": false, "deletable": true, @@ -1173,7 +1282,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 46, "metadata": { "collapsed": false, "deletable": true, @@ -1199,7 +1308,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 47, "metadata": { "collapsed": true, "deletable": true, @@ -1230,7 +1339,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 48, "metadata": { "collapsed": true, "deletable": true, @@ -1254,7 +1363,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 49, "metadata": { "collapsed": false, "deletable": true, @@ -1318,7 +1427,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 50, "metadata": { "collapsed": true, "deletable": true, @@ -1369,7 +1478,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "Note: at test time, the preprocessing step should be as light as possible, just the bare minimum necessary to be able to feed the image to the neural network. You may want to tweak the above function to add a `training` parameter: if `False`, preprocessing should be limited to the bare minimum (i.e., no flipping the image, and just the minimum cropping required, preserving the center of the image)." 
] @@ -1386,7 +1498,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 51, "metadata": { "collapsed": false, "deletable": true, @@ -1413,7 +1525,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 52, "metadata": { "collapsed": false, "deletable": true, @@ -1442,7 +1554,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 53, "metadata": { "collapsed": false, "deletable": true, @@ -1485,7 +1597,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 54, "metadata": { "collapsed": false, "deletable": true, @@ -1546,7 +1658,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 55, "metadata": { "collapsed": false, "deletable": true, @@ -1602,7 +1714,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 56, "metadata": { "collapsed": false, "deletable": true, @@ -1635,7 +1747,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 57, "metadata": { "collapsed": false, "deletable": true, @@ -1658,7 +1770,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 58, "metadata": { "collapsed": false, "deletable": true, @@ -1681,7 +1793,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 59, "metadata": { "collapsed": false, "deletable": true, @@ -1704,7 +1816,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 60, "metadata": { "collapsed": false, "deletable": true, @@ -1727,7 +1839,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 61, "metadata": { "collapsed": false, "deletable": true, @@ -1750,7 +1862,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 62, "metadata": { "collapsed": true, "deletable": true, @@ -1773,7 +1885,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 63, "metadata": { "collapsed": false, "deletable": true, @@ -1808,7 +1920,7 @@ }, { 
"cell_type": "code", - "execution_count": 57, + "execution_count": 64, "metadata": { "collapsed": false, "deletable": true, @@ -1836,7 +1948,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 65, "metadata": { "collapsed": false, "deletable": true, @@ -1880,7 +1992,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 66, "metadata": { "collapsed": false, "deletable": true, @@ -1904,7 +2016,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 67, "metadata": { "collapsed": false, "deletable": true, @@ -1930,7 +2042,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 68, "metadata": { "collapsed": false, "deletable": true, @@ -1959,7 +2071,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 69, "metadata": { "collapsed": false, "deletable": true, @@ -1982,7 +2094,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 70, "metadata": { "collapsed": true, "deletable": true, @@ -2003,7 +2115,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 71, "metadata": { "collapsed": false, "deletable": true, @@ -2016,7 +2128,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 72, "metadata": { "collapsed": false, "deletable": true, @@ -2029,7 +2141,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 73, "metadata": { "collapsed": false, "deletable": true, @@ -2042,7 +2154,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 74, "metadata": { "collapsed": false, "deletable": true, @@ -2055,7 +2167,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 75, "metadata": { "collapsed": false, "deletable": true, @@ -2078,7 +2190,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 76, "metadata": { "collapsed": false, "deletable": true, @@ -2091,7 +2203,7 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 77, "metadata": { "collapsed": false, "deletable": true, @@ -2124,7 +2236,53 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 78, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "X_test, y_test = prepare_batch(flower_paths_and_classes_test, batch_size=len(flower_paths_and_classes_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "X_test.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "We could prepare the training set in much the same way, but it would only generate one variant for each image. Instead, it's preferable to generate the training batches on the fly during training, so that we can really benefit from data augmentation, with many variants of each image." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "And now, we are ready to train the network (or more precisely, the output layer we just added, since all the other layers are frozen). Be aware that this may take a (very) long time." + ] + }, + { + "cell_type": "code", + "execution_count": 80, "metadata": { "collapsed": false, "deletable": true, @@ -2188,7 +2346,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 81, "metadata": { "collapsed": true, "deletable": true,