diff --git a/10_introduction_to_artificial_neural_networks.ipynb b/10_introduction_to_artificial_neural_networks.ipynb
index 7c22236..5ab7456 100644
--- a/10_introduction_to_artificial_neural_networks.ipynb
+++ b/10_introduction_to_artificial_neural_networks.ipynb
@@ -77,9 +77,7 @@
 {
 "cell_type": "code",
 "execution_count": 2,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
 "outputs": [],
 "source": [
 "import numpy as np\n",
@@ -90,7 +88,7 @@
 "X = iris.data[:, (2, 3)] # petal length, petal width\n",
 "y = (iris.target == 0).astype(np.int)\n",
 "\n",
- "per_clf = Perceptron(random_state=42)\n",
+ "per_clf = Perceptron(max_iter=100, random_state=42)\n",
 "per_clf.fit(X, y)\n",
 "\n",
 "y_pred = per_clf.predict([[2, 0.5]])"
@@ -132,7 +130,7 @@
 "from matplotlib.colors import ListedColormap\n",
 "custom_cmap = ListedColormap(['#9898ff', '#fafab0'])\n",
 "\n",
- "plt.contourf(x0, x1, zz, cmap=custom_cmap, linewidth=5)\n",
+ "plt.contourf(x0, x1, zz, cmap=custom_cmap)\n",
 "plt.xlabel(\"Petal length\", fontsize=14)\n",
 "plt.ylabel(\"Petal width\", fontsize=14)\n",
 "plt.legend(loc=\"lower right\", fontsize=14)\n",
@@ -152,9 +150,7 @@
 {
 "cell_type": "code",
 "execution_count": 5,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
 "outputs": [],
 "source": [
 "def logit(z):\n",
@@ -206,9 +202,7 @@
 {
 "cell_type": "code",
 "execution_count": 7,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
 "outputs": [],
 "source": [
 "def heaviside(z):\n",
@@ -262,7 +256,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- "## using tf.learn"
+ "## Using the Estimator API (formerly `tf.contrib.learn`)"
 ]
 },
 {
@@ -271,23 +265,29 @@
 "metadata": {},
 "outputs": [],
 "source": [
- "from tensorflow.examples.tutorials.mnist import input_data\n",
- "\n",
- "mnist = input_data.read_data_sets(\"/tmp/data/\")"
+ "import tensorflow as tf"
 ]
 },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Warning**: `tf.examples.tutorials.mnist` is deprecated. We will use `tf.keras.datasets.mnist` instead. Moreover, the `tf.contrib.learn` API was promoted to `tf.estimator` and `tf.feature_column`, and it has changed considerably. In particular, there is no `infer_real_valued_columns_from_input()` function or `SKCompat` class."
+ ]
+ },
 {
 "cell_type": "code",
 "execution_count": 10,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
 "outputs": [],
 "source": [
- "X_train = mnist.train.images\n",
- "X_test = mnist.test.images\n",
- "y_train = mnist.train.labels.astype(\"int\")\n",
- "y_test = mnist.test.labels.astype(\"int\")"
+ "(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()\n",
+ "X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0\n",
+ "X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0\n",
+ "y_train = y_train.astype(np.int32)\n",
+ "y_test = y_test.astype(np.int32)\n",
+ "X_valid, X_train = X_train[:5000], X_train[5000:]\n",
+ "y_valid, y_train = y_train[:5000], y_train[5000:]"
 ]
 },
 {
@@ -296,15 +296,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
- "import tensorflow as tf\n",
+ "feature_cols = [tf.feature_column.numeric_column(\"X\", shape=[28 * 28])]\n",
+ "dnn_clf = tf.estimator.DNNClassifier(hidden_units=[300,100], n_classes=10,\n",
+ "                                     feature_columns=feature_cols)\n",
 "\n",
- "config = tf.contrib.learn.RunConfig(tf_random_seed=42) # not shown in the config\n",
- "\n",
- "feature_cols = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)\n",
- "dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[300,100], n_classes=10,\n",
- "                                         feature_columns=feature_cols, config=config)\n",
- "dnn_clf = tf.contrib.learn.SKCompat(dnn_clf) # if TensorFlow >= 1.1\n",
- "dnn_clf.fit(X_train, y_train, batch_size=50, steps=40000)"
+ "input_fn = tf.estimator.inputs.numpy_input_fn(\n",
+ "    x={\"X\": X_train}, y=y_train, num_epochs=40, batch_size=50, shuffle=True)\n",
+ "dnn_clf.train(input_fn=input_fn)"
 ]
 },
 {
@@ -313,10 +311,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
- "from sklearn.metrics import accuracy_score\n",
- "\n",
- "y_pred = dnn_clf.predict(X_test)\n",
- "accuracy_score(y_test, y_pred['classes'])"
+ "test_input_fn = tf.estimator.inputs.numpy_input_fn(\n",
+ "    x={\"X\": X_test}, y=y_test, shuffle=False)\n",
+ "eval_results = dnn_clf.evaluate(input_fn=test_input_fn)"
 ]
 },
 {
@@ -325,10 +322,18 @@
 "metadata": {},
 "outputs": [],
 "source": [
- "from sklearn.metrics import log_loss\n",
- "\n",
- "y_pred_proba = y_pred['probabilities']\n",
- "log_loss(y_test, y_pred_proba)"
+ "eval_results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y_pred_iter = dnn_clf.predict(input_fn=test_input_fn)\n",
+ "y_pred = list(y_pred_iter)\n",
+ "y_pred[0]"
 ]
 },
 {
@@ -342,7 +347,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 14,
+ "execution_count": 15,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -356,19 +361,19 @@
 },
 {
 "cell_type": "code",
- "execution_count": 15,
+ "execution_count": 16,
 "metadata": {},
 "outputs": [],
 "source": [
 "reset_graph()\n",
 "\n",
 "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
- "y = tf.placeholder(tf.int64, shape=(None), name=\"y\")"
+ "y = tf.placeholder(tf.int32, shape=(None), name=\"y\")"
 ]
 },
 {
 "cell_type": "code",
- "execution_count": 16,
+ "execution_count": 17,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -388,7 +393,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 17,
+ "execution_count": 18,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -402,7 +407,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 18,
+ "execution_count": 19,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -414,7 +419,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 19,
+ "execution_count": 20,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -427,7 +432,7 @@
 },
 {
"cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -438,7 +443,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -448,7 +453,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -458,55 +463,69 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "metadata": {}, "outputs": [], + "source": [ + "def shuffle_batch(X, y, batch_size):\n", + " rnd_idx = np.random.permutation(len(X))\n", + " n_batches = len(X) // batch_size\n", + " for batch_idx in np.array_split(rnd_idx, n_batches):\n", + " X_batch, y_batch = X[batch_idx], y[batch_idx]\n", + " yield X_batch, y_batch" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "scrolled": true + }, + "outputs": [], "source": [ "with tf.Session() as sess:\n", " init.run()\n", " for epoch in range(n_epochs):\n", - " for iteration in range(mnist.train.num_examples // batch_size):\n", - " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", - " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", - " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images,\n", - " y: mnist.validation.labels})\n", - " print(epoch, \"Train accuracy:\", acc_train, \"Val accuracy:\", acc_val)\n", + " acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})\n", + " print(epoch, \"Batch accuracy:\", acc_batch, \"Val accuracy:\", acc_val)\n", "\n", " save_path = saver.save(sess, \"./my_model_final.ckpt\")" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "with tf.Session() as sess:\n", " saver.restore(sess, \"./my_model_final.ckpt\") # or better, use save_path\n", - " X_new_scaled = mnist.test.images[:20]\n", + " X_new_scaled = X_test[:20]\n", " Z = logits.eval(feed_dict={X: X_new_scaled})\n", " y_pred = np.argmax(Z, axis=1)" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "print(\"Predicted classes:\", y_pred)\n", - "print(\"Actual classes: \", mnist.test.labels[:20])" + "print(\"Actual classes: \", y_test[:20])" ] }, { "cell_type": "code", - "execution_count": 26, - "metadata": { - "collapsed": true - }, + "execution_count": 28, + "metadata": {}, "outputs": [], "source": [ + "# Works on Chrome, not guaranteed on other browsers\n", + "\n", "from IPython.display import clear_output, Image, display, HTML\n", "\n", "def strip_consts(graph_def, max_const_size=32):\n", @@ -547,7 +566,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -565,7 +584,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. 
The `dense()` function is almost identical to the `fully_connected()` function, except for a few minor differences:\n", + "Note: previous releases of the book used `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function, except for a few minor differences:\n", "* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n", "* the default `activation` is now `None` rather than `tf.nn.relu`.\n", "* a few more differences are presented in chapter 11." @@ -573,7 +592,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -585,21 +604,19 @@ }, { "cell_type": "code", - "execution_count": 29, - "metadata": { - "collapsed": true - }, + "execution_count": 31, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", - "y = tf.placeholder(tf.int64, shape=(None), name=\"y\") " + "y = tf.placeholder(tf.int32, shape=(None), name=\"y\") " ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -608,15 +625,14 @@ " activation=tf.nn.relu)\n", " hidden2 = tf.layers.dense(hidden1, n_hidden2, name=\"hidden2\",\n", " activation=tf.nn.relu)\n", - " logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")" + " logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")\n", + " y_proba = tf.nn.softmax(logits)" ] }, { "cell_type": "code", - "execution_count": 31, - "metadata": { - "collapsed": true - }, + "execution_count": 33, + "metadata": {}, "outputs": [], "source": [ "with tf.name_scope(\"loss\"):\n", @@ -626,10 +642,8 @@ }, { "cell_type": "code", - "execution_count": 32, - "metadata": { - "collapsed": true - }, + "execution_count": 34, + "metadata": {}, "outputs": [], "source": [ "learning_rate = 0.01\n", @@ -641,10 +655,8 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": { - "collapsed": true - }, + "execution_count": 35, + "metadata": {}, "outputs": [], "source": [ "with tf.name_scope(\"eval\"):\n", @@ -654,10 +666,8 @@ }, { "cell_type": "code", - "execution_count": 34, - "metadata": { - "collapsed": true - }, + "execution_count": 36, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()\n", @@ -666,7 +676,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -676,19 +686,18 @@ "with tf.Session() as sess:\n", " init.run()\n", " for epoch in range(n_epochs):\n", - " for iteration in range(mnist.train.num_examples // batch_size):\n", - " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", + " for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):\n", " sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n", - " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", - " acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})\n", - " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + " acc_batch = 
accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", + " acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})\n", + " print(epoch, \"Batch accuracy:\", acc_batch, \"Validation accuracy:\", acc_valid)\n", "\n", " save_path = saver.save(sess, \"./my_model_final.ckpt\")" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -743,7 +752,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -755,21 +764,19 @@ }, { "cell_type": "code", - "execution_count": 38, - "metadata": { - "collapsed": true - }, + "execution_count": 41, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", "\n", "X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n", - "y = tf.placeholder(tf.int64, shape=(None), name=\"y\") " + "y = tf.placeholder(tf.int32, shape=(None), name=\"y\") " ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -783,10 +790,8 @@ }, { "cell_type": "code", - "execution_count": 40, - "metadata": { - "collapsed": true - }, + "execution_count": 43, + "metadata": {}, "outputs": [], "source": [ "with tf.name_scope(\"loss\"):\n", @@ -797,10 +802,8 @@ }, { "cell_type": "code", - "execution_count": 41, - "metadata": { - "collapsed": true - }, + "execution_count": 44, + "metadata": {}, "outputs": [], "source": [ "learning_rate = 0.01\n", @@ -812,10 +815,8 @@ }, { "cell_type": "code", - "execution_count": 42, - "metadata": { - "collapsed": true - }, + "execution_count": 45, + "metadata": {}, "outputs": [], "source": [ "with tf.name_scope(\"eval\"):\n", @@ -826,10 +827,8 @@ }, { "cell_type": "code", - "execution_count": 43, - "metadata": { - "collapsed": true - }, + "execution_count": 46, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()\n", @@ -845,10 +844,8 @@ }, { "cell_type": "code", - "execution_count": 44, - "metadata": { - "collapsed": true - }, + "execution_count": 47, + "metadata": {}, "outputs": [], "source": [ "from datetime import datetime\n", @@ -864,10 +861,8 @@ }, { "cell_type": "code", - "execution_count": 45, - "metadata": { - "collapsed": true - }, + "execution_count": 48, + "metadata": {}, "outputs": [], "source": [ "logdir = log_dir(\"mnist_dnn\")" @@ -882,10 +877,8 @@ }, { "cell_type": "code", - "execution_count": 46, - "metadata": { - "collapsed": true - }, + "execution_count": 49, + "metadata": {}, "outputs": [], "source": [ "file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())" @@ -895,33 +888,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Hey! Why don't we implement early stopping? For this, we are going to need a validation set. Luckily, the dataset returned by TensorFlow's `input_data()` function (see above) is already split into a training set (60,000 instances, already shuffled for us), a validation set (5,000 instances) and a test set (5,000 instances). So we can easily define `X_valid` and `y_valid`:" + "Hey! Why don't we implement early stopping? For this, we are going to need to use the validation set." 
 ]
 },
 {
 "cell_type": "code",
- "execution_count": 47,
+ "execution_count": 50,
 "metadata": {},
 "outputs": [],
- "source": [
- "X_valid = mnist.validation.images\n",
- "y_valid = mnist.validation.labels"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 48,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
 "source": [
 "m, n = X_train.shape"
 ]
 },
 {
 "cell_type": "code",
- "execution_count": 49,
+ "execution_count": 52,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -949,8 +930,7 @@
 "    sess.run(init)\n",
 "\n",
 "    for epoch in range(start_epoch, n_epochs):\n",
- "        for iteration in range(mnist.train.num_examples // batch_size):\n",
- "            X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
+ "        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):\n",
 "            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})\n",
 "        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], feed_dict={X: X_valid, y: y_valid})\n",
 "        file_writer.add_summary(accuracy_summary_str, epoch)\n",
@@ -974,7 +954,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 50,
+ "execution_count": 53,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -983,7 +963,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 51,
+ "execution_count": 54,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -994,7 +974,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 52,
+ "execution_count": 55,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1004,9 +984,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
 "outputs": [],
 "source": []
 }
@@ -1027,7 +1005,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
- "version": "3.6.2"
+ "version": "3.5.2"
 },
 "nav_menu": {
 "height": "264px",