diff --git a/10_neural_nets_with_keras.ipynb b/10_neural_nets_with_keras.ipynb new file mode 100644 index 0000000..630205c --- /dev/null +++ b/10_neural_nets_with_keras.ipynb @@ -0,0 +1,1691 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Chapter 10 – Introduction to Artificial Neural Networks with Keras**\n", + "\n", + "_This notebook contains all the sample code and solutions to the exercises in chapter 10._" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's import a few common modules, ensure Matplotlib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Python ≥3.5 is required\n", + "import sys\n", + "assert sys.version_info >= (3, 5)\n", + "\n", + "# Common imports\n", + "import numpy as np\n", + "import os\n", + "\n", + "# to make this notebook's output stable across runs\n", + "np.random.seed(42)\n", + "\n", + "# To plot pretty figures\n", + "%matplotlib inline\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "mpl.rc('axes', labelsize=14)\n", + "mpl.rc('xtick', labelsize=12)\n", + "mpl.rc('ytick', labelsize=12)\n", + "\n", + "# Where to save the figures\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"ann\"\n", + "\n", + "def save_fig(fig_id, tight_layout=True):\n", + "    path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + "    print(\"Saving figure\", fig_id)\n", + "    if tight_layout:\n", + "        plt.tight_layout()\n", + "    plt.savefig(path, format='png', dpi=300)\n", + "\n", + "# Ignore useless warnings (see SciPy issue #5998)\n", + "import warnings\n", +
"warnings.filterwarnings(action=\"ignore\", message=\"^internal gelsd\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Perceptrons" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note**: we set `max_iter` and `tol` explicitly to avoid warnings about the fact that their default value will change in future versions of Scikit-Learn." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from sklearn.datasets import load_iris\n", + "from sklearn.linear_model import Perceptron\n", + "\n", + "iris = load_iris()\n", + "X = iris.data[:, (2, 3)] # petal length, petal width\n", + "y = (iris.target == 0).astype(int) # builtin int: the np.int alias was removed in NumPy 1.24\n", + "\n", + "per_clf = Perceptron(max_iter=100, tol=-np.inf, random_state=42) # np.inf: the np.infty alias was removed in NumPy 2.0\n", + "per_clf.fit(X, y)\n", + "\n", + "y_pred = per_clf.predict([[2, 0.5]])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "a = -per_clf.coef_[0][0] / per_clf.coef_[0][1]\n", + "b = -per_clf.intercept_ / per_clf.coef_[0][1]\n", + "\n", + "axes = [0, 5, 0, 2]\n", + "\n", + "x0, x1 = np.meshgrid(\n", + " np.linspace(axes[0], axes[1], 500).reshape(-1, 1),\n", + " np.linspace(axes[2], axes[3], 200).reshape(-1, 1),\n", + " )\n", + "X_new = np.c_[x0.ravel(), x1.ravel()]\n", + "y_predict = per_clf.predict(X_new)\n", + "zz = y_predict.reshape(x0.shape)\n", + "\n", + "plt.figure(figsize=(10, 4))\n", + "plt.plot(X[y==0, 0], X[y==0, 1], \"bs\", label=\"Not Iris-Setosa\")\n", + "plt.plot(X[y==1, 0], X[y==1, 1], \"yo\", label=\"Iris-Setosa\")\n", + "\n", + "plt.plot([axes[0], axes[1]], [a * axes[0] + b, a * axes[1] + b], \"k-\", linewidth=3)\n", + "from matplotlib.colors import ListedColormap\n", + "custom_cmap = ListedColormap(['#9898ff', '#fafab0'])\n", + "\n", +
"plt.contourf(x0, x1, zz, cmap=custom_cmap)\n", + "plt.xlabel(\"Petal length\", fontsize=14)\n", + "plt.ylabel(\"Petal width\", fontsize=14)\n", + "plt.legend(loc=\"lower right\", fontsize=14)\n", + "plt.axis(axes)\n", + "\n", + "save_fig(\"perceptron_iris_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Activation functions" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def sigmoid(z):\n", + " return 1 / (1 + np.exp(-z))\n", + "\n", + "def relu(z):\n", + " return np.maximum(0, z)\n", + "\n", + "def derivative(f, z, eps=0.000001):\n", + " return (f(z + eps) - f(z - eps))/(2 * eps)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "z = np.linspace(-5, 5, 200)\n", + "\n", + "plt.figure(figsize=(11,4))\n", + "\n", + "plt.subplot(121)\n", + "plt.plot(z, np.sign(z), \"r-\", linewidth=1, label=\"Step\")\n", + "plt.plot(z, sigmoid(z), \"g--\", linewidth=2, label=\"Sigmoid\")\n", + "plt.plot(z, np.tanh(z), \"b-\", linewidth=2, label=\"Tanh\")\n", + "plt.plot(z, relu(z), \"m-.\", linewidth=2, label=\"ReLU\")\n", + "plt.grid(True)\n", + "plt.legend(loc=\"center right\", fontsize=14)\n", + "plt.title(\"Activation functions\", fontsize=14)\n", + "plt.axis([-5, 5, -1.2, 1.2])\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(z, derivative(np.sign, z), \"r-\", linewidth=1, label=\"Step\")\n", + "plt.plot(0, 0, \"ro\", markersize=5)\n", + "plt.plot(0, 0, \"rx\", markersize=10)\n", + "plt.plot(z, derivative(sigmoid, z), \"g--\", linewidth=2, label=\"Sigmoid\")\n", + "plt.plot(z, derivative(np.tanh, z), \"b-\", linewidth=2, label=\"Tanh\")\n", + "plt.plot(z, derivative(relu, z), \"m-.\", linewidth=2, label=\"ReLU\")\n", + "plt.grid(True)\n", + "#plt.legend(loc=\"center right\", fontsize=14)\n", + "plt.title(\"Derivatives\", fontsize=14)\n", + "plt.axis([-5, 5, -0.2, 1.2])\n", + "\n", + 
"save_fig(\"activation_functions_plot\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def heaviside(z):\n", + " return (z >= 0).astype(z.dtype)\n", + "\n", + "def mlp_xor(x1, x2, activation=heaviside):\n", + " return activation(-activation(x1 + x2 - 1.5) + activation(x1 + x2 - 0.5) - 0.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "x1s = np.linspace(-0.2, 1.2, 100)\n", + "x2s = np.linspace(-0.2, 1.2, 100)\n", + "x1, x2 = np.meshgrid(x1s, x2s)\n", + "\n", + "z1 = mlp_xor(x1, x2, activation=heaviside)\n", + "z2 = mlp_xor(x1, x2, activation=sigmoid)\n", + "\n", + "plt.figure(figsize=(10,4))\n", + "\n", + "plt.subplot(121)\n", + "plt.contourf(x1, x2, z1)\n", + "plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n", + "plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n", + "plt.title(\"Activation function: heaviside\", fontsize=14)\n", + "plt.grid(True)\n", + "\n", + "plt.subplot(122)\n", + "plt.contourf(x1, x2, z2)\n", + "plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n", + "plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n", + "plt.title(\"Activation function: sigmoid\", fontsize=14)\n", + "plt.grid(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Building an Image Classifier" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First let's import TensorFlow and Keras." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow import keras" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "tf.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "keras.__version__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's start by loading the fashion MNIST dataset. Keras has a number of functions to load popular datasets in `keras.datasets`. The dataset is already split for you between a training set and a test set, but it can be useful to split the training set further to have a validation set:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "fashion_mnist = keras.datasets.fashion_mnist\n", + "(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The training set contains 60,000 grayscale images, each 28x28 pixels:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "X_train_full.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each pixel intensity is represented as a byte (0 to 255):" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "X_train_full.dtype" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's split the full training set into a validation set and a (smaller) training set. We also scale the pixel intensities down to the 0-1 range and convert them to floats, by dividing by 255." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "X_valid, X_train = X_train_full[:5000] / 255., X_train_full[5000:] / 255.\n", + "y_valid, y_train = y_train_full[:5000], y_train_full[5000:]\n", + "X_test = X_test / 255." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can plot an image using Matplotlib's `imshow()` function, with a `'binary'`\n", + " color map:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(X_train[0], cmap=\"binary\")\n", + "plt.axis('off')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The labels are the class IDs (represented as uint8), from 0 to 9:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "y_train" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here are the corresponding class names:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "class_names = [\"T-shirt/top\", \"Trouser\", \"Pullover\", \"Dress\", \"Coat\",\n", + " \"Sandal\", \"Shirt\", \"Sneaker\", \"Bag\", \"Ankle boot\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So the first image in the training set is a coat:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "class_names[y_train[0]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The validation set contains 5,000 images, and the test set contains 10,000 images:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "X_valid.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "X_test.shape" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a look at a sample of the images in the dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "n_rows = 4\n", + "n_cols = 10\n", + "plt.figure(figsize=(n_cols * 1.2, n_rows * 1.2))\n", + "for row in range(n_rows):\n", + " for col in range(n_cols):\n", + " index = n_cols * row + col\n", + " plt.subplot(n_rows, n_cols, index + 1)\n", + " plt.imshow(X_train[index], cmap=\"binary\", interpolation=\"nearest\")\n", + " plt.axis('off')\n", + " plt.title(class_names[y_train[index]], fontsize=12)\n", + "plt.subplots_adjust(wspace=0.2, hspace=0.5)\n", + "save_fig('fashion_mnist', tight_layout=False)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[28, 28]))\n", + "model.add(keras.layers.Dense(300, activation=\"relu\"))\n", + "model.add(keras.layers.Dense(100, activation=\"relu\"))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.Dense(300, activation=\"relu\"),\n", + " keras.layers.Dense(100, activation=\"relu\"),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "model.layers" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "**Warning**: the following 2 cells do not work yet due to [TensorFlow issue 24622](https://github.com/tensorflow/tensorflow/issues/24622) (you are using a preview version of TensorFlow, hence there are still a few issues).\n", + "You can work around this issue by applying [PR 24626](https://github.com/tensorflow/tensorflow/pull/24625/files) to your copy of `tensorflow/python/keras/utils/vis_utils.py`." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "#keras.utils.plot_model(model, \"my_mnist_model.png\", show_shapes=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "%%html\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Warning**: the following cell does not work yet due to [TensorFlow issue 24622](https://github.com/tensorflow/tensorflow/issues/24622) and [TensorFlow issue 24639](https://github.com/tensorflow/tensorflow/issues/24639).\n", + "You can work around issue 24639 by writing `from tensorflow.keras.utils.vis_utils import model_to_dot`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import SVG\n", + "#SVG(keras.utils.model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "hidden1 = model.layers[1]\n", + "hidden1.name" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "model.get_layer(hidden1.name).name" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "weights, biases = hidden1.get_weights()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "weights" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "weights.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "biases" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "biases.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=\"sgd\",\n", + " metrics=[\"accuracy\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is equivalent to:" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "model.compile(loss=keras.losses.sparse_categorical_crossentropy,\n", + " optimizer=keras.optimizers.SGD(),\n", + " metrics=[keras.metrics.sparse_categorical_accuracy]) # labels are class IDs, so use the sparse accuracy metric" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "history = model.fit(X_train, y_train, epochs=50,\n", + " validation_data=(X_valid,
y_valid))" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "history.params" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "print(history.epoch)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "history.history.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "pd.DataFrame(history.history).plot(figsize=(8, 5))\n", + "plt.grid(True)\n", + "plt.gca().set_ylim(0, 1)\n", + "save_fig(\"keras_learning_curve_graph\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "model.evaluate(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "X_new = X_test[:3]\n", + "y_proba = model.predict(X_new)\n", + "y_proba.round(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = np.argmax(model.predict(X_new), axis=-1) # predict_classes() was removed in TF 2.6\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "np.array(class_names)[y_pred]" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "y_new = y_test[:3]\n", + "y_new" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Regression MLP" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's load, split and scale the California housing dataset (the original one, not the modified one as in chapter 2):" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import fetch_california_housing\n", + "from
sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "housing = fetch_california_housing()\n", + "\n", + "X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target, random_state=42)\n", + "X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42)\n", + "\n", + "scaler = StandardScaler()\n", + "X_train = scaler.fit_transform(X_train)\n", + "X_valid = scaler.transform(X_valid)\n", + "X_test = scaler.transform(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Dense(30, activation=\"relu\", input_shape=X_train.shape[1:]),\n", + " keras.layers.Dense(1)\n", + "])\n", + "model.compile(loss=\"mean_squared_error\", optimizer=\"sgd\")\n", + "history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))\n", + "mse_test = model.evaluate(X_test, y_test)\n", + "X_new = X_test[:3]\n", + "y_pred = model.predict(X_new)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(pd.DataFrame(history.history))\n", + "plt.grid(True)\n", + "plt.gca().set_ylim(0, 1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Functional API" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Not all neural network models are simply sequential. Some may have complex topologies. Some may have multiple inputs and/or multiple outputs. 
For example, a Wide & Deep neural network (see [paper](https://ai.google/research/pubs/pub45413)) connects all or part of the inputs directly to the output layer." + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "input_ = keras.layers.Input(shape=X_train.shape[1:]) # trailing underscore: do not shadow the input() builtin\n", + "hidden1 = keras.layers.Dense(30, activation=\"relu\")(input_)\n", + "hidden2 = keras.layers.Dense(30, activation=\"relu\")(hidden1)\n", + "concat = keras.layers.concatenate([input_, hidden2])\n", + "output = keras.layers.Dense(1)(concat)\n", + "model = keras.models.Model(inputs=[input_], outputs=[output])" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "model.compile(loss=\"mean_squared_error\", optimizer=\"sgd\")\n", + "history = model.fit(X_train, y_train, epochs=20,\n", + " validation_data=(X_valid, y_valid))\n", + "mse_test = model.evaluate(X_test, y_test)\n", + "y_pred = model.predict(X_new)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "What if you want to send different subsets of input features through the wide or deep paths? We will send 5 features through the wide path (features 0 to 4), and 6 features through the deep path (features 2 to 7). Note that 3 features will go through both (features 2, 3 and 4)."
+ ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "input_A = keras.layers.Input(shape=[5])\n", + "input_B = keras.layers.Input(shape=[6])\n", + "hidden1 = keras.layers.Dense(30, activation=\"relu\")(input_B)\n", + "hidden2 = keras.layers.Dense(30, activation=\"relu\")(hidden1)\n", + "concat = keras.layers.concatenate([input_A, hidden2])\n", + "output = keras.layers.Dense(1)(concat)\n", + "model = keras.models.Model(inputs=[input_A, input_B], outputs=[output])" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [], + "source": [ + "model.compile(loss=\"mse\", optimizer=\"sgd\")\n", + "\n", + "X_train_A, X_train_B = X_train[:, :5], X_train[:, 2:]\n", + "X_valid_A, X_valid_B = X_valid[:, :5], X_valid[:, 2:]\n", + "X_test_A, X_test_B = X_test[:, :5], X_test[:, 2:]\n", + "X_new_A, X_new_B = X_test_A[:3], X_test_B[:3]\n", + "\n", + "history = model.fit((X_train_A, X_train_B), y_train, epochs=20,\n", + " validation_data=((X_valid_A, X_valid_B), y_valid))\n", + "mse_test = model.evaluate((X_test_A, X_test_B), y_test)\n", + "y_pred = model.predict((X_new_A, X_new_B))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Adding an auxiliary output for regularization:" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "input_A = keras.layers.Input(shape=[5])\n", + "input_B = keras.layers.Input(shape=[6])\n", + "hidden1 = keras.layers.Dense(30, activation=\"relu\")(input_B)\n", + "hidden2 = keras.layers.Dense(30, activation=\"relu\")(hidden1)\n", + "concat = 
keras.layers.concatenate([input_A, hidden2])\n", + "output = keras.layers.Dense(1)(concat)\n", + "aux_output = keras.layers.Dense(1)(hidden2)\n", + "model = keras.models.Model(inputs=[input_A, input_B],\n", + " outputs=[output, aux_output])" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "model.compile(loss=[\"mse\", \"mse\"], loss_weights=[0.9, 0.1], optimizer=\"sgd\")" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "history = model.fit([X_train_A, X_train_B], [y_train, y_train], epochs=20,\n", + " validation_data=([X_valid_A, X_valid_B], [y_valid, y_valid]))" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "total_loss, main_loss, aux_loss = model.evaluate(\n", + " [X_test_A, X_test_B], [y_test, y_test])\n", + "y_pred_main, y_pred_aux = model.predict([X_new_A, X_new_B])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# The subclassing API" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "class WideAndDeepModel(keras.models.Model):\n", + " def __init__(self, units=30, activation=\"relu\"):\n", + " super().__init__()\n", + " self.hidden1 = keras.layers.Dense(units, activation=activation)\n", + " self.hidden2 = keras.layers.Dense(units, activation=activation)\n", + " self.main_output = keras.layers.Dense(1)\n", + " self.aux_output = keras.layers.Dense(1)\n", + " \n", + " def call(self, inputs):\n", + " input_A, input_B = inputs\n", + " hidden1 = self.hidden1(input_B)\n", + " hidden2 = self.hidden2(hidden1)\n", + " concat = keras.layers.concatenate([input_A, hidden2])\n", + " main_output = self.main_output(concat)\n", + " aux_output = self.aux_output(hidden2)\n", + " return main_output, aux_output\n", + "\n", + "model = WideAndDeepModel(30, activation=\"relu\")" + ] + }, + { + 
"cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "model.compile(loss=\"mse\", loss_weights=[0.9, 0.1], optimizer=\"sgd\")\n", + "history = model.fit((X_train_A, X_train_B), (y_train, y_train), epochs=10,\n", + " validation_data=((X_valid_A, X_valid_B), (y_valid, y_valid)))\n", + "total_loss, main_loss, aux_loss = model.evaluate((X_test_A, X_test_B), (y_test, y_test))\n", + "y_pred_main, y_pred_aux = model.predict((X_new_A, X_new_B))" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "model = WideAndDeepModel(30, activation=\"relu\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Saving and Restoring" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Dense(30, activation=\"relu\", input_shape=[8]),\n", + " keras.layers.Dense(30, activation=\"relu\"),\n", + " keras.layers.Dense(1)\n", + "]) " + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [], + "source": [ + "model.compile(loss=\"mse\", optimizer=\"sgd\")\n", + "history = model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))\n", + "mse_test = model.evaluate(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [], + "source": [ + "model.save(\"my_keras_model.h5\")" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "model = keras.models.load_model(\"my_keras_model.h5\")" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "model.predict(X_new)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "model.save_weights(\"my_keras_weights.ckpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "model.load_weights(\"my_keras_weights.ckpt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using Callbacks during Training" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [], + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Dense(30, activation=\"relu\", input_shape=[8]),\n", + " keras.layers.Dense(30, activation=\"relu\"),\n", + " keras.layers.Dense(1)\n", + "]) " + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "model.compile(loss=\"mse\", optimizer=\"sgd\")\n", + "checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_keras_model.h5\", save_best_only=True)\n", + "history = model.fit(X_train, y_train, epochs=10,\n", + " validation_data=(X_valid, y_valid),\n", + " callbacks=[checkpoint_cb])\n", + "model = keras.models.load_model(\"my_keras_model.h5\") # rollback to best model\n", + "mse_test = model.evaluate(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [], + "source": [ + "model.compile(loss=\"mse\", optimizer=\"sgd\")\n", + "early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,\n", + " restore_best_weights=True)\n", + "history = model.fit(X_train, y_train, epochs=100,\n", + " validation_data=(X_valid, y_valid),\n", + " callbacks=[checkpoint_cb, early_stopping_cb])\n", + "mse_test = model.evaluate(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": 
[ + "class PrintValTrainRatioCallback(keras.callbacks.Callback):\n", + "    def on_epoch_end(self, epoch, logs):\n", + "        print(\"\\nval/train: {:.2f}\".format(logs[\"val_loss\"] / logs[\"loss\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [], + "source": [ + "val_train_ratio_cb = PrintValTrainRatioCallback()\n", + "history = model.fit(X_train, y_train, epochs=1,\n", + " validation_data=(X_valid, y_valid),\n", + " callbacks=[val_train_ratio_cb])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TensorBoard" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [], + "source": [ + "root_logdir = os.path.join(os.curdir, \"my_logs\")" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [], + "source": [ + "def get_run_logdir():\n", + "    import time\n", + "    run_id = time.strftime(\"run_%Y_%m_%d-%H_%M_%S\")\n", + "    return os.path.join(root_logdir, run_id)\n", + "\n", + "run_logdir = get_run_logdir()\n", + "run_logdir" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [], + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Dense(30, activation=\"relu\", input_shape=[8]),\n", + " keras.layers.Dense(30, activation=\"relu\"),\n", + " keras.layers.Dense(1)\n", + "]) \n", + "#model.compile(loss=\"mse\", optimizer=\"sgd\")\n", + "# or try another learning rate:\n", + "model.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(learning_rate=0.05))" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [], + "source": [ + "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", + "history = model.fit(X_train, y_train, epochs=30,\n", + " validation_data=(X_valid, y_valid),\n", +
" callbacks=[checkpoint_cb, tensorboard_cb])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To start the TensorBoard server, one option is to open a terminal, if needed activate the virtualenv where you installed TensorBoard, then type:\n", + "\n", + "```bash\n", + "$ tensorboard --logdir=./my_logs --port=6006\n", + "```\n", + "\n", + "You can then open your web browser to [localhost:6006](http://localhost:6006) and use TensorBoard. Once you are done, press Ctrl-C in the terminal window to shut down the TensorBoard server.\n", + "\n", + "Alternatively, you can create a Jupyter cell with this code:\n", + "\n", + "```bash\n", + "%%bash\n", + "tensorboard --logdir=./my_logs --port=6006\n", + "```\n", + "\n", + "When you run this cell, the TensorBoard server will start and you can use it at [localhost:6006](http://localhost:6006), but Jupyter will be blocked until you interrupt this cell, which will shut down the server.\n", + "\n", + "Lastly, you can use the following `tb()` function that starts the TensorBoard server in a way that does not block Jupyter, and directly opens a new browser tab for you. It returns a handle on the server's process, so you can call `server.kill()` when you want to shut down the server. Note that interrupting this notebook will shut down all TensorBoard servers that you started this way.\n", + "\n", + "You may also want to install the jupyter-tensorboard extension which integrates nicely into Jupyter to start/stop TensorBoard servers."
+ ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [], + "source": [ + "def tb(logdir=root_logdir, port=6006, open_tab=True, sleep=3):\n", + "    \"\"\"Start a TensorBoard server in the background and return its process handle.\n", + "\n", + "    logdir: directory that TensorBoard reads its logs from.\n", + "    port: port the server listens on.\n", + "    open_tab: if True, wait `sleep` seconds and then open the server URL in a browser tab.\n", + "    sleep: number of seconds to wait before opening the browser tab.\n", + "\n", + "    Returns the subprocess.Popen object; call its kill() method to stop the server.\n", + "    \"\"\"\n", + "    import subprocess\n", + "    # Use an argument list instead of a shell command string: a logdir containing spaces or\n", + "    # shell metacharacters is passed through intact, and the returned handle refers to the\n", + "    # TensorBoard process itself rather than to an intermediate shell.\n", + "    proc = subprocess.Popen(\n", + "        [\"tensorboard\", \"--logdir={}\".format(logdir), \"--port={}\".format(port)])\n", + "    if open_tab:\n", + "        import time\n", + "        print(\"Waiting a few seconds for the TensorBoard Server to start...\")\n", + "        time.sleep(sleep)\n", + "        import webbrowser\n", + "        webbrowser.open(\"http://127.0.0.1:{}/\".format(port))\n", + "    return proc" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [], + "source": [ + "server = tb()" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [], + "source": [ + "run_logdir2 = get_run_logdir()\n", + "run_logdir2" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Dense(30, activation=\"relu\", input_shape=[8]),\n", + " keras.layers.Dense(30, activation=\"relu\"),\n", + " keras.layers.Dense(1)\n", + "]) \n", + "model.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(learning_rate=0.015))" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [], + "source": [ + "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir2)\n", + "history = model.fit(X_train, y_train, epochs=10,\n", + " validation_data=(X_valid, y_valid),\n", + " callbacks=[checkpoint_cb, tensorboard_cb])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice how TensorBoard now sees two runs, and you can compare the learning curves."
+ ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "help(keras.callbacks.TensorBoard.__init__)" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "#server.kill() # uncomment and run this to stop the TensorBoard server" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Hyperparameter Tuning" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [], + "source": [ + "def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3, input_shape=[8]):\n", + "    \"\"\"Build and compile a Sequential model made of Dense layers.\n", + "\n", + "    n_hidden: number of hidden Dense layers (ReLU activation).\n", + "    n_neurons: number of units in each hidden layer.\n", + "    learning_rate: learning rate passed to the SGD optimizer.\n", + "    input_shape: input shape given to the first layer added to the model.\n", + "\n", + "    Returns the model, compiled with the \"mse\" loss.\n", + "    \"\"\"\n", + "    model = keras.models.Sequential()\n", + "    # Only the first layer added receives input_shape; when n_hidden is 0, that is the output layer.\n", + "    options = {\"input_shape\": input_shape}\n", + "    for _ in range(n_hidden):\n", + "        model.add(keras.layers.Dense(n_neurons, activation=\"relu\", **options))\n", + "        options = {}\n", + "    model.add(keras.layers.Dense(1, **options))\n", + "    optimizer = keras.optimizers.SGD(learning_rate)\n", + "    model.compile(loss=\"mse\", optimizer=optimizer)\n", + "    return model" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [], + "source": [ + "keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [], + "source": [ + "keras_reg.fit(X_train, y_train, epochs=100,\n", + " validation_data=(X_valid, y_valid),\n", + " callbacks=[keras.callbacks.EarlyStopping(patience=10)])" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [], + "source": [ + "# KerasRegressor.score() returns the negated loss (higher is better), so flip the sign to get the MSE.\n", + "mse_test = -keras_reg.score(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = keras_reg.predict(X_new)" + ] + }, + { +
"cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.stats import reciprocal\n", + "from sklearn.model_selection import RandomizedSearchCV\n", + "\n", + "param_distribs = {\n", + " \"n_hidden\": [0, 1, 2, 3],\n", + " \"n_neurons\": np.arange(1, 100),\n", + " \"learning_rate\": reciprocal(3e-4, 3e-2),\n", + "}\n", + "\n", + "rnd_search_cv = RandomizedSearchCV(keras_reg, param_distribs, n_iter=10, cv=3, verbose=2)\n", + "# Extra keyword arguments given to fit() are relayed to the fit() method of the wrapped estimator.\n", + "rnd_search_cv.fit(X_train, y_train, epochs=100,\n", + " validation_data=(X_valid, y_valid),\n", + " callbacks=[keras.callbacks.EarlyStopping(patience=10)])" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [], + "source": [ + "rnd_search_cv.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [], + "source": [ + "rnd_search_cv.best_score_" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "rnd_search_cv.best_estimator_" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [], + "source": [ + "rnd_search_cv.score(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "model = rnd_search_cv.best_estimator_.model\n", + "model" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "model.evaluate(X_test, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Exercise solutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. to 9."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "See appendix A." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 10." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "TODO" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 - tf2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + }, + "nav_menu": { + "height": "264px", + "width": "369px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}