handson-ml/10_neural_nets_with_keras.i...

1654 lines
41 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Chapter 10 Introduction to Artificial Neural Networks with Keras**\n",
"\n",
"_This notebook contains all the sample code and solutions to the exercises in chapter 10._"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First, let's import a few common modules, ensure Matplotlib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0-preview."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Python ≥3.5 is required\n",
"import sys\n",
"assert sys.version_info >= (3, 5)\n",
"\n",
"# Scikit-Learn ≥0.20 is required\n",
"import sklearn\n",
"assert sklearn.__version__ >= \"0.20\"\n",
"\n",
"# TensorFlow ≥2.0-preview is required\n",
"import tensorflow as tf\n",
"assert tf.__version__ >= \"2.0\"\n",
"\n",
"# Common imports\n",
"import numpy as np\n",
"import os\n",
"\n",
"# to make this notebook's output stable across runs\n",
"np.random.seed(42)\n",
"\n",
"# To plot pretty figures\n",
"%matplotlib inline\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"mpl.rc('axes', labelsize=14)\n",
"mpl.rc('xtick', labelsize=12)\n",
"mpl.rc('ytick', labelsize=12)\n",
"\n",
"# Where to save the figures\n",
"PROJECT_ROOT_DIR = \".\"\n",
"CHAPTER_ID = \"ann\"\n",
"IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
"os.makedirs(IMAGES_PATH, exist_ok=True)\n",
"\n",
"def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n",
"    \"\"\"Save the current Matplotlib figure as IMAGES_PATH/<fig_id>.<fig_extension>.\n",
"\n",
"    Prints a progress message, optionally applies plt.tight_layout() first,\n",
"    and writes the file with `resolution` as the dpi.\n",
"    \"\"\"\n",
"    file_name = fig_id + \".\" + fig_extension\n",
"    target = os.path.join(IMAGES_PATH, file_name)\n",
"    print(\"Saving figure\", fig_id)\n",
"    if tight_layout:\n",
"        plt.tight_layout()\n",
"    plt.savefig(target, format=fig_extension, dpi=resolution)\n",
"\n",
"# Ignore useless warnings (see SciPy issue #5998)\n",
"import warnings\n",
"warnings.filterwarnings(action=\"ignore\", message=\"^internal gelsd\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Perceptrons"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Note**: we set `max_iter` and `tol` explicitly to avoid warnings about the fact that their default value will change in future versions of Scikit-Learn."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from sklearn.datasets import load_iris\n",
"from sklearn.linear_model import Perceptron\n",
"\n",
"# Binary task: is the flower an Iris setosa (target 0) or not?\n",
"iris = load_iris()\n",
"X = iris.data[:, (2, 3)]  # petal length, petal width\n",
"# np.int was only an alias of the builtin int and was removed in NumPy 1.24\n",
"y = (iris.target == 0).astype(int)\n",
"\n",
"per_clf = Perceptron(max_iter=1000, tol=1e-3, random_state=42)\n",
"per_clf.fit(X, y)\n",
"\n",
"# Predict the class of a single new flower (petal length 2, petal width 0.5)\n",
"y_pred = per_clf.predict([[2, 0.5]])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"y_pred"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"a = -per_clf.coef_[0][0] / per_clf.coef_[0][1]\n",
"b = -per_clf.intercept_ / per_clf.coef_[0][1]\n",
"\n",
"axes = [0, 5, 0, 2]\n",
"\n",
"x0, x1 = np.meshgrid(\n",
" np.linspace(axes[0], axes[1], 500).reshape(-1, 1),\n",
" np.linspace(axes[2], axes[3], 200).reshape(-1, 1),\n",
" )\n",
"X_new = np.c_[x0.ravel(), x1.ravel()]\n",
"y_predict = per_clf.predict(X_new)\n",
"zz = y_predict.reshape(x0.shape)\n",
"\n",
"plt.figure(figsize=(10, 4))\n",
"plt.plot(X[y==0, 0], X[y==0, 1], \"bs\", label=\"Not Iris-Setosa\")\n",
"plt.plot(X[y==1, 0], X[y==1, 1], \"yo\", label=\"Iris-Setosa\")\n",
"\n",
"plt.plot([axes[0], axes[1]], [a * axes[0] + b, a * axes[1] + b], \"k-\", linewidth=3)\n",
"from matplotlib.colors import ListedColormap\n",
"custom_cmap = ListedColormap(['#9898ff', '#fafab0'])\n",
"\n",
"plt.contourf(x0, x1, zz, cmap=custom_cmap)\n",
"plt.xlabel(\"Petal length\", fontsize=14)\n",
"plt.ylabel(\"Petal width\", fontsize=14)\n",
"plt.legend(loc=\"lower right\", fontsize=14)\n",
"plt.axis(axes)\n",
"\n",
"save_fig(\"perceptron_iris_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Activation functions"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def sigmoid(z):\n",
"    \"\"\"Logistic function 1 / (1 + exp(-z)), evaluated element-wise by NumPy.\"\"\"\n",
"    decay = np.exp(-z)\n",
"    return 1 / (1 + decay)\n",
"\n",
"def relu(z):\n",
"    \"\"\"Rectified linear unit: element-wise maximum of 0 and z.\"\"\"\n",
"    floor = 0\n",
"    return np.maximum(floor, z)\n",
"\n",
"def derivative(f, z, eps=0.000001):\n",
"    \"\"\"Approximate f'(z) with a central finite difference of half-width eps.\"\"\"\n",
"    ahead = f(z + eps)\n",
"    behind = f(z - eps)\n",
"    return (ahead - behind) / (2 * eps)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"z = np.linspace(-5, 5, 200)\n",
"\n",
"plt.figure(figsize=(11,4))\n",
"\n",
"plt.subplot(121)\n",
"plt.plot(z, np.sign(z), \"r-\", linewidth=1, label=\"Step\")\n",
"plt.plot(z, sigmoid(z), \"g--\", linewidth=2, label=\"Sigmoid\")\n",
"plt.plot(z, np.tanh(z), \"b-\", linewidth=2, label=\"Tanh\")\n",
"plt.plot(z, relu(z), \"m-.\", linewidth=2, label=\"ReLU\")\n",
"plt.grid(True)\n",
"plt.legend(loc=\"center right\", fontsize=14)\n",
"plt.title(\"Activation functions\", fontsize=14)\n",
"plt.axis([-5, 5, -1.2, 1.2])\n",
"\n",
"plt.subplot(122)\n",
"plt.plot(z, derivative(np.sign, z), \"r-\", linewidth=1, label=\"Step\")\n",
"plt.plot(0, 0, \"ro\", markersize=5)\n",
"plt.plot(0, 0, \"rx\", markersize=10)\n",
"plt.plot(z, derivative(sigmoid, z), \"g--\", linewidth=2, label=\"Sigmoid\")\n",
"plt.plot(z, derivative(np.tanh, z), \"b-\", linewidth=2, label=\"Tanh\")\n",
"plt.plot(z, derivative(relu, z), \"m-.\", linewidth=2, label=\"ReLU\")\n",
"plt.grid(True)\n",
"#plt.legend(loc=\"center right\", fontsize=14)\n",
"plt.title(\"Derivatives\", fontsize=14)\n",
"plt.axis([-5, 5, -0.2, 1.2])\n",
"\n",
"save_fig(\"activation_functions_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def heaviside(z):\n",
"    \"\"\"Step function: 1 where z >= 0 and 0 elsewhere, returned in z's own dtype.\"\"\"\n",
"    non_negative = z >= 0\n",
"    return non_negative.astype(z.dtype)\n",
"\n",
"def mlp_xor(x1, x2, activation=heaviside):\n",
"    \"\"\"Two-layer network with hand-set weights, applying `activation` to every unit.\n",
"\n",
"    Two hidden units threshold x1 + x2 at 1.5 and at 0.5; the output unit applies\n",
"    `activation` to (low-threshold unit - high-threshold unit - 0.5).\n",
"    \"\"\"\n",
"    total = x1 + x2\n",
"    high_threshold_unit = activation(total - 1.5)\n",
"    low_threshold_unit = activation(total - 0.5)\n",
"    return activation(-high_threshold_unit + low_threshold_unit - 0.5)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"x1s = np.linspace(-0.2, 1.2, 100)\n",
"x2s = np.linspace(-0.2, 1.2, 100)\n",
"x1, x2 = np.meshgrid(x1s, x2s)\n",
"\n",
"z1 = mlp_xor(x1, x2, activation=heaviside)\n",
"z2 = mlp_xor(x1, x2, activation=sigmoid)\n",
"\n",
"plt.figure(figsize=(10,4))\n",
"\n",
"plt.subplot(121)\n",
"plt.contourf(x1, x2, z1)\n",
"plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n",
"plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n",
"plt.title(\"Activation function: heaviside\", fontsize=14)\n",
"plt.grid(True)\n",
"\n",
"plt.subplot(122)\n",
"plt.contourf(x1, x2, z2)\n",
"plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n",
"plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n",
"plt.title(\"Activation function: sigmoid\", fontsize=14)\n",
"plt.grid(True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Building an Image Classifier"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First let's import TensorFlow and Keras."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow import keras"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"tf.__version__"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"keras.__version__"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's start by loading the Fashion MNIST dataset. Keras has a number of functions to load popular datasets in `keras.datasets`. The dataset is already split for you between a training set and a test set, but it can be useful to split the training set further to have a validation set:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"fashion_mnist = keras.datasets.fashion_mnist\n",
"(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The training set contains 60,000 grayscale images, each 28x28 pixels:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"X_train_full.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Each pixel intensity is represented as a byte (0 to 255):"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"X_train_full.dtype"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's split the full training set into a validation set and a (smaller) training set. We also scale the pixel intensities down to the 0-1 range and convert them to floats, by dividing by 255."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"X_valid, X_train = X_train_full[:5000] / 255., X_train_full[5000:] / 255.\n",
"y_valid, y_train = y_train_full[:5000], y_train_full[5000:]\n",
"X_test = X_test / 255."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can plot an image using Matplotlib's `imshow()` function, with a `'binary'`\n",
" color map:"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"plt.imshow(X_train[0], cmap=\"binary\")\n",
"plt.axis('off')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The labels are the class IDs (represented as uint8), from 0 to 9:"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"y_train"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here are the corresponding class names:"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"class_names = [\"T-shirt/top\", \"Trouser\", \"Pullover\", \"Dress\", \"Coat\",\n",
" \"Sandal\", \"Shirt\", \"Sneaker\", \"Bag\", \"Ankle boot\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"So the first image in the training set is a coat:"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"class_names[y_train[0]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The validation set contains 5,000 images, and the test set contains 10,000 images:"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"X_valid.shape"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"X_test.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's take a look at a sample of the images in the dataset:"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"n_rows = 4\n",
"n_cols = 10\n",
"plt.figure(figsize=(n_cols * 1.2, n_rows * 1.2))\n",
"for row in range(n_rows):\n",
" for col in range(n_cols):\n",
" index = n_cols * row + col\n",
" plt.subplot(n_rows, n_cols, index + 1)\n",
" plt.imshow(X_train[index], cmap=\"binary\", interpolation=\"nearest\")\n",
" plt.axis('off')\n",
" plt.title(class_names[y_train[index]], fontsize=12)\n",
"plt.subplots_adjust(wspace=0.2, hspace=0.5)\n",
"save_fig('fashion_mnist_plot', tight_layout=False)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential()\n",
"model.add(keras.layers.Flatten(input_shape=[28, 28]))\n",
"model.add(keras.layers.Dense(300, activation=\"relu\"))\n",
"model.add(keras.layers.Dense(100, activation=\"relu\"))\n",
"model.add(keras.layers.Dense(10, activation=\"softmax\"))"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"keras.backend.clear_session()\n",
"np.random.seed(42)\n",
"tf.random.set_seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential([\n",
" keras.layers.Flatten(input_shape=[28, 28]),\n",
" keras.layers.Dense(300, activation=\"relu\"),\n",
" keras.layers.Dense(100, activation=\"relu\"),\n",
" keras.layers.Dense(10, activation=\"softmax\")\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"model.layers"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"keras.utils.plot_model(model, \"my_mnist_model.png\", show_shapes=True)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"hidden1 = model.layers[1]\n",
"hidden1.name"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"model.get_layer(hidden1.name) is hidden1"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"weights, biases = hidden1.get_weights()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"weights"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"weights.shape"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"biases"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"biases.shape"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"model.compile(loss=\"sparse_categorical_crossentropy\",\n",
" optimizer=\"sgd\",\n",
" metrics=[\"accuracy\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This is equivalent to:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```python\n",
"model.compile(loss=keras.losses.sparse_categorical_crossentropy,\n",
" optimizer=keras.optimizers.SGD(),\n",
" metrics=[keras.metrics.sparse_categorical_accuracy])\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"history = model.fit(X_train, y_train, epochs=30,\n",
" validation_data=(X_valid, y_valid))"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"history.params"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"print(history.epoch)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"history.history.keys()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"pd.DataFrame(history.history).plot(figsize=(8, 5))\n",
"plt.grid(True)\n",
"plt.gca().set_ylim(0, 1)\n",
"save_fig(\"keras_learning_curves_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"model.evaluate(X_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"X_new = X_test[:3]\n",
"y_proba = model.predict(X_new)\n",
"y_proba.round(2)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"# Sequential.predict_classes() no longer exists in recent TensorFlow releases; taking the\n",
"# argmax of the predicted class probabilities yields the same class IDs on every TF 2.x version.\n",
"y_pred = np.argmax(model.predict(X_new), axis=-1)\n",
"y_pred"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"np.array(class_names)[y_pred]"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"y_new = y_test[:3]\n",
"y_new"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(7.2, 2.4))\n",
"for index, image in enumerate(X_new):\n",
" plt.subplot(1, 3, index + 1)\n",
" plt.imshow(image, cmap=\"binary\", interpolation=\"nearest\")\n",
" plt.axis('off')\n",
" plt.title(class_names[y_test[index]], fontsize=12)\n",
"plt.subplots_adjust(wspace=0.2, hspace=0.5)\n",
"save_fig('fashion_mnist_images_plot', tight_layout=False)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Regression MLP"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's load, split and scale the California housing dataset (the original one, not the modified one as in chapter 2):"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import fetch_california_housing\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"housing = fetch_california_housing()\n",
"\n",
"X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target, random_state=42)\n",
"X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42)\n",
"\n",
"scaler = StandardScaler()\n",
"X_train = scaler.fit_transform(X_train)\n",
"X_valid = scaler.transform(X_valid)\n",
"X_test = scaler.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"# Regression MLP: one hidden layer, single linear output unit\n",
"model = keras.models.Sequential([\n",
"    keras.layers.Dense(30, activation=\"relu\", input_shape=X_train.shape[1:]),\n",
"    keras.layers.Dense(1)\n",
"])\n",
"# `learning_rate` is the supported keyword; the `lr` alias is deprecated and rejected by newer Keras releases\n",
"model.compile(loss=\"mean_squared_error\", optimizer=keras.optimizers.SGD(learning_rate=1e-3))\n",
"history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))\n",
"mse_test = model.evaluate(X_test, y_test)\n",
"# Treat the first three test instances as new data to predict\n",
"X_new = X_test[:3]\n",
"y_pred = model.predict(X_new)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"plt.plot(pd.DataFrame(history.history))\n",
"plt.grid(True)\n",
"plt.gca().set_ylim(0, 1)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"y_pred"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Functional API"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Not all neural network models are simply sequential. Some may have complex topologies. Some may have multiple inputs and/or multiple outputs. For example, a Wide & Deep neural network (see [paper](https://ai.google/research/pubs/pub45413)) connects all or part of the inputs directly to the output layer."
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"input_ = keras.layers.Input(shape=X_train.shape[1:])\n",
"hidden1 = keras.layers.Dense(30, activation=\"relu\")(input_)\n",
"hidden2 = keras.layers.Dense(30, activation=\"relu\")(hidden1)\n",
"concat = keras.layers.concatenate([input_, hidden2])\n",
"output = keras.layers.Dense(1)(concat)\n",
"model = keras.models.Model(inputs=[input_], outputs=[output])"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"# `learning_rate` is the supported keyword; the `lr` alias is deprecated and rejected by newer Keras releases\n",
"model.compile(loss=\"mean_squared_error\", optimizer=keras.optimizers.SGD(learning_rate=1e-3))\n",
"history = model.fit(X_train, y_train, epochs=20,\n",
"                    validation_data=(X_valid, y_valid))\n",
"mse_test = model.evaluate(X_test, y_test)\n",
"y_pred = model.predict(X_new)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"What if you want to send different subsets of input features through the wide or deep paths? We will send 5 features through the wide path (features 0 to 4), and 6 through the deep path (features 2 to 7). Note that 3 features will go through both (features 2, 3 and 4)."
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"input_A = keras.layers.Input(shape=[5], name=\"wide_input\")\n",
"input_B = keras.layers.Input(shape=[6], name=\"deep_input\")\n",
"hidden1 = keras.layers.Dense(30, activation=\"relu\")(input_B)\n",
"hidden2 = keras.layers.Dense(30, activation=\"relu\")(hidden1)\n",
"concat = keras.layers.concatenate([input_A, hidden2])\n",
"output = keras.layers.Dense(1, name=\"output\")(concat)\n",
"model = keras.models.Model(inputs=[input_A, input_B], outputs=[output])"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"# `learning_rate` is the supported keyword; the `lr` alias is deprecated and rejected by newer Keras releases\n",
"model.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(learning_rate=1e-3))\n",
"\n",
"# Wide input: the first 5 columns; deep input: every column from index 2 onwards\n",
"X_train_A, X_train_B = X_train[:, :5], X_train[:, 2:]\n",
"X_valid_A, X_valid_B = X_valid[:, :5], X_valid[:, 2:]\n",
"X_test_A, X_test_B = X_test[:, :5], X_test[:, 2:]\n",
"X_new_A, X_new_B = X_test_A[:3], X_test_B[:3]\n",
"\n",
"history = model.fit((X_train_A, X_train_B), y_train, epochs=20,\n",
"                    validation_data=((X_valid_A, X_valid_B), y_valid))\n",
"mse_test = model.evaluate((X_test_A, X_test_B), y_test)\n",
"y_pred = model.predict((X_new_A, X_new_B))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Adding an auxiliary output for regularization:"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"input_A = keras.layers.Input(shape=[5], name=\"wide_input\")\n",
"input_B = keras.layers.Input(shape=[6], name=\"deep_input\")\n",
"hidden1 = keras.layers.Dense(30, activation=\"relu\")(input_B)\n",
"hidden2 = keras.layers.Dense(30, activation=\"relu\")(hidden1)\n",
"concat = keras.layers.concatenate([input_A, hidden2])\n",
"output = keras.layers.Dense(1, name=\"main_output\")(concat)\n",
"aux_output = keras.layers.Dense(1, name=\"aux_output\")(hidden2)\n",
"model = keras.models.Model(inputs=[input_A, input_B],\n",
" outputs=[output, aux_output])"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"# One loss per output, weighted 0.9 (main) / 0.1 (auxiliary); `learning_rate` replaces the deprecated `lr` alias\n",
"model.compile(loss=[\"mse\", \"mse\"], loss_weights=[0.9, 0.1], optimizer=keras.optimizers.SGD(learning_rate=1e-3))"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"history = model.fit([X_train_A, X_train_B], [y_train, y_train], epochs=20,\n",
" validation_data=([X_valid_A, X_valid_B], [y_valid, y_valid]))"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"total_loss, main_loss, aux_loss = model.evaluate(\n",
" [X_test_A, X_test_B], [y_test, y_test])\n",
"y_pred_main, y_pred_aux = model.predict([X_new_A, X_new_B])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# The subclassing API"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"class WideAndDeepModel(keras.models.Model):\n",
"    \"\"\"Subclassed Keras model with two inputs and two outputs.\n",
"\n",
"    The second input passes through two Dense hidden layers. The main output is\n",
"    computed from the first input concatenated with the second hidden layer's\n",
"    activations; the auxiliary output is computed from the second hidden layer alone.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, units=30, activation=\"relu\", **kwargs):\n",
"        # Any extra keyword arguments are handed to keras.models.Model\n",
"        super().__init__(**kwargs)\n",
"        self.hidden1 = keras.layers.Dense(units, activation=activation)\n",
"        self.hidden2 = keras.layers.Dense(units, activation=activation)\n",
"        self.main_output = keras.layers.Dense(1)\n",
"        self.aux_output = keras.layers.Dense(1)\n",
"\n",
"    def call(self, inputs):\n",
"        \"\"\"Return (main_output, aux_output) for an (input_A, input_B) pair.\"\"\"\n",
"        wide_in, deep_in = inputs\n",
"        deep = self.hidden2(self.hidden1(deep_in))\n",
"        merged = keras.layers.concatenate([wide_in, deep])\n",
"        main = self.main_output(merged)\n",
"        aux = self.aux_output(deep)\n",
"        return main, aux\n",
"\n",
"model = WideAndDeepModel(30, activation=\"relu\")"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"# `learning_rate` is the supported keyword; the `lr` alias is deprecated and rejected by newer Keras releases\n",
"model.compile(loss=\"mse\", loss_weights=[0.9, 0.1], optimizer=keras.optimizers.SGD(learning_rate=1e-3))\n",
"# The same targets are supplied once per output (main, auxiliary)\n",
"history = model.fit((X_train_A, X_train_B), (y_train, y_train), epochs=10,\n",
"                    validation_data=((X_valid_A, X_valid_B), (y_valid, y_valid)))\n",
"total_loss, main_loss, aux_loss = model.evaluate((X_test_A, X_test_B), (y_test, y_test))\n",
"y_pred_main, y_pred_aux = model.predict((X_new_A, X_new_B))"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"model = WideAndDeepModel(30, activation=\"relu\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Saving and Restoring"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential([\n",
" keras.layers.Dense(30, activation=\"relu\", input_shape=[8]),\n",
" keras.layers.Dense(30, activation=\"relu\"),\n",
" keras.layers.Dense(1)\n",
"]) "
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"# `learning_rate` is the supported keyword; the `lr` alias is deprecated and rejected by newer Keras releases\n",
"model.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(learning_rate=1e-3))\n",
"history = model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))\n",
"mse_test = model.evaluate(X_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"model.save(\"my_keras_model.h5\")"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.load_model(\"my_keras_model.h5\")"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"model.predict(X_new)"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
"model.save_weights(\"my_keras_weights.ckpt\")"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"model.load_weights(\"my_keras_weights.ckpt\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using Callbacks during Training"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"keras.backend.clear_session()\n",
"np.random.seed(42)\n",
"tf.random.set_seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential([\n",
" keras.layers.Dense(30, activation=\"relu\", input_shape=[8]),\n",
" keras.layers.Dense(30, activation=\"relu\"),\n",
" keras.layers.Dense(1)\n",
"]) "
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"# `learning_rate` is the supported keyword; the `lr` alias is deprecated and rejected by newer Keras releases\n",
"model.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(learning_rate=1e-3))\n",
"# save_best_only=True: the checkpoint file is only overwritten when the monitored metric improves\n",
"checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_keras_model.h5\", save_best_only=True)\n",
"history = model.fit(X_train, y_train, epochs=10,\n",
"                    validation_data=(X_valid, y_valid),\n",
"                    callbacks=[checkpoint_cb])\n",
"model = keras.models.load_model(\"my_keras_model.h5\") # rollback to best model\n",
"mse_test = model.evaluate(X_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"# `learning_rate` is the supported keyword; the `lr` alias is deprecated and rejected by newer Keras releases\n",
"model.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(learning_rate=1e-3))\n",
"# Stop after 10 epochs without improvement and roll the weights back to the best epoch\n",
"early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,\n",
"                                                  restore_best_weights=True)\n",
"history = model.fit(X_train, y_train, epochs=100,\n",
"                    validation_data=(X_valid, y_valid),\n",
"                    callbacks=[checkpoint_cb, early_stopping_cb])\n",
"mse_test = model.evaluate(X_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [],
"source": [
"class PrintValTrainRatioCallback(keras.callbacks.Callback):\n",
"    \"\"\"Print the ratio of validation loss to training loss at the end of each epoch.\"\"\"\n",
"\n",
"    def on_epoch_end(self, epoch, logs=None):\n",
"        \"\"\"Print logs[\"val_loss\"] / logs[\"loss\"] when both values are available.\"\"\"\n",
"        # The Keras hook declares `logs=None`, and \"val_loss\" is only reported when\n",
"        # fit() received validation data, so skip quietly instead of raising.\n",
"        logs = logs or {}\n",
"        if \"val_loss\" not in logs or \"loss\" not in logs:\n",
"            return\n",
"        print(\"\\nval/train: {:.2f}\".format(logs[\"val_loss\"] / logs[\"loss\"]))"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
"val_train_ratio_cb = PrintValTrainRatioCallback()\n",
"history = model.fit(X_train, y_train, epochs=1,\n",
" validation_data=(X_valid, y_valid),\n",
" callbacks=[val_train_ratio_cb])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TensorBoard"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [],
"source": [
"root_logdir = os.path.join(os.curdir, \"my_logs\")"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
"def get_run_logdir():\n",
"    \"\"\"Return a path under root_logdir whose last component is a run_<timestamp> id.\"\"\"\n",
"    from time import strftime\n",
"    return os.path.join(root_logdir, strftime(\"run_%Y_%m_%d-%H_%M_%S\"))\n",
"\n",
"run_logdir = get_run_logdir()\n",
"run_logdir"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"keras.backend.clear_session()\n",
"np.random.seed(42)\n",
"tf.random.set_seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential([\n",
"    keras.layers.Dense(30, activation=\"relu\", input_shape=[8]),\n",
"    keras.layers.Dense(30, activation=\"relu\"),\n",
"    keras.layers.Dense(1)\n",
"])\n",
"# `learning_rate` is the supported keyword; the `lr` alias is deprecated and rejected by newer Keras releases\n",
"model.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(learning_rate=1e-3))"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [],
"source": [
"tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n",
"history = model.fit(X_train, y_train, epochs=30,\n",
" validation_data=(X_valid, y_valid),\n",
" callbacks=[checkpoint_cb, tensorboard_cb])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To start the TensorBoard server, one option is to open a terminal, if needed activate the virtualenv where you installed TensorBoard, go to this notebook's directory, then type:\n",
"\n",
"```bash\n",
"$ tensorboard --logdir=./my_logs --port=6006\n",
"```\n",
"\n",
"You can then open your web browser to [localhost:6006](http://localhost:6006) and use TensorBoard. Once you are done, press Ctrl-C in the terminal window; this will shut down the TensorBoard server.\n",
"\n",
"Alternatively, you can load TensorBoard's Jupyter extension and run it like this:"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [],
"source": [
"%load_ext tensorboard\n",
"%tensorboard --logdir=./my_logs --port=6006"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [],
"source": [
"run_logdir2 = get_run_logdir()\n",
"run_logdir2"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
"keras.backend.clear_session()\n",
"np.random.seed(42)\n",
"tf.random.set_seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential([\n",
"    keras.layers.Dense(30, activation=\"relu\", input_shape=[8]),\n",
"    keras.layers.Dense(30, activation=\"relu\"),\n",
"    keras.layers.Dense(1)\n",
"])\n",
"# Second run uses a larger learning rate (0.05); `learning_rate` replaces the deprecated `lr` alias\n",
"model.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(learning_rate=0.05))"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [],
"source": [
"tensorboard_cb = keras.callbacks.TensorBoard(run_logdir2)\n",
"history = model.fit(X_train, y_train, epochs=30,\n",
" validation_data=(X_valid, y_valid),\n",
" callbacks=[checkpoint_cb, tensorboard_cb])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notice how TensorBoard now sees two runs, and you can compare the learning curves."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Check out the other available logging options:"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [],
"source": [
"help(keras.callbacks.TensorBoard.__init__)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Hyperparameter Tuning"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [],
"source": [
"keras.backend.clear_session()\n",
"np.random.seed(42)\n",
"tf.random.set_seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [],
"source": [
"def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3, input_shape=[8]):\n",
"    \"\"\"Build and compile a Sequential regression MLP.\n",
"\n",
"    Args:\n",
"        n_hidden: number of hidden Dense layers (0 gives a purely linear model).\n",
"        n_neurons: number of units in each hidden layer.\n",
"        learning_rate: learning rate passed to the SGD optimizer.\n",
"        input_shape: shape of one input instance; the default list is only read, never mutated.\n",
"\n",
"    Returns:\n",
"        The model, compiled with the \"mse\" loss.\n",
"    \"\"\"\n",
"    model = keras.models.Sequential()\n",
"    model.add(keras.layers.InputLayer(input_shape=input_shape))\n",
"    for _ in range(n_hidden):\n",
"        model.add(keras.layers.Dense(n_neurons, activation=\"relu\"))\n",
"    model.add(keras.layers.Dense(1))\n",
"    # `learning_rate` is the supported keyword; the `lr` alias is deprecated and rejected by newer Keras releases\n",
"    optimizer = keras.optimizers.SGD(learning_rate=learning_rate)\n",
"    model.compile(loss=\"mse\", optimizer=optimizer)\n",
"    return model"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [],
"source": [
"keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"keras_reg.fit(X_train, y_train, epochs=100,\n",
" validation_data=(X_valid, y_valid),\n",
" callbacks=[keras.callbacks.EarlyStopping(patience=10)])"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [],
"source": [
"mse_test = keras_reg.score(X_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [],
"source": [
"y_pred = keras_reg.predict(X_new)"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"tf.random.set_seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [],
"source": [
"from scipy.stats import reciprocal\n",
"from sklearn.model_selection import RandomizedSearchCV\n",
"\n",
"param_distribs = {\n",
" \"n_hidden\": [0, 1, 2, 3],\n",
" \"n_neurons\": np.arange(1, 100),\n",
" \"learning_rate\": reciprocal(3e-4, 3e-2),\n",
"}\n",
"\n",
"rnd_search_cv = RandomizedSearchCV(keras_reg, param_distribs, n_iter=10, cv=3, verbose=2)\n",
"rnd_search_cv.fit(X_train, y_train, epochs=100,\n",
" validation_data=(X_valid, y_valid),\n",
" callbacks=[keras.callbacks.EarlyStopping(patience=10)])"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [],
"source": [
"rnd_search_cv.best_params_"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"rnd_search_cv.best_score_"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"rnd_search_cv.best_estimator_"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [],
"source": [
"rnd_search_cv.score(X_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"model = rnd_search_cv.best_estimator_.model\n",
"model"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [],
"source": [
"model.evaluate(X_test, y_test)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Exercise solutions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. to 9."
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"See appendix A."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 10."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"TODO"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
},
"nav_menu": {
"height": "264px",
"width": "369px"
},
"toc": {
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 6,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 1
}