1702 lines
42 KiB
Plaintext
1702 lines
42 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"**Chapter 10 – Introduction to Artificial Neural Networks with Keras**\n",
|
||
"\n",
|
||
"_This notebook contains all the sample code and solutions to the exercises in chapter 10._"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Setup"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"First, let's import a few common modules, ensure Matplotlib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0-preview."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Python ≥3.5 is required\n",
|
||
"import sys\n",
|
||
"assert sys.version_info >= (3, 5)\n",
|
||
"\n",
|
||
"# Scikit-Learn ≥0.20 is required\n",
|
||
"import sklearn\n",
|
||
"assert sklearn.__version__ >= \"0.20\"\n",
|
||
"\n",
|
||
"# TensorFlow ≥2.0-preview is required\n",
|
||
"import tensorflow as tf\n",
|
||
"assert tf.__version__ >= \"2.0\"\n",
|
||
"\n",
|
||
"# Common imports\n",
|
||
"import numpy as np\n",
|
||
"import os\n",
|
||
"\n",
|
||
"# to make this notebook's output stable across runs\n",
|
||
"np.random.seed(42)\n",
|
||
"\n",
|
||
"# To plot pretty figures\n",
|
||
"%matplotlib inline\n",
|
||
"import matplotlib as mpl\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"mpl.rc('axes', labelsize=14)\n",
|
||
"mpl.rc('xtick', labelsize=12)\n",
|
||
"mpl.rc('ytick', labelsize=12)\n",
|
||
"\n",
|
||
"# Where to save the figures\n",
|
||
"PROJECT_ROOT_DIR = \".\"\n",
|
||
"CHAPTER_ID = \"ann\"\n",
|
||
"IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n",
|
||
"os.makedirs(IMAGES_PATH, exist_ok=True)\n",
|
||
"\n",
|
||
"def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n",
|
||
" path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n",
|
||
" print(\"Saving figure\", fig_id)\n",
|
||
" if tight_layout:\n",
|
||
" plt.tight_layout()\n",
|
||
" plt.savefig(path, format=fig_extension, dpi=resolution)\n",
|
||
"\n",
|
||
"# Ignore useless warnings (see SciPy issue #5998)\n",
|
||
"import warnings\n",
|
||
"warnings.filterwarnings(action=\"ignore\", message=\"^internal gelsd\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Perceptrons"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"**Note**: we set `max_iter` and `tol` explicitly to avoid warnings about the fact that their default value will change in future versions of Scikit-Learn."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import numpy as np\n",
|
||
"from sklearn.datasets import load_iris\n",
|
||
"from sklearn.linear_model import Perceptron\n",
|
||
"\n",
|
||
"iris = load_iris()\n",
|
||
"X = iris.data[:, (2, 3)] # petal length, petal width\n",
|
||
"y = (iris.target == 0).astype(int)\n",
|
||
"\n",
|
||
"per_clf = Perceptron(max_iter=1000, tol=1e-3, random_state=42)\n",
|
||
"per_clf.fit(X, y)\n",
|
||
"\n",
|
||
"y_pred = per_clf.predict([[2, 0.5]])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"y_pred"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"a = -per_clf.coef_[0][0] / per_clf.coef_[0][1]\n",
|
||
"b = -per_clf.intercept_ / per_clf.coef_[0][1]\n",
|
||
"\n",
|
||
"axes = [0, 5, 0, 2]\n",
|
||
"\n",
|
||
"x0, x1 = np.meshgrid(\n",
|
||
" np.linspace(axes[0], axes[1], 500).reshape(-1, 1),\n",
|
||
" np.linspace(axes[2], axes[3], 200).reshape(-1, 1),\n",
|
||
" )\n",
|
||
"X_new = np.c_[x0.ravel(), x1.ravel()]\n",
|
||
"y_predict = per_clf.predict(X_new)\n",
|
||
"zz = y_predict.reshape(x0.shape)\n",
|
||
"\n",
|
||
"plt.figure(figsize=(10, 4))\n",
|
||
"plt.plot(X[y==0, 0], X[y==0, 1], \"bs\", label=\"Not Iris-Setosa\")\n",
|
||
"plt.plot(X[y==1, 0], X[y==1, 1], \"yo\", label=\"Iris-Setosa\")\n",
|
||
"\n",
|
||
"plt.plot([axes[0], axes[1]], [a * axes[0] + b, a * axes[1] + b], \"k-\", linewidth=3)\n",
|
||
"from matplotlib.colors import ListedColormap\n",
|
||
"custom_cmap = ListedColormap(['#9898ff', '#fafab0'])\n",
|
||
"\n",
|
||
"plt.contourf(x0, x1, zz, cmap=custom_cmap)\n",
|
||
"plt.xlabel(\"Petal length\", fontsize=14)\n",
|
||
"plt.ylabel(\"Petal width\", fontsize=14)\n",
|
||
"plt.legend(loc=\"lower right\", fontsize=14)\n",
|
||
"plt.axis(axes)\n",
|
||
"\n",
|
||
"save_fig(\"perceptron_iris_plot\")\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Activation functions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def sigmoid(z):\n",
|
||
" return 1 / (1 + np.exp(-z))\n",
|
||
"\n",
|
||
"def relu(z):\n",
|
||
" return np.maximum(0, z)\n",
|
||
"\n",
|
||
"def derivative(f, z, eps=0.000001):\n",
|
||
" return (f(z + eps) - f(z - eps))/(2 * eps)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"z = np.linspace(-5, 5, 200)\n",
|
||
"\n",
|
||
"plt.figure(figsize=(11,4))\n",
|
||
"\n",
|
||
"plt.subplot(121)\n",
|
||
"plt.plot(z, np.sign(z), \"r-\", linewidth=1, label=\"Step\")\n",
|
||
"plt.plot(z, sigmoid(z), \"g--\", linewidth=2, label=\"Sigmoid\")\n",
|
||
"plt.plot(z, np.tanh(z), \"b-\", linewidth=2, label=\"Tanh\")\n",
|
||
"plt.plot(z, relu(z), \"m-.\", linewidth=2, label=\"ReLU\")\n",
|
||
"plt.grid(True)\n",
|
||
"plt.legend(loc=\"center right\", fontsize=14)\n",
|
||
"plt.title(\"Activation functions\", fontsize=14)\n",
|
||
"plt.axis([-5, 5, -1.2, 1.2])\n",
|
||
"\n",
|
||
"plt.subplot(122)\n",
|
||
"plt.plot(z, derivative(np.sign, z), \"r-\", linewidth=1, label=\"Step\")\n",
|
||
"plt.plot(0, 0, \"ro\", markersize=5)\n",
|
||
"plt.plot(0, 0, \"rx\", markersize=10)\n",
|
||
"plt.plot(z, derivative(sigmoid, z), \"g--\", linewidth=2, label=\"Sigmoid\")\n",
|
||
"plt.plot(z, derivative(np.tanh, z), \"b-\", linewidth=2, label=\"Tanh\")\n",
|
||
"plt.plot(z, derivative(relu, z), \"m-.\", linewidth=2, label=\"ReLU\")\n",
|
||
"plt.grid(True)\n",
|
||
"#plt.legend(loc=\"center right\", fontsize=14)\n",
|
||
"plt.title(\"Derivatives\", fontsize=14)\n",
|
||
"plt.axis([-5, 5, -0.2, 1.2])\n",
|
||
"\n",
|
||
"save_fig(\"activation_functions_plot\")\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def heaviside(z):\n",
|
||
" return (z >= 0).astype(z.dtype)\n",
|
||
"\n",
|
||
"def mlp_xor(x1, x2, activation=heaviside):\n",
|
||
" return activation(-activation(x1 + x2 - 1.5) + activation(x1 + x2 - 0.5) - 0.5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"x1s = np.linspace(-0.2, 1.2, 100)\n",
|
||
"x2s = np.linspace(-0.2, 1.2, 100)\n",
|
||
"x1, x2 = np.meshgrid(x1s, x2s)\n",
|
||
"\n",
|
||
"z1 = mlp_xor(x1, x2, activation=heaviside)\n",
|
||
"z2 = mlp_xor(x1, x2, activation=sigmoid)\n",
|
||
"\n",
|
||
"plt.figure(figsize=(10,4))\n",
|
||
"\n",
|
||
"plt.subplot(121)\n",
|
||
"plt.contourf(x1, x2, z1)\n",
|
||
"plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n",
|
||
"plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n",
|
||
"plt.title(\"Activation function: heaviside\", fontsize=14)\n",
|
||
"plt.grid(True)\n",
|
||
"\n",
|
||
"plt.subplot(122)\n",
|
||
"plt.contourf(x1, x2, z2)\n",
|
||
"plt.plot([0, 1], [0, 1], \"gs\", markersize=20)\n",
|
||
"plt.plot([0, 1], [1, 0], \"y^\", markersize=20)\n",
|
||
"plt.title(\"Activation function: sigmoid\", fontsize=14)\n",
|
||
"plt.grid(True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Building an Image Classifier"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"First let's import TensorFlow and Keras."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import tensorflow as tf\n",
|
||
"from tensorflow import keras"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tf.__version__"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"keras.__version__"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Let's start by loading the fashion MNIST dataset. Keras has a number of functions to load popular datasets in `keras.datasets`. The dataset is already split for you between a training set and a test set, but it can be useful to split the training set further to have a validation set:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"fashion_mnist = keras.datasets.fashion_mnist\n",
|
||
"(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"The training set contains 60,000 grayscale images, each 28x28 pixels:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_train_full.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Each pixel intensity is represented as a byte (0 to 255):"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_train_full.dtype"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Let's split the full training set into a validation set and a (smaller) training set. We also scale the pixel intensities down to the 0-1 range and convert them to floats, by dividing by 255."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_valid, X_train = X_train_full[:5000] / 255., X_train_full[5000:] / 255.\n",
|
||
"y_valid, y_train = y_train_full[:5000], y_train_full[5000:]\n",
|
||
"X_test = X_test / 255."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"You can plot an image using Matplotlib's `imshow()` function, with a `'binary'`\n",
|
||
" color map:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"plt.imshow(X_train[0], cmap=\"binary\")\n",
|
||
"plt.axis('off')\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"The labels are the class IDs (represented as uint8), from 0 to 9:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"y_train"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Here are the corresponding class names:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class_names = [\"T-shirt/top\", \"Trouser\", \"Pullover\", \"Dress\", \"Coat\",\n",
|
||
" \"Sandal\", \"Shirt\", \"Sneaker\", \"Bag\", \"Ankle boot\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"So the first image in the training set is a coat:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class_names[y_train[0]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"The validation set contains 5,000 images, and the test set contains 10,000 images:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_valid.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_test.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Let's take a look at a sample of the images in the dataset:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"n_rows = 4\n",
|
||
"n_cols = 10\n",
|
||
"plt.figure(figsize=(n_cols * 1.2, n_rows * 1.2))\n",
|
||
"for row in range(n_rows):\n",
|
||
" for col in range(n_cols):\n",
|
||
" index = n_cols * row + col\n",
|
||
" plt.subplot(n_rows, n_cols, index + 1)\n",
|
||
" plt.imshow(X_train[index], cmap=\"binary\", interpolation=\"nearest\")\n",
|
||
" plt.axis('off')\n",
|
||
" plt.title(class_names[y_train[index]], fontsize=12)\n",
|
||
"plt.subplots_adjust(wspace=0.2, hspace=0.5)\n",
|
||
"save_fig('fashion_mnist_diagram', tight_layout=False)\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = keras.models.Sequential()\n",
|
||
"model.add(keras.layers.Flatten(input_shape=[28, 28]))\n",
|
||
"model.add(keras.layers.Dense(300, activation=\"relu\"))\n",
|
||
"model.add(keras.layers.Dense(100, activation=\"relu\"))\n",
|
||
"model.add(keras.layers.Dense(10, activation=\"softmax\"))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = keras.models.Sequential([\n",
|
||
" keras.layers.Flatten(input_shape=[28, 28]),\n",
|
||
" keras.layers.Dense(300, activation=\"relu\"),\n",
|
||
" keras.layers.Dense(100, activation=\"relu\"),\n",
|
||
" keras.layers.Dense(10, activation=\"softmax\")\n",
|
||
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.layers"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.summary()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"**Warning**: the following 2 cells do not work yet due to [TensorFlow issue 24622](https://github.com/tensorflow/tensorflow/issues/24622) (you are using a preview version of TensorFlow, hence there are still a few issues).\n",
|
||
"You can work around this issue by applying [PR 24626](https://github.com/tensorflow/tensorflow/pull/24625/files) to your copy of `tensorflow/python/keras/utils/vis_utils.py`."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#keras.utils.plot_model(model, \"my_mnist_model.png\", show_shapes=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"%%html\n",
|
||
"<img src=\"my_mnist_model.png\" />"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"**Warning**: the following cell does not work yet due to [TensorFlow issue 24622](https://github.com/tensorflow/tensorflow/issues/24622) and [TensorFlow issue 24639](https://github.com/tensorflow/tensorflow/issues/24639).\n",
|
||
"You can work around issue 24639 by writing `from tensorflow.keras.utils.vis_utils import model_to_dot`."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from IPython.display import SVG\n",
|
||
"#SVG(keras.utils.model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"hidden1 = model.layers[1]\n",
|
||
"hidden1.name"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.get_layer(hidden1.name).name"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"weights, biases = hidden1.get_weights()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"weights"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"weights.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"biases"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"biases.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"sparse_categorical_crossentropy\",\n",
|
||
" optimizer=\"sgd\",\n",
|
||
" metrics=[\"accuracy\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"This is equivalent to:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=keras.losses.sparse_categorical_crossentropy,\n",
|
||
" optimizer=keras.optimizers.SGD(),\n",
|
||
" metrics=[keras.metrics.sparse_categorical_accuracy])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"history = model.fit(X_train, y_train, epochs=50,\n",
|
||
" validation_data=(X_valid, y_valid))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"history.params"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(history.epoch)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"history.history.keys()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"pd.DataFrame(history.history).plot(figsize=(8, 5))\n",
|
||
"plt.grid(True)\n",
|
||
"plt.gca().set_ylim(0, 1)\n",
|
||
"save_fig(\"keras_learning_curves_graph\")\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.evaluate(X_test, y_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_new = X_test[:3]\n",
|
||
"y_proba = model.predict(X_new)\n",
|
||
"y_proba.round(2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 47,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"y_pred = np.argmax(model.predict(X_new), axis=-1)\n",
|
||
"y_pred"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.array(class_names)[y_pred]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 49,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"y_new = y_test[:3]\n",
|
||
"y_new"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Regression MLP"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Let's load, split and scale the California housing dataset (the original one, not the modified one as in chapter 2):"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 50,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.datasets import fetch_california_housing\n",
|
||
"from sklearn.model_selection import train_test_split\n",
|
||
"from sklearn.preprocessing import StandardScaler\n",
|
||
"\n",
|
||
"housing = fetch_california_housing()\n",
|
||
"\n",
|
||
"X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target, random_state=42)\n",
|
||
"X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42)\n",
|
||
"\n",
|
||
"scaler = StandardScaler()\n",
|
||
"X_train = scaler.fit_transform(X_train)\n",
|
||
"X_valid = scaler.transform(X_valid)\n",
|
||
"X_test = scaler.transform(X_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 51,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 52,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = keras.models.Sequential([\n",
|
||
" keras.layers.Dense(30, activation=\"relu\", input_shape=X_train.shape[1:]),\n",
|
||
" keras.layers.Dense(1)\n",
|
||
"])\n",
|
||
"model.compile(loss=\"mean_squared_error\", optimizer=\"sgd\")\n",
|
||
"history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))\n",
|
||
"mse_test = model.evaluate(X_test, y_test)\n",
|
||
"X_new = X_test[:3]\n",
|
||
"y_pred = model.predict(X_new)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 53,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"plt.plot(pd.DataFrame(history.history))\n",
|
||
"plt.grid(True)\n",
|
||
"plt.gca().set_ylim(0, 1)\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 54,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"y_pred"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Functional API"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Not all neural network models are simply sequential. Some may have complex topologies. Some may have multiple inputs and/or multiple outputs. For example, a Wide & Deep neural network (see [paper](https://ai.google/research/pubs/pub45413)) connects all or part of the inputs directly to the output layer."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 55,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 56,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"input = keras.layers.Input(shape=X_train.shape[1:])\n",
|
||
"hidden1 = keras.layers.Dense(30, activation=\"relu\")(input)\n",
|
||
"hidden2 = keras.layers.Dense(30, activation=\"relu\")(hidden1)\n",
|
||
"concat = keras.layers.concatenate([input, hidden2])\n",
|
||
"output = keras.layers.Dense(1)(concat)\n",
|
||
"model = keras.models.Model(inputs=[input], outputs=[output])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.summary()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 58,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"mean_squared_error\", optimizer=\"sgd\")\n",
|
||
"history = model.fit(X_train, y_train, epochs=20,\n",
|
||
" validation_data=(X_valid, y_valid))\n",
|
||
"mse_test = model.evaluate(X_test, y_test)\n",
|
||
"y_pred = model.predict(X_new)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"What if you want to send different subsets of input features through the wide or deep paths? We will send 5 features through the wide path (features 0 to 4), and 6 features through the deep path (features 2 to 7). Note that 3 features will go through both (features 2, 3 and 4)."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 59,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 60,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"input_A = keras.layers.Input(shape=[5])\n",
|
||
"input_B = keras.layers.Input(shape=[6])\n",
|
||
"hidden1 = keras.layers.Dense(30, activation=\"relu\")(input_B)\n",
|
||
"hidden2 = keras.layers.Dense(30, activation=\"relu\")(hidden1)\n",
|
||
"concat = keras.layers.concatenate([input_A, hidden2])\n",
|
||
"output = keras.layers.Dense(1)(concat)\n",
|
||
"model = keras.models.Model(inputs=[input_A, input_B], outputs=[output])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 61,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"mse\", optimizer=\"sgd\")\n",
|
||
"\n",
|
||
"X_train_A, X_train_B = X_train[:, :5], X_train[:, 2:]\n",
|
||
"X_valid_A, X_valid_B = X_valid[:, :5], X_valid[:, 2:]\n",
|
||
"X_test_A, X_test_B = X_test[:, :5], X_test[:, 2:]\n",
|
||
"X_new_A, X_new_B = X_test_A[:3], X_test_B[:3]\n",
|
||
"\n",
|
||
"history = model.fit((X_train_A, X_train_B), y_train, epochs=20,\n",
|
||
" validation_data=((X_valid_A, X_valid_B), y_valid))\n",
|
||
"mse_test = model.evaluate((X_test_A, X_test_B), y_test)\n",
|
||
"y_pred = model.predict((X_new_A, X_new_B))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Adding an auxiliary output for regularization:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 62,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 63,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"input_A = keras.layers.Input(shape=[5])\n",
|
||
"input_B = keras.layers.Input(shape=[6])\n",
|
||
"hidden1 = keras.layers.Dense(30, activation=\"relu\")(input_B)\n",
|
||
"hidden2 = keras.layers.Dense(30, activation=\"relu\")(hidden1)\n",
|
||
"concat = keras.layers.concatenate([input_A, hidden2])\n",
|
||
"output = keras.layers.Dense(1)(concat)\n",
|
||
"aux_output = keras.layers.Dense(1)(hidden2)\n",
|
||
"model = keras.models.Model(inputs=[input_A, input_B],\n",
|
||
" outputs=[output, aux_output])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 64,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=[\"mse\", \"mse\"], loss_weights=[0.9, 0.1], optimizer=\"sgd\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 65,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"history = model.fit([X_train_A, X_train_B], [y_train, y_train], epochs=20,\n",
|
||
" validation_data=([X_valid_A, X_valid_B], [y_valid, y_valid]))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 66,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"total_loss, main_loss, aux_loss = model.evaluate(\n",
|
||
" [X_test_A, X_test_B], [y_test, y_test])\n",
|
||
"y_pred_main, y_pred_aux = model.predict([X_new_A, X_new_B])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# The subclassing API"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 67,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class WideAndDeepModel(keras.models.Model):\n",
|
||
" def __init__(self, units=30, activation=\"relu\"):\n",
|
||
" super().__init__()\n",
|
||
" self.hidden1 = keras.layers.Dense(units, activation=activation)\n",
|
||
" self.hidden2 = keras.layers.Dense(units, activation=activation)\n",
|
||
" self.main_output = keras.layers.Dense(1)\n",
|
||
" self.aux_output = keras.layers.Dense(1)\n",
|
||
" \n",
|
||
" def call(self, inputs):\n",
|
||
" input_A, input_B = inputs\n",
|
||
" hidden1 = self.hidden1(input_B)\n",
|
||
" hidden2 = self.hidden2(hidden1)\n",
|
||
" concat = keras.layers.concatenate([input_A, hidden2])\n",
|
||
" main_output = self.main_output(concat)\n",
|
||
" aux_output = self.aux_output(hidden2)\n",
|
||
" return main_output, aux_output\n",
|
||
"\n",
|
||
"model = WideAndDeepModel(30, activation=\"relu\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 68,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"mse\", loss_weights=[0.9, 0.1], optimizer=\"sgd\")\n",
|
||
"history = model.fit((X_train_A, X_train_B), (y_train, y_train), epochs=10,\n",
|
||
" validation_data=((X_valid_A, X_valid_B), (y_valid, y_valid)))\n",
|
||
"total_loss, main_loss, aux_loss = model.evaluate((X_test_A, X_test_B), (y_test, y_test))\n",
|
||
"y_pred_main, y_pred_aux = model.predict((X_new_A, X_new_B))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 69,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = WideAndDeepModel(30, activation=\"relu\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Saving and Restoring"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 70,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 71,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = keras.models.Sequential([\n",
|
||
" keras.layers.Dense(30, activation=\"relu\", input_shape=[8]),\n",
|
||
" keras.layers.Dense(30, activation=\"relu\"),\n",
|
||
" keras.layers.Dense(1)\n",
|
||
"]) "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 72,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"mse\", optimizer=\"sgd\")\n",
|
||
"history = model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))\n",
|
||
"mse_test = model.evaluate(X_test, y_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 73,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.save(\"my_keras_model.h5\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 74,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = keras.models.load_model(\"my_keras_model.h5\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 75,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.predict(X_new)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 76,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.save_weights(\"my_keras_weights.ckpt\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 77,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.load_weights(\"my_keras_weights.ckpt\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Using Callbacks during Training"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 78,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 79,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = keras.models.Sequential([\n",
|
||
" keras.layers.Dense(30, activation=\"relu\", input_shape=[8]),\n",
|
||
" keras.layers.Dense(30, activation=\"relu\"),\n",
|
||
" keras.layers.Dense(1)\n",
|
||
"]) "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 80,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"mse\", optimizer=\"sgd\")\n",
|
||
"checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_keras_model.h5\", save_best_only=True)\n",
|
||
"history = model.fit(X_train, y_train, epochs=10,\n",
|
||
" validation_data=(X_valid, y_valid),\n",
|
||
" callbacks=[checkpoint_cb])\n",
|
||
"model = keras.models.load_model(\"my_keras_model.h5\") # rollback to best model\n",
|
||
"mse_test = model.evaluate(X_test, y_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 81,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.compile(loss=\"mse\", optimizer=\"sgd\")\n",
|
||
"early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,\n",
|
||
" restore_best_weights=True)\n",
|
||
"history = model.fit(X_train, y_train, epochs=100,\n",
|
||
" validation_data=(X_valid, y_valid),\n",
|
||
" callbacks=[checkpoint_cb, early_stopping_cb])\n",
|
||
"mse_test = model.evaluate(X_test, y_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 82,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"class PrintValTrainRatioCallback(keras.callbacks.Callback):\n",
|
||
" def on_epoch_end(self, epoch, logs):\n",
|
||
" print(\"\\nval/train: {:.2f}\".format(logs[\"val_loss\"] / logs[\"loss\"]))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 83,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"val_train_ratio_cb = PrintValTrainRatioCallback()\n",
|
||
"history = model.fit(X_train, y_train, epochs=1,\n",
|
||
" validation_data=(X_valid, y_valid),\n",
|
||
" callbacks=[val_train_ratio_cb])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# TensorBoard"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 84,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"root_logdir = os.path.join(os.curdir, \"my_logs\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 85,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def get_run_logdir():\n",
|
||
" import time\n",
|
||
" run_id = time.strftime(\"run_%Y_%m_%d-%H_%M_%S\")\n",
|
||
" return os.path.join(root_logdir, run_id)\n",
|
||
"\n",
|
||
"run_logdir = get_run_logdir()\n",
|
||
"run_logdir"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 86,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 87,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = keras.models.Sequential([\n",
|
||
" keras.layers.Dense(30, activation=\"relu\", input_shape=[8]),\n",
|
||
" keras.layers.Dense(30, activation=\"relu\"),\n",
|
||
" keras.layers.Dense(1)\n",
|
||
"]) \n",
|
||
"#model.compile(loss=\"mse\", optimizer=\"sgd\")\n",
|
||
"# or try another learning rate:\n",
|
||
"model.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(lr=0.05))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 88,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n",
|
||
"history = model.fit(X_train, y_train, epochs=30,\n",
|
||
" validation_data=(X_valid, y_valid),\n",
|
||
" callbacks=[checkpoint_cb, tensorboard_cb])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"To start the TensorBoard server, one option is to open a terminal, if needed activate the virtualenv where you installed TensorBoard, then type:\n",
|
||
"\n",
|
||
"```bash\n",
|
||
"$ tensorboard --logdir=./my_logs --port=6006\n",
|
||
"```\n",
|
||
"\n",
|
||
"You can then open your web browser to [localhost:6006](http://localhost:6006) and use TensorBoard. Once you are done, press Ctrl-C in the terminal window, this will shutdown the TensorBoard server.\n",
|
||
"\n",
|
||
"Alternatively, you can create a Jupyter cell with this code:\n",
|
||
"\n",
|
||
"```bash\n",
|
||
"%%bash\n",
|
||
"tensorboard --logdir={run_logdir} --port=6006\n",
|
||
"```\n",
|
||
"\n",
|
||
"When you run this cell, the TensorBoard server will start and you can use it at [localhost:6006](http://localhost:6006), but Jupyter will be blocked until you interrupt this cell, which will shutdown the server.\n",
|
||
"\n",
|
||
"Lastly, you can use the following `tb()` function that starts the TensorBoard server in a way that does not block Jupyter, and directly opens a new browser tab for you. It returns a handle on the server's process, so you can call `server.kill()` when you want to shutdown the server. Note that interrupting this notebook will shutdown all TensorBoard servers that you started this way.\n",
|
||
"\n",
|
||
"You may also want to install the jupyter-tensorboard extension which integrates nicely into Jupyter to start/stop TensorBoard servers."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 89,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def tb(logdir=root_logdir, port=6006, open_tab=True, sleep=3):\n",
|
||
" import subprocess\n",
|
||
" proc = subprocess.Popen(\n",
|
||
" \"tensorboard --logdir={0} --port={1}\".format(logdir, port), shell=True)\n",
|
||
" if open_tab:\n",
|
||
" import time\n",
|
||
" print(\"Waiting a few seconds for the TensorBoard Server to start...\")\n",
|
||
" time.sleep(sleep)\n",
|
||
" import webbrowser\n",
|
||
" webbrowser.open(\"http://127.0.0.1:{}/\".format(port))\n",
|
||
" return proc"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 90,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"server = tb()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 91,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"run_logdir2 = get_run_logdir()\n",
|
||
"run_logdir2"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 92,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 93,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = keras.models.Sequential([\n",
|
||
" keras.layers.Dense(30, activation=\"relu\", input_shape=[8]),\n",
|
||
" keras.layers.Dense(30, activation=\"relu\"),\n",
|
||
" keras.layers.Dense(1)\n",
|
||
"]) \n",
|
||
"model.compile(loss=\"mse\", optimizer=keras.optimizers.SGD(lr=0.015))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 95,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"tensorboard_cb = keras.callbacks.TensorBoard(run_logdir2)\n",
|
||
"history = model.fit(X_train, y_train, epochs=10,\n",
|
||
" validation_data=(X_valid, y_valid),\n",
|
||
" callbacks=[checkpoint_cb, tensorboard_cb])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Notice how TensorBoard now sees two runs, and you can compare the learning curves."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 96,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"help(keras.callbacks.TensorBoard.__init__)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 97,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#server.kill() # uncomment and run this to stop the TensorBoard server"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Hyperparameter Tuning"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 98,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 99,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3, input_shape=[8]):\n",
|
||
" model = keras.models.Sequential()\n",
|
||
" options = {\"input_shape\": input_shape}\n",
|
||
" for layer in range(n_hidden):\n",
|
||
" model.add(keras.layers.Dense(n_neurons, activation=\"relu\", **options))\n",
|
||
" options = {}\n",
|
||
" model.add(keras.layers.Dense(1, **options))\n",
|
||
" optimizer = keras.optimizers.SGD(learning_rate)\n",
|
||
" model.compile(loss=\"mse\", optimizer=optimizer)\n",
|
||
" return model"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 100,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 101,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"keras_reg.fit(X_train, y_train, epochs=100,\n",
|
||
" validation_data=(X_valid, y_valid),\n",
|
||
" callbacks=[keras.callbacks.EarlyStopping(patience=10)])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 102,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"mse_test = keras_reg.score(X_test, y_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 103,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"y_pred = keras_reg.predict(X_new)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 104,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"np.random.seed(42)\n",
|
||
"tf.random.set_seed(42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 105,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from scipy.stats import reciprocal\n",
|
||
"from sklearn.model_selection import RandomizedSearchCV\n",
|
||
"\n",
|
||
"param_distribs = {\n",
|
||
" \"n_hidden\": [0, 1, 2, 3],\n",
|
||
" \"n_neurons\": np.arange(1, 100),\n",
|
||
" \"learning_rate\": reciprocal(3e-4, 3e-2),\n",
|
||
"}\n",
|
||
"\n",
|
||
"rnd_search_cv = RandomizedSearchCV(keras_reg, param_distribs, n_iter=10, cv=3, verbose=2)\n",
|
||
"rnd_search_cv.fit(X_train, y_train, epochs=100,\n",
|
||
" validation_data=(X_valid, y_valid),\n",
|
||
" callbacks=[keras.callbacks.EarlyStopping(patience=10)])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 106,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"rnd_search_cv.best_params_"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 107,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"rnd_search_cv.best_score_"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 108,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"rnd_search_cv.best_estimator_"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 109,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"rnd_search_cv.score(X_test, y_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 110,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model = rnd_search_cv.best_estimator_.model\n",
|
||
"model"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 111,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"model.evaluate(X_test, y_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"source": [
|
||
"# Exercise solutions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 1. to 9."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"source": [
|
||
"See appendix A."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 10."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"TODO"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.6.8"
|
||
},
|
||
"nav_menu": {
|
||
"height": "264px",
|
||
"width": "369px"
|
||
},
|
||
"toc": {
|
||
"navigate_menu": true,
|
||
"number_sections": true,
|
||
"sideBar": true,
|
||
"threshold": 6,
|
||
"toc_cell": false,
|
||
"toc_section_display": "block",
|
||
"toc_window_display": false
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 1
|
||
}
|