From f69cbc8d2731a1b63f2f69302931e9e7b6e896b9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 8 Sep 2021 00:42:46 +0000
Subject: [PATCH 01/16] Bump pillow from 8.2.0 to 8.3.2

Bumps [pillow](https://github.com/python-pillow/Pillow) from 8.2.0 to 8.3.2.
- [Release notes](https://github.com/python-pillow/Pillow/releases)
- [Changelog](https://github.com/python-pillow/Pillow/blob/master/CHANGES.rst)
- [Commits](https://github.com/python-pillow/Pillow/compare/8.2.0...8.3.2)

---
updated-dependencies:
- dependency-name: pillow
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index a3f7175..767ded9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -56,7 +56,7 @@ gym[atari,Box2D]==0.18.0
 tf-agents==0.7.1
 
 ##### Image manipulation
-Pillow==8.2.0
+Pillow==8.3.2
 graphviz==0.16
 opencv-python==4.5.1.48
 pyglet==1.5.0

From 995a980df8f63ce91f593a2b8d311d65526b4a3c Mon Sep 17 00:00:00 2001
From: austin-chan <4649835+austin-chan@users.noreply.github.com>
Date: Sat, 11 Sep 2021 12:21:27 -0700
Subject: [PATCH 02/16] [Chapter 11] Text fix for l1 l2 regularization section

---
 11_training_deep_neural_networks.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/11_training_deep_neural_networks.ipynb b/11_training_deep_neural_networks.ipynb
index 747d64e..e9fdc9f 100644
--- a/11_training_deep_neural_networks.ipynb
+++ b/11_training_deep_neural_networks.ipynb
@@ -1777,7 +1777,7 @@
     "layer = keras.layers.Dense(100, activation=\"elu\",\n",
     "                           kernel_initializer=\"he_normal\",\n",
     "                           kernel_regularizer=keras.regularizers.l2(0.01))\n",
-    "# or l1(0.1) for ℓ1 regularization with a factor or 0.1\n",
+    "# or l1(0.1) for ℓ1 regularization with a factor of 0.1\n",
     "# or l1_l2(0.1, 0.01) for both ℓ1 and ℓ2 regularization, with factors 0.1 and 0.01 respectively"
    ]
   },

From 5976ed3d1ee60ea0852d956492460d161e29393e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= <ageron@users.noreply.github.com>
Date: Sun, 19 Sep 2021 17:07:47 +1200
Subject: [PATCH 03/16] Replace tfhub.dev/google/tf2-preview/nnlm-en-dim50/1
 with tfhub.dev/google/nnlm-en-dim50/2

---
 13_loading_and_preprocessing_data.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/13_loading_and_preprocessing_data.ipynb b/13_loading_and_preprocessing_data.ipynb
index 02ab9bb..1299aec 100644
--- a/13_loading_and_preprocessing_data.ipynb
+++ b/13_loading_and_preprocessing_data.ipynb
@@ -1855,7 +1855,7 @@
    "source": [
     "import tensorflow_hub as hub\n",
     "\n",
-    "hub_layer = hub.KerasLayer(\"https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1\",\n",
+    "hub_layer = hub.KerasLayer(\"https://tfhub.dev/google/nnlm-en-dim50/2\",\n",
     "                           output_shape=[50], input_shape=[], dtype=tf.string)\n",
     "\n",
     "model = keras.Sequential()\n",

From edecc171a25e8bc408e91c32a756f64dd25a2636 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= <ageron@users.noreply.github.com>
Date: Tue, 21 Sep 2021 16:37:29 +1200
Subject: [PATCH 04/16] Use atari_py==0.2.6, as later versions don't have ROMs

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 609a850..f485394 100644
--- a/environment.yml
+++ b/environment.yml
@@ -3,7 +3,7 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - atari_py=0.2 # used only in chapter 18
+  - atari_py=0.2.6 # used only in chapter 18
   - box2d-py=2.3 # used only in chapter 18
   - ftfy=5.8 # used only in chapter 16 by the transformers library
   - graphviz # used only in chapter 6 for dot files

From 7fc088e23d8a6da748953be36b9cb491d60f7f56 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= <ageron@users.noreply.github.com>
Date: Tue, 21 Sep 2021 16:41:01 +1200
Subject: [PATCH 05/16] Use atari_py version with ROMs

---
 18_reinforcement_learning.ipynb | 3 ++-
 requirements.txt                | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/18_reinforcement_learning.ipynb b/18_reinforcement_learning.ipynb
index 4d36515..5e188d1 100644
--- a/18_reinforcement_learning.ipynb
+++ b/18_reinforcement_learning.ipynb
@@ -52,7 +52,8 @@
     "\n",
     "if IS_COLAB or IS_KAGGLE:\n",
     "    !apt update && apt install -y libpq-dev libsdl2-dev swig xorg-dev xvfb\n",
-    "    !pip install -q -U tf-agents pyvirtualdisplay gym[atari,box2d]\n",
+    "    !pip install -q -U tf-agents pyvirtualdisplay gym[box2d]\n",
+    "    !pip install -q -U atari_py==0.2.5\n",
     "\n",
     "# Scikit-Learn ≥0.20 is required\n",
     "import sklearn\n",
diff --git a/requirements.txt b/requirements.txt
index 767ded9..0b7d84d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -49,7 +49,9 @@ tensorflow-addons==0.12.1
 
 # There are a few dependencies you need to install first, check out:
 # https://github.com/openai/gym#installing-everything
-gym[atari,Box2D]==0.18.0
+gym[Box2D]==0.18.0
+atari_py==0.2.5
+
 # On Windows, install atari_py using:
 # pip install --no-index -f https://github.com/Kojoley/atari-py/releases atari_py
 

From 2bd68d6348d541e961b4d25c96c6ec86548ce6fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= <ageron@users.noreply.github.com>
Date: Sat, 2 Oct 2021 22:40:18 +1300
Subject: [PATCH 06/16] Fix some section levels

---
 12_custom_models_and_training_with_tensorflow.ipynb | 2 +-
 13_loading_and_preprocessing_data.ipynb             | 2 +-
 14_deep_computer_vision_with_cnns.ipynb             | 2 +-
 15_processing_sequences_using_rnns_and_cnns.ipynb   | 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/12_custom_models_and_training_with_tensorflow.ipynb b/12_custom_models_and_training_with_tensorflow.ipynb
index 6f6d6e8..723c20f 100644
--- a/12_custom_models_and_training_with_tensorflow.ipynb
+++ b/12_custom_models_and_training_with_tensorflow.ipynb
@@ -3640,7 +3640,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 12. Implement a custom layer that performs _Layer Normalization_\n",
+    "## 12. Implement a custom layer that performs _Layer Normalization_\n",
     "_We will use this type of layer in Chapter 15 when using Recurrent Neural Networks._"
    ]
   },
diff --git a/13_loading_and_preprocessing_data.ipynb b/13_loading_and_preprocessing_data.ipynb
index 1299aec..8bb30cb 100644
--- a/13_loading_and_preprocessing_data.ipynb
+++ b/13_loading_and_preprocessing_data.ipynb
@@ -1603,7 +1603,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Using Feature Columns for Parsing"
+    "## Using Feature Columns for Parsing"
    ]
   },
   {
diff --git a/14_deep_computer_vision_with_cnns.ipynb b/14_deep_computer_vision_with_cnns.ipynb
index ed7ea3b..8627e80 100644
--- a/14_deep_computer_vision_with_cnns.ipynb
+++ b/14_deep_computer_vision_with_cnns.ipynb
@@ -1173,7 +1173,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Mean Average Precision (mAP)"
+    "## Mean Average Precision (mAP)"
    ]
   },
   {
diff --git a/15_processing_sequences_using_rnns_and_cnns.ipynb b/15_processing_sequences_using_rnns_and_cnns.ipynb
index 1d63131..99dba80 100644
--- a/15_processing_sequences_using_rnns_and_cnns.ipynb
+++ b/15_processing_sequences_using_rnns_and_cnns.ipynb
@@ -114,7 +114,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Generate the Dataset"
+    "## Generate the Dataset"
    ]
   },
   {
@@ -189,7 +189,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Computing Some Baselines"
+    "## Computing Some Baselines"
    ]
   },
   {
@@ -289,7 +289,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Using a Simple RNN"
+    "## Using a Simple RNN"
    ]
   },
   {

From 6b821335c02fae07718710ed7b6d9913127a3799 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= <ageron@users.noreply.github.com>
Date: Sun, 3 Oct 2021 00:14:44 +1300
Subject: [PATCH 07/16] Add some section headers

---
 02_end_to_end_machine_learning_project.ipynb | 138 ++++++++++++++++++-
 03_classification.ipynb                      |  47 ++++++-
 04_training_linear_models.ipynb              |  80 +++++++++--
 3 files changed, 239 insertions(+), 26 deletions(-)

diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb
index efa5324..d8c8349 100644
--- a/02_end_to_end_machine_learning_project.ipynb
+++ b/02_end_to_end_machine_learning_project.ipynb
@@ -83,7 +83,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Get the data"
+    "# Get the Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Download the Data"
    ]
   },
   {
@@ -132,6 +139,13 @@
     "    return pd.read_csv(csv_path)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Take a Quick Look at the Data Structure"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 5,
@@ -182,6 +196,13 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create a Test Set"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 10,
@@ -443,7 +464,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Discover and visualize the data to gain insights"
+    "# Discover and Visualize the Data to Gain Insights"
    ]
   },
   {
@@ -455,6 +476,13 @@
     "housing = strat_train_set.copy()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Visualizing Geographical Data"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 33,
@@ -540,6 +568,13 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Looking for Correlations"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 38,
@@ -585,6 +620,13 @@
     "save_fig(\"income_vs_house_value_scatterplot\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Experimenting with Attribute Combinations"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 42,
@@ -631,7 +673,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Prepare the data for Machine Learning algorithms"
+    "# Prepare the Data for Machine Learning Algorithms"
    ]
   },
   {
@@ -644,6 +686,29 @@
     "housing_labels = strat_train_set[\"median_house_value\"].copy()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data Cleaning"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In the book 3 options are listed:\n",
+    "\n",
+    "```python\n",
+    "housing.dropna(subset=[\"total_bedrooms\"])    # option 1\n",
+    "housing.drop(\"total_bedrooms\", axis=1)       # option 2\n",
+    "median = housing[\"total_bedrooms\"].median()  # option 3\n",
+    "housing[\"total_bedrooms\"].fillna(median, inplace=True)\n",
+    "```\n",
+    "\n",
+    "To demonstrate each of them, let's create a copy of the housing dataset, keeping only the rows that contain at least one null value. Then it will be easier to visualize exactly what each option does:"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 47,
@@ -815,6 +880,13 @@
     "housing_tr.head()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Handling Text and Categorical Attributes"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -910,6 +982,13 @@
     "cat_encoder.categories_"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Custom Transformers"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -985,6 +1064,13 @@
     "housing_extra_attribs.head()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Transformation Pipelines"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -1154,7 +1240,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Select and train a model "
+    "# Select and Train a Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Training and Evaluating on the Training Set"
    ]
   },
   {
@@ -1269,7 +1362,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Fine-tune your model"
+    "## Better Evaluation Using Cross-Validation"
    ]
   },
   {
@@ -1382,6 +1475,20 @@
     "svm_rmse"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Fine-Tune Your Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Grid Search"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 99,
@@ -1457,6 +1564,13 @@
     "pd.DataFrame(grid_search.cv_results_)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Randomized Search"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 104,
@@ -1488,6 +1602,13 @@
     "    print(np.sqrt(-mean_score), params)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Analyze the Best Models and Their Errors"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 106,
@@ -1512,6 +1633,13 @@
     "sorted(zip(feature_importances, attributes), reverse=True)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluate Your System on the Test Set"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 108,
diff --git a/03_classification.ipynb b/03_classification.ipynb
index 9e2885e..6c8e0db 100644
--- a/03_classification.ipynb
+++ b/03_classification.ipynb
@@ -245,7 +245,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Binary classifier"
+    "# Training a Binary Classifier"
    ]
   },
   {
@@ -296,6 +296,20 @@
     "cross_val_score(sgd_clf, X_train, y_train_5, cv=3, scoring=\"accuracy\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Performance Measures"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Measuring Accuracy Using Cross-Validation"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 18,
@@ -362,6 +376,13 @@
     "* lastly, other things may prevent perfect reproducibility, such as Python dicts and sets whose order is not guaranteed to be stable across sessions, or the order of files in a directory which is also not guaranteed."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Confusion Matrix"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 21,
@@ -394,6 +415,13 @@
     "confusion_matrix(y_train_5, y_train_perfect_predictions)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Precision and Recall"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 24,
@@ -453,6 +481,13 @@
     "cm[1, 1] / (cm[1, 1] + (cm[1, 0] + cm[0, 1]) / 2)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Precision/Recall Trade-off"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 30,
@@ -625,7 +660,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# ROC curves"
+    "## The ROC Curve"
    ]
   },
   {
@@ -757,7 +792,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Multiclass classification"
+    "# Multiclass Classification"
    ]
   },
   {
@@ -882,7 +917,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Error analysis"
+    "# Error Analysis"
    ]
   },
   {
@@ -969,7 +1004,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Multilabel classification"
+    "# Multilabel Classification"
    ]
   },
   {
@@ -1018,7 +1053,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Multioutput classification"
+    "# Multioutput Classification"
    ]
   },
   {
diff --git a/04_training_linear_models.ipynb b/04_training_linear_models.ipynb
index 94d90b1..ae910ef 100644
--- a/04_training_linear_models.ipynb
+++ b/04_training_linear_models.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "**Chapter 4 – Training Linear Models**"
+    "**Chapter 4 – Training Models**"
    ]
   },
   {
@@ -89,7 +89,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Linear regression using the Normal Equation"
+    "# Linear Regression"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## The Normal Equation"
    ]
   },
   {
@@ -243,7 +250,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Linear regression using batch gradient descent"
+    "# Gradient Descent\n",
+    "## Batch Gradient Descent"
    ]
   },
   {
@@ -330,7 +338,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Stochastic Gradient Descent"
+    "## Stochastic Gradient Descent"
    ]
   },
   {
@@ -416,7 +424,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Mini-batch gradient descent"
+    "## Mini-batch Gradient Descent"
    ]
   },
   {
@@ -494,7 +502,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Polynomial regression"
+    "# Polynomial Regression"
    ]
   },
   {
@@ -616,6 +624,13 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Learning Curves"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 35,
@@ -678,7 +693,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Regularized models"
+    "# Regularized Linear Models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Ridge Regression"
    ]
   },
   {
@@ -772,6 +794,13 @@
     "sgd_reg.predict([[1.5]])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Lasso Regression"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 43,
@@ -803,6 +832,13 @@
     "lasso_reg.predict([[1.5]])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Elastic Net"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 45,
@@ -815,6 +851,13 @@
     "elastic_net.predict([[1.5]])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Early Stopping"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 46,
@@ -829,13 +872,6 @@
     "X_train, X_val, y_train, y_val = train_test_split(X[:50], y[:50].ravel(), test_size=0.5, random_state=10)"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Early stopping example:"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 47,
@@ -1029,7 +1065,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Logistic regression"
+    "# Logistic Regression"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Decision Boundaries"
    ]
   },
   {
@@ -1166,6 +1209,13 @@
     "log_reg.predict([[1.7], [1.5]])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Softmax Regression"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 62,

From 3f89676892c4d681789227a141dc8e901576f01f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= <ageron@users.noreply.github.com>
Date: Sun, 3 Oct 2021 23:05:49 +1300
Subject: [PATCH 08/16] Improve alignment between notebook and book section
 headers

---
 01_the_machine_learning_landscape.ipynb       |   2 +-
 05_support_vector_machines.ipynb              | 145 +++++++++--
 06_decision_trees.ipynb                       | 130 ++++++++--
 07_ensemble_learning_and_random_forests.ipynb | 225 +++++++++++++-----
 08_dimensionality_reduction.ipynb             | 172 ++++++++++---
 09_unsupervised_learning.ipynb                |  37 ++-
 6 files changed, 560 insertions(+), 151 deletions(-)

diff --git a/01_the_machine_learning_landscape.ipynb b/01_the_machine_learning_landscape.ipynb
index 9ef739f..fb012b3 100644
--- a/01_the_machine_learning_landscape.ipynb
+++ b/01_the_machine_learning_landscape.ipynb
@@ -93,7 +93,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The code in the book expects the data files to be located in the current directory. I just tweaked it here to fetch the files in datasets/lifesat."
+    "The code in the book expects the data files to be located in the current directory. I just tweaked it here to fetch the files in `datasets/lifesat`."
    ]
   },
   {
diff --git a/05_support_vector_machines.ipynb b/05_support_vector_machines.ipynb
index 57a6085..6200fcb 100644
--- a/05_support_vector_machines.ipynb
+++ b/05_support_vector_machines.ipynb
@@ -84,14 +84,16 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Large margin classification"
+    "# Linear SVM Classification"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The next few code cells generate the first figures in chapter 5. The first actual code sample comes after:"
+    "The next few code cells generate the first figures in chapter 5. The first actual code sample comes after.\n",
+    "\n",
+    "**Code to generate Figure 5–1. Large margin classification**"
    ]
   },
   {
@@ -175,7 +177,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Sensitivity to feature scales"
+    "**Code to generate Figure 5–2. Sensitivity to feature scales**"
    ]
   },
   {
@@ -220,7 +222,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Sensitivity to outliers"
+    "## Soft Margin Classification\n",
+    "**Code to generate Figure 5–3. Hard margin sensitivity to outliers**"
    ]
   },
   {
@@ -278,14 +281,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Large margin *vs* margin violations"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "This is the first code example in chapter 5:"
+    "**This is the first code example in chapter 5:**"
    ]
   },
   {
@@ -325,7 +321,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Now let's generate the graph comparing different regularization settings:"
+    "**Code to generate Figure 5–4. Large margin versus fewer margin violations**"
    ]
   },
   {
@@ -408,7 +404,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Non-linear classification"
+    "# Nonlinear SVM Classification"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 5–5. Adding features to make a dataset linearly separable**"
    ]
   },
   {
@@ -471,6 +474,13 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Here is the second code example in the chapter:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 13,
@@ -490,6 +500,13 @@
     "polynomial_svm_clf.fit(X, y)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 5–6. Linear SVM classifier using polynomial features**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 14,
@@ -513,6 +530,20 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Polynomial Kernel"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Next code example:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 15,
@@ -528,6 +559,13 @@
     "poly_kernel_svm_clf.fit(X, y)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 5–7. SVM classifiers with a polynomial kernel**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 16,
@@ -564,6 +602,20 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Similarity Features"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 5–8. Similarity features using the Gaussian RBF**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 18,
@@ -644,6 +696,20 @@
     "    print(\"Phi({}, {}) = {}\".format(x1_example, landmark, k))"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Gaussian RBF Kernel"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Next code example:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 20,
@@ -657,6 +723,13 @@
     "rbf_kernel_svm_clf.fit(X, y)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 5–9. SVM classifiers using an RBF kernel**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 21,
@@ -701,7 +774,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Regression\n"
+    "# SVM Regression"
    ]
   },
   {
@@ -716,6 +789,13 @@
     "y = (4 + 3 * X + np.random.randn(m, 1)).ravel()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Next code example:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 23,
@@ -728,6 +808,13 @@
     "svm_reg.fit(X, y)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 5–10. SVM Regression**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 24,
@@ -807,6 +894,13 @@
     "**Note**: to be future-proof, we set `gamma=\"scale\"`, as this will be the default value in Scikit-Learn 0.22."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Next code example:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 27,
@@ -819,6 +913,13 @@
     "svm_poly_reg.fit(X, y)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 5–11. SVM Regression using a second-degree polynomial kernel**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 28,
@@ -855,7 +956,15 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Under the hood"
+    "# Under the Hood\n",
+    "## Decision Function and Predictions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 5–12. Decision function for the iris dataset**"
    ]
   },
   {
@@ -917,7 +1026,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Small weight vector results in a large margin"
+    "**Code to generate Figure 5–13. A smaller weight vector results in a larger margin**"
    ]
   },
   {
@@ -976,7 +1085,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Hinge loss"
+    "**Code to generate the Hinge Loss figure:**"
    ]
   },
   {
diff --git a/06_decision_trees.ipynb b/06_decision_trees.ipynb
index ab7f00a..dc96c32 100644
--- a/06_decision_trees.ipynb
+++ b/06_decision_trees.ipynb
@@ -89,7 +89,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Training and visualizing"
+    "# Training and Visualizing a Decision Tree"
    ]
   },
   {
@@ -109,6 +109,13 @@
     "tree_clf.fit(X, y)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**This code example generates Figure 6–1. Iris Decision Tree:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 3,
@@ -130,6 +137,20 @@
     "Source.from_file(os.path.join(IMAGES_PATH, \"iris_tree.dot\"))"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Making Predictions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 6–2. Decision Tree decision boundaries**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 4,
@@ -181,7 +202,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Predicting classes and class probabilities"
+    "# Estimating Class Probabilities"
    ]
   },
   {
@@ -206,7 +227,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# High Variance"
+    "## Regularization Hyperparameters"
    ]
   },
   {
@@ -227,6 +248,13 @@
     "tree_clf_tweaked.fit(X, y)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 6–8. Sensitivity to training set details:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 8,
@@ -244,9 +272,16 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 6–3. Regularization using min_samples_leaf:**"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -271,9 +306,16 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Rotating the dataset also leads to completely different decision boundaries:"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -290,9 +332,16 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 6–7. Sensitivity to training set rotation**"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -324,12 +373,19 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Regression trees"
+    "# Regression"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's prepare a simple linear dataset:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -341,9 +397,16 @@
     "y = y + np.random.randn(m, 1) / 10"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code example:**"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -353,9 +416,16 @@
     "tree_reg.fit(X, y)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 6–5. Predictions of two Decision Tree regression models:**"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -400,9 +470,16 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 6–4. A Decision Tree for regression:**"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -417,16 +494,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
     "Source.from_file(os.path.join(IMAGES_PATH, \"regression_tree.dot\"))"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 6–6. Regularizing a Decision Tree regressor:**"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -512,7 +596,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -530,7 +614,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -548,7 +632,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -562,7 +646,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -585,7 +669,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -618,7 +702,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -645,7 +729,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -673,7 +757,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -685,7 +769,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -703,7 +787,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
diff --git a/07_ensemble_learning_and_random_forests.ipynb b/07_ensemble_learning_and_random_forests.ipynb
index fde20a0..7558d0f 100644
--- a/07_ensemble_learning_and_random_forests.ipynb
+++ b/07_ensemble_learning_and_random_forests.ipynb
@@ -89,7 +89,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Voting classifiers"
+    "# Voting Classifiers"
    ]
   },
   {
@@ -103,6 +103,13 @@
     "cumulative_heads_ratio = np.cumsum(coin_tosses, axis=0) / np.arange(1, 10001).reshape(-1, 1)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 7–3. The law of large numbers:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 3,
@@ -121,6 +128,13 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's use the moons dataset:"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 4,
@@ -141,6 +155,13 @@
     "**Note**: to be future-proof, we set `solver=\"lbfgs\"`, `n_estimators=100`, and `gamma=\"scale\"` since these will be the default values in upcoming Scikit-Learn versions."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code examples:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 5,
@@ -232,7 +253,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Bagging ensembles"
+    "# Bagging and Pasting\n",
+    "## Bagging and Pasting in Scikit-Learn"
    ]
   },
   {
@@ -273,6 +295,13 @@
     "print(accuracy_score(y_test, y_pred_tree))"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 7–5. A single Decision Tree (left) versus a bagging ensemble of 500 trees (right):**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 13,
@@ -302,7 +331,9 @@
   {
    "cell_type": "code",
    "execution_count": 14,
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
     "fix, axes = plt.subplots(ncols=2, figsize=(10,4), sharey=True)\n",
@@ -321,7 +352,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Random Forests"
+    "## Out-of-Bag evaluation"
    ]
   },
   {
@@ -331,8 +362,10 @@
    "outputs": [],
    "source": [
     "bag_clf = BaggingClassifier(\n",
-    "    DecisionTreeClassifier(max_features=\"sqrt\", max_leaf_nodes=16),\n",
-    "    n_estimators=500, random_state=42)"
+    "    DecisionTreeClassifier(), n_estimators=500,\n",
+    "    bootstrap=True, oob_score=True, random_state=40)\n",
+    "bag_clf.fit(X_train, y_train)\n",
+    "bag_clf.oob_score_"
    ]
   },
   {
@@ -341,13 +374,32 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "bag_clf.fit(X_train, y_train)\n",
-    "y_pred = bag_clf.predict(X_test)"
+    "bag_clf.oob_decision_function_"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 17,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import accuracy_score\n",
+    "y_pred = bag_clf.predict(X_test)\n",
+    "accuracy_score(y_test, y_pred)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Random Forests"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -359,18 +411,53 @@
     "y_pred_rf = rnd_clf.predict(X_test)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A Random Forest is equivalent to a bag of decision trees:"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bag_clf = BaggingClassifier(\n",
+    "    DecisionTreeClassifier(max_features=\"sqrt\", max_leaf_nodes=16),\n",
+    "    n_estimators=500, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bag_clf.fit(X_train, y_train)\n",
+    "y_pred = bag_clf.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
     "np.sum(y_pred == y_pred_rf) / len(y_pred)  # very similar predictions"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Feature Importance"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -384,16 +471,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
    "source": [
     "rnd_clf.feature_importances_"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The following figure overlays the decision boundaries of 15 decision trees. As you can see, even though each decision tree is imperfect, the ensemble defines a pretty good decision boundary:"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -412,47 +506,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Out-of-Bag evaluation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "bag_clf = BaggingClassifier(\n",
-    "    DecisionTreeClassifier(), n_estimators=500,\n",
-    "    bootstrap=True, oob_score=True, random_state=40)\n",
-    "bag_clf.fit(X_train, y_train)\n",
-    "bag_clf.oob_score_"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "bag_clf.oob_decision_function_"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from sklearn.metrics import accuracy_score\n",
-    "y_pred = bag_clf.predict(X_test)\n",
-    "accuracy_score(y_test, y_pred)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Feature importance"
+    "**Code to generate Figure 7–6. MNIST pixel importance (according to a Random Forest classifier):**"
    ]
   },
   {
@@ -516,7 +570,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# AdaBoost"
+    "# Boosting\n",
+    "## AdaBoost"
    ]
   },
   {
@@ -542,6 +597,13 @@
     "plot_decision_boundary(ada_clf, X, y)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 7–8. Decision boundaries of consecutive predictors:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 31,
@@ -583,7 +645,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Gradient Boosting"
+    "## Gradient Boosting"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's create a simple quadratic dataset:"
    ]
   },
   {
@@ -597,6 +666,13 @@
     "y = 3*X[:, 0]**2 + 0.05 * np.random.randn(100)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now let's train a decision tree regressor on this dataset:"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 33,
@@ -658,6 +734,13 @@
     "y_pred"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 7–9. In this depiction of Gradient Boosting, the first predictor (top left) is trained normally, then each consecutive predictor (middle left and lower left) is trained on the previous predictor’s residuals; the right column shows the resulting ensemble’s predictions:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 39,
@@ -714,6 +797,13 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now let's try a gradient boosting regressor:"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 41,
@@ -726,6 +816,13 @@
     "gbrt.fit(X, y)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 7–10. GBRT ensembles with not enough predictors (left) and too many (right):**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 42,
@@ -763,7 +860,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Gradient Boosting with Early stopping"
+    "**Gradient Boosting with Early stopping:**"
    ]
   },
   {
@@ -789,6 +886,13 @@
     "gbrt_best.fit(X_train, y_train)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 7–11. Tuning the number of trees using early stopping:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 45,
@@ -827,6 +931,13 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Early stopping with some patience (interrupts training only after there's no improvement for 5 epochs):"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 47,
@@ -873,7 +984,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Using XGBoost"
+    "**Using XGBoost:**"
    ]
   },
   {
diff --git a/08_dimensionality_reduction.ipynb b/08_dimensionality_reduction.ipynb
index 6149991..ecb8b9d 100644
--- a/08_dimensionality_reduction.ipynb
+++ b/08_dimensionality_reduction.ipynb
@@ -84,8 +84,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Projection methods\n",
-    "Build 3D dataset:"
+    "# PCA\n",
+    "Let's build a simple 3D dataset:"
    ]
   },
   {
@@ -110,7 +110,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## PCA using SVD decomposition"
+    "## Principal Components"
    ]
   },
   {
@@ -146,6 +146,13 @@
     "np.allclose(X_centered, U.dot(S).dot(Vt))"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Projecting Down to d Dimensions"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 6,
@@ -169,7 +176,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## PCA using Scikit-Learn"
+    "## Using Scikit-Learn"
    ]
   },
   {
@@ -344,6 +351,13 @@
     "Notice how the axes are flipped."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Explained Variance Ratio"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -406,6 +420,13 @@
     "Next, let's generate some nice figures! :)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 8–2. A 3D dataset lying close to a 2D subspace:**"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -515,6 +536,13 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 8–3. The new 2D dataset after projection:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 25,
@@ -540,8 +568,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Manifold learning\n",
-    "Swiss roll:"
+    "**Code to generate Figure 8–4. Swiss roll dataset:**"
    ]
   },
   {
@@ -551,6 +578,7 @@
    "outputs": [],
    "source": [
     "from sklearn.datasets import make_swiss_roll\n",
+    "\n",
     "X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)"
    ]
   },
@@ -578,6 +606,13 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 8–5. Squashing by projecting onto a plane (left) versus unrolling the Swiss roll (right):**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 28,
@@ -603,6 +638,13 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 8–6. The decision boundary may not always be simpler with lower dimensions:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 29,
@@ -688,7 +730,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# PCA"
+    "**Code to generate Figure 8–7. Selecting the subspace to project on:**"
    ]
   },
   {
@@ -761,7 +803,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# MNIST compression"
+    "## Choosing the Right Number of Dimensions"
    ]
   },
   {
@@ -818,6 +860,13 @@
     "d"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 8–8. Explained variance as a function of the number of dimensions:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 35,
@@ -867,6 +916,13 @@
     "np.sum(pca.explained_variance_ratio_)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## PCA for Compression"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 39,
@@ -878,6 +934,13 @@
     "X_recovered = pca.inverse_transform(X_reduced)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 8–9. MNIST compression that preserves 95% of the variance:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 40,
@@ -930,7 +993,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Incremental PCA"
+    "## Randomized PCA"
    ]
   },
   {
@@ -938,6 +1001,23 @@
    "execution_count": 43,
    "metadata": {},
    "outputs": [],
+   "source": [
+    "rnd_pca = PCA(n_components=154, svd_solver=\"randomized\", random_state=42)\n",
+    "X_reduced = rnd_pca.fit_transform(X_train)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Incremental PCA"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "from sklearn.decomposition import IncrementalPCA\n",
     "\n",
@@ -952,16 +1032,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 45,
    "metadata": {},
    "outputs": [],
    "source": [
     "X_recovered_inc_pca = inc_pca.inverse_transform(X_reduced)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's check that compression still works well:"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 46,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -975,7 +1062,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 47,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -991,7 +1078,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 48,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1007,7 +1094,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 49,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1018,7 +1105,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Using `memmap()`"
+    "**Using `memmap()`:**"
    ]
   },
   {
@@ -1030,7 +1117,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 50,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1050,7 +1137,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 51,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1066,7 +1153,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 52,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1077,21 +1164,11 @@
     "inc_pca.fit(X_mm)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 52,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "rnd_pca = PCA(n_components=154, svd_solver=\"randomized\", random_state=42)\n",
-    "X_reduced = rnd_pca.fit_transform(X_train)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Time complexity"
+    "**Time complexity:**"
    ]
   },
   {
@@ -1226,6 +1303,13 @@
     "X_reduced = rbf_pca.fit_transform(X)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 8–10. Swiss roll reduced to 2D using kPCA with various kernels:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 58,
@@ -1260,6 +1344,13 @@
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 8–11. Kernel PCA and the reconstruction pre-image error:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 59,
@@ -1300,6 +1391,13 @@
     "plt.grid(True)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Selecting a Kernel and Tuning Hyperparameters"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 61,
@@ -1384,6 +1482,13 @@
     "X_reduced = lle.fit_transform(X)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 8–12. Unrolled Swiss roll using LLE:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 67,
@@ -1405,7 +1510,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# MDS, Isomap and t-SNE"
+    "## Other Dimensionality Reduction Techniques"
    ]
   },
   {
@@ -1459,6 +1564,13 @@
     "X_reduced_lda = lda.transform(X_mnist)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Code to generate Figure 8–13. Using various techniques to reduce the Swiss roll to 2D:**"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 72,
diff --git a/09_unsupervised_learning.ipynb b/09_unsupervised_learning.ipynb
index 10be3ea..ee12489 100644
--- a/09_unsupervised_learning.ipynb
+++ b/09_unsupervised_learning.ipynb
@@ -91,7 +91,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Introduction – Classification _vs_ Clustering"
+    "**Introduction – Classification _vs_ Clustering**"
    ]
   },
   {
@@ -320,7 +320,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Fit and Predict"
+    "**Fit and predict**"
    ]
   },
   {
@@ -428,7 +428,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Decision Boundaries"
+    "**Decision Boundaries**"
    ]
   },
   {
@@ -507,7 +507,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Hard Clustering _vs_ Soft Clustering"
+    "**Hard Clustering _vs_ Soft Clustering**"
    ]
   },
   {
@@ -546,7 +546,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### K-Means Algorithm"
+    "### The K-Means Algorithm"
    ]
   },
   {
@@ -639,7 +639,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### K-Means Variability"
+    "**K-Means Variability**"
    ]
   },
   {
@@ -827,7 +827,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### K-Means++"
+    "### Centroid initialization methods"
    ]
   },
   {
@@ -1432,7 +1432,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Limits of K-Means"
+    "## Limits of K-Means"
    ]
   },
   {
@@ -1494,7 +1494,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Using clustering for image segmentation"
+    "## Using Clustering for Image Segmentation"
    ]
   },
   {
@@ -1578,7 +1578,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Using Clustering for Preprocessing"
+    "## Using Clustering for Preprocessing"
    ]
   },
   {
@@ -1785,7 +1785,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Clustering for Semi-supervised Learning"
+    "## Using Clustering for Semi-Supervised Learning"
    ]
   },
   {
@@ -2756,7 +2756,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Anomaly Detection using Gaussian Mixtures"
+    "## Anomaly Detection Using Gaussian Mixtures"
    ]
   },
   {
@@ -2797,7 +2797,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Model selection"
+    "## Selecting the Number of Clusters"
    ]
   },
   {
@@ -2983,7 +2983,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Variational Bayesian Gaussian Mixtures"
+    "## Bayesian Gaussian Mixture Models"
    ]
   },
   {
@@ -3151,7 +3151,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Likelihood Function"
+    "**Likelihood Function**"
    ]
   },
   {
@@ -3242,13 +3242,6 @@
     "plt.show()"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "markdown",
    "metadata": {},

From c14e87edc7574f213cd016f320e666b5874caa09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= <ageron@users.noreply.github.com>
Date: Wed, 6 Oct 2021 19:13:44 +1300
Subject: [PATCH 09/16] X_train, y_train instead of X, y, fixes #474

---
 07_ensemble_learning_and_random_forests.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/07_ensemble_learning_and_random_forests.ipynb b/07_ensemble_learning_and_random_forests.ipynb
index 7558d0f..f8b6789 100644
--- a/07_ensemble_learning_and_random_forests.ipynb
+++ b/07_ensemble_learning_and_random_forests.ipynb
@@ -496,7 +496,7 @@
     "for i in range(15):\n",
     "    tree_clf = DecisionTreeClassifier(max_leaf_nodes=16, random_state=42 + i)\n",
     "    indices_with_replacement = np.random.randint(0, len(X_train), len(X_train))\n",
-    "    tree_clf.fit(X[indices_with_replacement], y[indices_with_replacement])\n",
+    "    tree_clf.fit(X_train[indices_with_replacement], y_train[indices_with_replacement])\n",
     "    plot_decision_boundary(tree_clf, X, y, axes=[-1.5, 2.45, -1, 1.5], alpha=0.02, contour=False)\n",
     "\n",
     "plt.show()"

From a8f393cf342b7d8cd799ce0ae60b0fb7aad6c414 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joaqu=C3=ADn=20Ruales?=
 <1588988+jruales@users.noreply.github.com>
Date: Wed, 6 Oct 2021 15:19:09 -0700
Subject: [PATCH 10/16] Fix LaTeX in math_differential_calculus.ipynb

---
 math_differential_calculus.ipynb | 252 +++++++++++++++----------------
 1 file changed, 126 insertions(+), 126 deletions(-)

diff --git a/math_differential_calculus.ipynb b/math_differential_calculus.ipynb
index e0ff2bb..9310834 100644
--- a/math_differential_calculus.ipynb
+++ b/math_differential_calculus.ipynb
@@ -439,16 +439,16 @@
     "Let's look at a concrete example. Let's see if we can determine what the slope of the $y=x^2$ curve is, at any point $\\mathrm{A}$ (try to understand each line, I promise it's not that hard):\n",
     "\n",
     "$\n",
-    "\\begin{split}\n",
-    "f'(x_\\mathrm{A}) \\, && = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim\\dfrac{f(x_\\mathrm{B}) - f(x_\\mathrm{A})}{x_\\mathrm{B} - x_\\mathrm{A}} \\\\\n",
-    "&& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim\\dfrac{{x_\\mathrm{B}}^2 - {x_\\mathrm{A}}^2}{x_\\mathrm{B} - x_\\mathrm{A}} \\quad && \\text{since } f(x) = x^2\\\\\n",
-    "&& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim\\dfrac{(x_\\mathrm{B} - x_\\mathrm{A})(x_\\mathrm{B} + x_\\mathrm{A})}{x_\\mathrm{B} - x_\\mathrm{A}}\\quad && \\text{since } {x_\\mathrm{A}}^2 - {x_\\mathrm{B}}^2 = (x_\\mathrm{A}-x_\\mathrm{B})(x_\\mathrm{A}+x_\\mathrm{B})\\\\\n",
-    "&& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim(x_\\mathrm{B} + x_\\mathrm{A})\\quad && \\text{since the two } (x_\\mathrm{B} - x_\\mathrm{A}) \\text{ cancel out}\\\\\n",
-    "&& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim x_\\mathrm{B} \\, + \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim x_\\mathrm{A}\\quad && \\text{since the limit of a sum is the sum of the limits}\\\\\n",
-    "&& = x_\\mathrm{A} \\, + \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim x_\\mathrm{A} \\quad && \\text{since } x_\\mathrm{B}\\text{ approaches } x_\\mathrm{A} \\\\\n",
-    "&& = x_\\mathrm{A} + x_\\mathrm{A} \\quad && \\text{since } x_\\mathrm{A} \\text{ remains constant when } x_\\mathrm{B}\\text{ approaches } x_\\mathrm{A} \\\\\n",
-    "&& = 2 x_\\mathrm{A}\n",
-    "\\end{split}\n",
+    "\\begin{align*}\n",
+    "f'(x_\\mathrm{A}) \\, & = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim\\dfrac{f(x_\\mathrm{B}) - f(x_\\mathrm{A})}{x_\\mathrm{B} - x_\\mathrm{A}} \\\\\n",
+    "& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim\\dfrac{{x_\\mathrm{B}}^2 - {x_\\mathrm{A}}^2}{x_\\mathrm{B} - x_\\mathrm{A}} \\quad & \\text{since } f(x) = x^2\\\\\n",
+    "& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim\\dfrac{(x_\\mathrm{B} - x_\\mathrm{A})(x_\\mathrm{B} + x_\\mathrm{A})}{x_\\mathrm{B} - x_\\mathrm{A}}\\quad & \\text{since } {x_\\mathrm{A}}^2 - {x_\\mathrm{B}}^2 = (x_\\mathrm{A}-x_\\mathrm{B})(x_\\mathrm{A}+x_\\mathrm{B})\\\\\n",
+    "& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim(x_\\mathrm{B} + x_\\mathrm{A})\\quad & \\text{since the two } (x_\\mathrm{B} - x_\\mathrm{A}) \\text{ cancel out}\\\\\n",
+    "& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim x_\\mathrm{B} \\, + \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim x_\\mathrm{A}\\quad & \\text{since the limit of a sum is the sum of the limits}\\\\\n",
+    "& = x_\\mathrm{A} \\, + \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim x_\\mathrm{A} \\quad & \\text{since } x_\\mathrm{B}\\text{ approaches } x_\\mathrm{A} \\\\\n",
+    "& = x_\\mathrm{A} + x_\\mathrm{A} \\quad & \\text{since } x_\\mathrm{A} \\text{ remains constant when } x_\\mathrm{B}\\text{ approaches } x_\\mathrm{A} \\\\\n",
+    "& = 2 x_\\mathrm{A}\n",
+    "\\end{align*}\n",
     "$\n",
     "\n",
     "That's it! We just proved that the slope of $y = x^2$ at any point $\\mathrm{A}$ is $f'(x_\\mathrm{A}) = 2x_\\mathrm{A}$. What we have done is called **differentiation**: finding the derivative of a function."
@@ -517,14 +517,14 @@
     "Okay! Now let's use this new definition to find the derivative of $f(x) = x^2$ at any point $x$, and (hopefully) we should find the same result as above (except using $x$ instead of $x_\\mathrm{A}$):\n",
     "\n",
     "$\n",
-    "\\begin{split}\n",
-    "f'(x) \\, && = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x + \\epsilon) - f(x)}{\\epsilon} \\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{(x + \\epsilon)^2 - {x}^2}{\\epsilon} \\quad && \\text{since } f(x) = x^2\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{{x}^2 + 2x\\epsilon + \\epsilon^2 - {x}^2}{\\epsilon}\\quad && \\text{since } (x + \\epsilon)^2 = {x}^2 + 2x\\epsilon + \\epsilon^2\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{2x\\epsilon + \\epsilon^2}{\\epsilon}\\quad && \\text{since the two } {x}^2 \\text{ cancel out}\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim \\, (2x + \\epsilon)\\quad && \\text{since } 2x\\epsilon \\text{ and } \\epsilon^2 \\text{ can both be divided by } \\epsilon\\\\\n",
-    "&& = 2 x\n",
-    "\\end{split}\n",
+    "\\begin{align*}\n",
+    "f'(x) \\, & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x + \\epsilon) - f(x)}{\\epsilon} \\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{(x + \\epsilon)^2 - {x}^2}{\\epsilon} \\quad & \\text{since } f(x) = x^2\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{{x}^2 + 2x\\epsilon + \\epsilon^2 - {x}^2}{\\epsilon}\\quad & \\text{since } (x + \\epsilon)^2 = {x}^2 + 2x\\epsilon + \\epsilon^2\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{2x\\epsilon + \\epsilon^2}{\\epsilon}\\quad & \\text{since the two } {x}^2 \\text{ cancel out}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim \\, (2x + \\epsilon)\\quad & \\text{since } 2x\\epsilon \\text{ and } \\epsilon^2 \\text{ can both be divided by } \\epsilon\\\\\n",
+    "& = 2 x\n",
+    "\\end{align*}\n",
     "$\n",
     "\n",
     "Yep! It works out."
@@ -705,13 +705,13 @@
     "One very important rule is that **the derivative of a sum is the sum of the derivatives**. More precisely, if we define $f(x) = g(x) + h(x)$, then $f'(x) = g'(x) + h'(x)$. This is quite easy to prove:\n",
     "\n",
     "$\n",
-    "\\begin{split}\n",
-    "f'(x) && = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} && \\quad\\text{by definition}\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon) + h(x+\\epsilon) - g(x) - h(x)}{\\epsilon} && \\quad \\text{using }f(x) = g(x) + h(x) \\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon) - g(x) + h(x+\\epsilon) - h(x)}{\\epsilon} && \\quad \\text{just moving terms around}\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon} + \\underset{\\epsilon \\to 0}\\lim\\dfrac{h(x+\\epsilon) - h(x)}{\\epsilon} && \\quad \\text{since the limit of a sum is the sum of the limits}\\\\\n",
-    "&& = g'(x) + h'(x) && \\quad \\text{using the definitions of }g'(x) \\text{ and } h'(x)\n",
-    "\\end{split}\n",
+    "\\begin{align*}\n",
+    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} & \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon) + h(x+\\epsilon) - g(x) - h(x)}{\\epsilon} & \\quad \\text{using }f(x) = g(x) + h(x) \\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon) - g(x) + h(x+\\epsilon) - h(x)}{\\epsilon} & \\quad \\text{just moving terms around}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon} + \\underset{\\epsilon \\to 0}\\lim\\dfrac{h(x+\\epsilon) - h(x)}{\\epsilon} & \\quad \\text{since the limit of a sum is the sum of the limits}\\\\\n",
+    "& = g'(x) + h'(x) & \\quad \\text{using the definitions of }g'(x) \\text{ and } h'(x)\n",
+    "\\end{align*}\n",
     "$"
    ]
   },
@@ -1213,18 +1213,18 @@
     "$\n",
     "\\mathbf{J}_\\mathbf{f}(\\mathbf{x}_\\mathbf{A}) = \\begin{pmatrix}\n",
     "\\dfrac{\\partial f_1}{\\partial x_1}(\\mathbf{x}_\\mathbf{A})\n",
-    "&& \\dfrac{\\partial f_1}{\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
-    "&& \\dots\n",
-    "&& \\dfrac{\\partial f_1}{\\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
+    "& \\dfrac{\\partial f_1}{\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
+    "& \\dots\n",
+    "& \\dfrac{\\partial f_1}{\\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
     "\\dfrac{\\partial f_2}{\\partial x_1}(\\mathbf{x}_\\mathbf{A})\n",
-    "&& \\dfrac{\\partial f_2}{\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
-    "&& \\dots\n",
-    "&& \\dfrac{\\partial f_2}{\\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
-    "\\vdots && \\vdots && \\ddots && \\vdots \\\\\n",
+    "& \\dfrac{\\partial f_2}{\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
+    "& \\dots\n",
+    "& \\dfrac{\\partial f_2}{\\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
+    "\\vdots & \\vdots & \\ddots & \\vdots \\\\\n",
     "\\dfrac{\\partial f_m}{\\partial x_1}(\\mathbf{x}_\\mathbf{A})\n",
-    "&& \\dfrac{\\partial f_m}{\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
-    "&& \\dots\n",
-    "&& \\dfrac{\\partial f_m}{\\partial x_n}(\\mathbf{x}_\\mathbf{A})\n",
+    "& \\dfrac{\\partial f_m}{\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
+    "& \\dots\n",
+    "& \\dfrac{\\partial f_m}{\\partial x_n}(\\mathbf{x}_\\mathbf{A})\n",
     "\\end{pmatrix}\n",
     "$\n",
     "\n",
@@ -1257,18 +1257,18 @@
     "$\n",
     "\\mathbf{H}_f(\\mathbf{x}_\\mathbf{A}) = \\begin{pmatrix}\n",
     "\\dfrac{\\partial^2 f}{\\partial {x_1}^2}(\\mathbf{x}_\\mathbf{A})\n",
-    "&& \\dfrac{\\partial^2 f}{\\partial x_1\\, \\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
-    "&& \\dots\n",
-    "&& \\dfrac{\\partial^2 f}{\\partial x_1\\, \\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
+    "& \\dfrac{\\partial^2 f}{\\partial x_1\\, \\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
+    "& \\dots\n",
+    "& \\dfrac{\\partial^2 f}{\\partial x_1\\, \\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
     "\\dfrac{\\partial^2 f}{\\partial x_2\\,\\partial x_1}(\\mathbf{x}_\\mathbf{A})\n",
-    "&& \\dfrac{\\partial^2 f}{\\partial {x_2}^2}(\\mathbf{x}_\\mathbf{A})\n",
-    "&& \\dots\n",
-    "&& \\dfrac{\\partial^2 f}{\\partial x_2\\, \\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
-    "\\vdots && \\vdots && \\ddots && \\vdots \\\\\n",
+    "& \\dfrac{\\partial^2 f}{\\partial {x_2}^2}(\\mathbf{x}_\\mathbf{A})\n",
+    "& \\dots\n",
+    "& \\dfrac{\\partial^2 f}{\\partial x_2\\, \\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
+    "\\vdots & \\vdots & \\ddots & \\vdots \\\\\n",
     "\\dfrac{\\partial^2 f}{\\partial x_n\\,\\partial x_1}(\\mathbf{x}_\\mathbf{A})\n",
-    "&& \\dfrac{\\partial^2 f}{\\partial x_n\\,\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
-    "&& \\dots\n",
-    "&& \\dfrac{\\partial^2 f}{\\partial {x_n}^2}(\\mathbf{x}_\\mathbf{A})\\\\\n",
+    "& \\dfrac{\\partial^2 f}{\\partial x_n\\,\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
+    "& \\dots\n",
+    "& \\dfrac{\\partial^2 f}{\\partial {x_n}^2}(\\mathbf{x}_\\mathbf{A})\\\\\n",
     "\\end{pmatrix}\n",
     "$"
    ]
@@ -1305,12 +1305,12 @@
     "## Constant: $f(x)=c$\n",
     "\n",
     "$\n",
-    "\\begin{split}\n",
-    "f'(x) && = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} && \\quad\\text{by definition}\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{c - c}{\\epsilon} && \\quad \\text{using }f(x) = c \\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim 0 && \\quad \\text{since }c - c = 0\\\\\n",
-    "&& = 0 && \\quad \\text{since the limit of a constant is that constant}\n",
-    "\\end{split}\n",
+    "\\begin{align*}\n",
+    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} & \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{c - c}{\\epsilon} & \\quad \\text{using }f(x) = c \\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim 0 & \\quad \\text{since }c - c = 0\\\\\n",
+    "& = 0 & \\quad \\text{since the limit of a constant is that constant}\n",
+    "\\end{align*}\n",
     "$\n"
    ]
   },
@@ -1324,18 +1324,18 @@
     "## Product rule: $f(x)=g(x)h(x)$\n",
     "\n",
     "$\n",
-    "\\begin{split}\n",
-    "f'(x) && = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} && \\quad\\text{by definition}\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon)h(x+\\epsilon) - g(x)h(x)}{\\epsilon} && \\quad \\text{using }f(x) = g(x)h(x) \\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon)h(x+\\epsilon) - g(x)h(x+\\epsilon) + g(x)h(x + \\epsilon) - g(x)h(x)}{\\epsilon} && \\quad \\text{subtracting and adding }g(x)h(x + \\epsilon)\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon)h(x+\\epsilon) - g(x)h(x+\\epsilon)}{\\epsilon} + \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x)h(x + \\epsilon) - g(x)h(x)}{\\epsilon} && \\quad \\text{since the limit of a sum is the sum of the limits}\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}h(x+\\epsilon)\\right]} \\,+\\, \\underset{\\epsilon \\to 0}\\lim{\\left[g(x)\\dfrac{h(x + \\epsilon) - h(x)}{\\epsilon}\\right]} && \\quad \\text{factorizing }h(x+\\epsilon) \\text{ and } g(x)\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}h(x+\\epsilon)\\right]} \\,+\\, g(x)\\underset{\\epsilon \\to 0}\\lim{\\dfrac{h(x + \\epsilon) - h(x)}{\\epsilon}} && \\quad \\text{taking } g(x) \\text{ out of the limit since it does not depend on }\\epsilon\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}h(x+\\epsilon)\\right]} \\,+\\, g(x)h'(x) && \\quad \\text{using the definition of h'(x)}\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}\\right]}\\underset{\\epsilon \\to 0}\\lim{h(x+\\epsilon)} + g(x)h'(x) && \\quad \\text{since the limit of a product is the product of the limits}\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}\\right]}h(x) + h(x)g'(x) && \\quad \\text{since } h(x) \\text{ is continuous}\\\\\n",
-    "&& = g'(x)h(x) + g(x)h'(x) && \\quad \\text{using the definition of }g'(x)\n",
-    "\\end{split}\n",
+    "\\begin{align*}\n",
+    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} & \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon)h(x+\\epsilon) - g(x)h(x)}{\\epsilon} & \\quad \\text{using }f(x) = g(x)h(x) \\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon)h(x+\\epsilon) - g(x)h(x+\\epsilon) + g(x)h(x + \\epsilon) - g(x)h(x)}{\\epsilon} & \\quad \\text{subtracting and adding }g(x)h(x + \\epsilon)\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon)h(x+\\epsilon) - g(x)h(x+\\epsilon)}{\\epsilon} + \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x)h(x + \\epsilon) - g(x)h(x)}{\\epsilon} & \\quad \\text{since the limit of a sum is the sum of the limits}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}h(x+\\epsilon)\\right]} \\,+\\, \\underset{\\epsilon \\to 0}\\lim{\\left[g(x)\\dfrac{h(x + \\epsilon) - h(x)}{\\epsilon}\\right]} & \\quad \\text{factorizing }h(x+\\epsilon) \\text{ and } g(x)\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}h(x+\\epsilon)\\right]} \\,+\\, g(x)\\underset{\\epsilon \\to 0}\\lim{\\dfrac{h(x + \\epsilon) - h(x)}{\\epsilon}} & \\quad \\text{taking } g(x) \\text{ out of the limit since it does not depend on }\\epsilon\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}h(x+\\epsilon)\\right]} \\,+\\, g(x)h'(x) & \\quad \\text{using the definition of h'(x)}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}\\right]}\\underset{\\epsilon \\to 0}\\lim{h(x+\\epsilon)} + g(x)h'(x) & \\quad \\text{since the limit of a product is the product of the limits}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}\\right]}h(x) + h(x)g'(x) & \\quad \\text{since } h(x) \\text{ is continuous}\\\\\n",
+    "& = g'(x)h(x) + g(x)h'(x) & \\quad \\text{using the definition of }g'(x)\n",
+    "\\end{align*}\n",
     "$\n",
     "\n",
     "Note that if $g(x)=c$ (a constant), then $g'(x)=0$, so the equation simplifies to:\n",
@@ -1353,18 +1353,18 @@
     "## Chain rule: $f(x)=g(h(x))$\n",
     "\n",
     "$\n",
-    "\\begin{split}\n",
-    "f'(x) && = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} && \\quad\\text{by definition}\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{\\epsilon} && \\quad \\text{using }f(x) = g(h(x))\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{h(x+\\epsilon)-h(x)}{h(x+\\epsilon)-h(x)}\\,\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{\\epsilon}\\right]} && \\quad \\text{multiplying and dividing by }h(x+\\epsilon) - h(x)\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{h(x+\\epsilon)-h(x)}{\\epsilon}\\,\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{h(x+\\epsilon)-h(x)}\\right]} && \\quad \\text{swapping the denominators}\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{h(x+\\epsilon)-h(x)}{\\epsilon}\\right]} \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{h(x+\\epsilon)-h(x)}\\right]} && \\quad \\text{the limit of a product is the product of the limits}\\\\\n",
-    "&& = h'(x) \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{h(x+\\epsilon)-h(x)}\\right]} && \\quad \\text{using the definition of }h'(x)\\\\\n",
-    "&& = h'(x) \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(u) - g(v)}{u-v}\\right]} && \\quad \\text{using }u=h(x+\\epsilon) \\text{ and } v=h(x)\\\\\n",
-    "&& = h'(x) \\underset{u \\to v}\\lim{\\left[\\dfrac{g(u) - g(v)}{u-v}\\right]} && \\quad \\text{ since } h \\text{ is continuous, so } \\underset{\\epsilon \\to 0}\\lim{u}=v\\\\\n",
-    "&& = h'(x)g'(v) && \\quad \\text{ using the definition of } g'(v)\\\\\n",
-    "&& = h'(x)g'(h(x)) && \\quad \\text{ since } v = h(x)\n",
-    "\\end{split}\n",
+    "\\begin{align*}\n",
+    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} & \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{\\epsilon} & \\quad \\text{using }f(x) = g(h(x))\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{h(x+\\epsilon)-h(x)}{h(x+\\epsilon)-h(x)}\\,\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{\\epsilon}\\right]} & \\quad \\text{multiplying and dividing by }h(x+\\epsilon) - h(x)\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{h(x+\\epsilon)-h(x)}{\\epsilon}\\,\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{h(x+\\epsilon)-h(x)}\\right]} & \\quad \\text{swapping the denominators}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{h(x+\\epsilon)-h(x)}{\\epsilon}\\right]} \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{h(x+\\epsilon)-h(x)}\\right]} & \\quad \\text{the limit of a product is the product of the limits}\\\\\n",
+    "& = h'(x) \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{h(x+\\epsilon)-h(x)}\\right]} & \\quad \\text{using the definition of }h'(x)\\\\\n",
+    "& = h'(x) \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(u) - g(v)}{u-v}\\right]} & \\quad \\text{using }u=h(x+\\epsilon) \\text{ and } v=h(x)\\\\\n",
+    "& = h'(x) \\underset{u \\to v}\\lim{\\left[\\dfrac{g(u) - g(v)}{u-v}\\right]} & \\quad \\text{ since } h \\text{ is continuous, so } \\underset{\\epsilon \\to 0}\\lim{u}=v\\\\\n",
+    "& = h'(x)g'(v) & \\quad \\text{ using the definition of } g'(v)\\\\\n",
+    "& = h'(x)g'(h(x)) & \\quad \\text{ since } v = h(x)\n",
+    "\\end{align*}\n",
     "$"
    ]
   },
@@ -1380,15 +1380,15 @@
     "There are several equivalent definitions of the number $e$. One of them states that $e$ is the unique positive number for which $\\underset{\\epsilon \\to 0}\\lim{\\dfrac{e^\\epsilon - 1}{\\epsilon}}=1$. We will use this in this proof:\n",
     "\n",
     "$\n",
-    "\\begin{split}\n",
-    "f'(x) && = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} && \\quad\\text{by definition}\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{e^{x+\\epsilon} - e^x}{\\epsilon} && \\quad \\text{using }f(x) = e^x\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{e^x e^\\epsilon - e^x}{\\epsilon} && \\quad \\text{using the fact that } x^{a+b}=x^a x^b\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim{\\left[e^x\\dfrac{e^\\epsilon - 1}{\\epsilon}\\right]} && \\quad \\text{factoring out }e^x\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim{e^x} \\, \\underset{\\epsilon \\to 0}\\lim{\\dfrac{e^\\epsilon - 1}{\\epsilon}} && \\quad \\text{the limit of a product is the product of the limits}\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim{e^x} && \\quad \\text{since }\\underset{\\epsilon \\to 0}\\lim{\\dfrac{e^\\epsilon - 1}{\\epsilon}}=1\\\\\n",
-    "&& = e^x && \\quad \\text{since } e^x \\text{ does not depend on }\\epsilon\n",
-    "\\end{split}\n",
+    "\\begin{align*}\n",
+    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} & \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{e^{x+\\epsilon} - e^x}{\\epsilon} & \\quad \\text{using }f(x) = e^x\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{e^x e^\\epsilon - e^x}{\\epsilon} & \\quad \\text{using the fact that } x^{a+b}=x^a x^b\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[e^x\\dfrac{e^\\epsilon - 1}{\\epsilon}\\right]} & \\quad \\text{factoring out }e^x\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{e^x} \\, \\underset{\\epsilon \\to 0}\\lim{\\dfrac{e^\\epsilon - 1}{\\epsilon}} & \\quad \\text{the limit of a product is the product of the limits}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{e^x} & \\quad \\text{since }\\underset{\\epsilon \\to 0}\\lim{\\dfrac{e^\\epsilon - 1}{\\epsilon}}=1\\\\\n",
+    "& = e^x & \\quad \\text{since } e^x \\text{ does not depend on }\\epsilon\n",
+    "\\end{align*}\n",
     "$\n"
    ]
   },
@@ -1412,19 +1412,19 @@
     "This will come in handy in a second:\n",
     "\n",
     "$\n",
-    "\\begin{split}\n",
-    "f'(x) && = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} && \\quad\\text{by definition}\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{\\ln(x+\\epsilon) - \\ln(x)}{\\epsilon} && \\quad \\text{using }f(x) = \\ln(x)\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{\\ln\\left(\\dfrac{x+\\epsilon}{x}\\right)}{\\epsilon} && \\quad \\text{since }\\ln(a)-\\ln(b)=\\ln\\left(\\dfrac{a}{b}\\right)\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{1}{\\epsilon} \\, \\ln\\left(1 + \\dfrac{\\epsilon}{x}\\right)\\right]} && \\quad \\text{just moving things around a bit}\\\\\n",
-    "&& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{1}{xu} \\, \\ln\\left(1 + u\\right)\\right]} && \\quad \\text{defining }u=\\dfrac{\\epsilon}{x} \\text{ and thus } \\epsilon=xu\\\\\n",
-    "&& = \\underset{u \\to 0}\\lim{\\left[\\dfrac{1}{xu} \\, \\ln\\left(1 + u\\right)\\right]} && \\quad \\text{replacing } \\underset{\\epsilon \\to 0}\\lim \\text{ with } \\underset{u \\to 0}\\lim \\text{ since }\\underset{\\epsilon \\to 0}\\lim u=0\\\\\n",
-    "&& = \\underset{u \\to 0}\\lim{\\left[\\dfrac{1}{x} \\, \\ln\\left((1 + u)^{1/u}\\right)\\right]} && \\quad \\text{since }a\\ln(b)=\\ln(a^b)\\\\\n",
-    "&& = \\dfrac{1}{x}\\underset{u \\to 0}\\lim{\\left[\\ln\\left((1 + u)^{1/u}\\right)\\right]} && \\quad \\text{taking }\\dfrac{1}{x} \\text{ out since it does not depend on }\\epsilon\\\\\n",
-    "&& = \\dfrac{1}{x}\\ln\\left(\\underset{u \\to 0}\\lim{(1 + u)^{1/u}}\\right) && \\quad \\text{taking }\\ln\\text{ out since it is a continuous function}\\\\\n",
-    "&& = \\dfrac{1}{x}\\ln(e) && \\quad \\text{since }e=\\underset{u \\to 0}\\lim{(1 + u)^{1/u}}\\\\\n",
-    "&& = \\dfrac{1}{x} && \\quad \\text{since }\\ln(e)=1\n",
-    "\\end{split}\n",
+    "\\begin{align*}\n",
+    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} & \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{\\ln(x+\\epsilon) - \\ln(x)}{\\epsilon} & \\quad \\text{using }f(x) = \\ln(x)\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{\\ln\\left(\\dfrac{x+\\epsilon}{x}\\right)}{\\epsilon} & \\quad \\text{since }\\ln(a)-\\ln(b)=\\ln\\left(\\dfrac{a}{b}\\right)\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{1}{\\epsilon} \\, \\ln\\left(1 + \\dfrac{\\epsilon}{x}\\right)\\right]} & \\quad \\text{just moving things around a bit}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{1}{xu} \\, \\ln\\left(1 + u\\right)\\right]} & \\quad \\text{defining }u=\\dfrac{\\epsilon}{x} \\text{ and thus } \\epsilon=xu\\\\\n",
+    "& = \\underset{u \\to 0}\\lim{\\left[\\dfrac{1}{xu} \\, \\ln\\left(1 + u\\right)\\right]} & \\quad \\text{replacing } \\underset{\\epsilon \\to 0}\\lim \\text{ with } \\underset{u \\to 0}\\lim \\text{ since }\\underset{\\epsilon \\to 0}\\lim u=0\\\\\n",
+    "& = \\underset{u \\to 0}\\lim{\\left[\\dfrac{1}{x} \\, \\ln\\left((1 + u)^{1/u}\\right)\\right]} & \\quad \\text{since }a\\ln(b)=\\ln(a^b)\\\\\n",
+    "& = \\dfrac{1}{x}\\underset{u \\to 0}\\lim{\\left[\\ln\\left((1 + u)^{1/u}\\right)\\right]} & \\quad \\text{taking }\\dfrac{1}{x} \\text{ out since it does not depend on }\\epsilon\\\\\n",
+    "& = \\dfrac{1}{x}\\ln\\left(\\underset{u \\to 0}\\lim{(1 + u)^{1/u}}\\right) & \\quad \\text{taking }\\ln\\text{ out since it is a continuous function}\\\\\n",
+    "& = \\dfrac{1}{x}\\ln(e) & \\quad \\text{since }e=\\underset{u \\to 0}\\lim{(1 + u)^{1/u}}\\\\\n",
+    "& = \\dfrac{1}{x} & \\quad \\text{since }\\ln(e)=1\n",
+    "\\end{align*}\n",
     "$\n"
    ]
   },
@@ -1655,16 +1655,16 @@
     "Now the second thing we need to prove before we can tackle the derivative of the $\\sin$ function is the fact that $\\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta}=0$. Here we go:\n",
     "\n",
     "$\n",
-    "\\begin{split}\n",
-    "\\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta} && =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta}\\frac{\\cos(\\theta) + 1}{\\cos(\\theta) + 1} && \\quad \\text{ multiplying and dividing by }\\cos(\\theta)+1\\\\\n",
-    "&& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos^2(\\theta) - 1}{\\theta(\\cos(\\theta) + 1)} && \\quad \\text{ since }(a-1)(a+1)=a^2-1\\\\\n",
-    "&& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin^2(\\theta)}{\\theta(\\cos(\\theta) + 1)} && \\quad \\text{ since }\\cos^2(\\theta) - 1 = \\sin^2(\\theta)\\\\\n",
-    "&& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta}\\dfrac{\\sin(\\theta)}{\\cos(\\theta) + 1} && \\quad \\text{ just rearranging the terms}\\\\\n",
-    "&& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta} \\, \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\cos(\\theta) + 1} && \\quad \\text{ since the limit of a product is the product of the limits}\\\\\n",
-    "&& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\cos(\\theta) + 1} && \\quad \\text{ since } \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta}=1\\\\\n",
-    "&& =  \\dfrac{0}{1+1} && \\quad \\text{ since } \\underset{\\theta \\to 0}\\lim\\sin(\\theta)=0 \\text{ and } \\underset{\\theta \\to 0}\\lim\\cos(\\theta)=1\\\\\n",
-    "&& =  0\\\\\n",
-    "\\end{split}\n",
+    "\\begin{align*}\n",
+    "\\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta} & =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta}\\frac{\\cos(\\theta) + 1}{\\cos(\\theta) + 1} & \\quad \\text{ multiplying and dividing by }\\cos(\\theta)+1\\\\\n",
+    "& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos^2(\\theta) - 1}{\\theta(\\cos(\\theta) + 1)} & \\quad \\text{ since }(a-1)(a+1)=a^2-1\\\\\n",
+    "& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin^2(\\theta)}{\\theta(\\cos(\\theta) + 1)} & \\quad \\text{ since }\\cos^2(\\theta) - 1 = \\sin^2(\\theta)\\\\\n",
+    "& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta}\\dfrac{\\sin(\\theta)}{\\cos(\\theta) + 1} & \\quad \\text{ just rearranging the terms}\\\\\n",
+    "& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta} \\, \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\cos(\\theta) + 1} & \\quad \\text{ since the limit of a product is the product of the limits}\\\\\n",
+    "& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\cos(\\theta) + 1} & \\quad \\text{ since } \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta}=1\\\\\n",
+    "& =  \\dfrac{0}{1+1} & \\quad \\text{ since } \\underset{\\theta \\to 0}\\lim\\sin(\\theta)=0 \\text{ and } \\underset{\\theta \\to 0}\\lim\\cos(\\theta)=1\\\\\n",
+    "& =  0\\\\\n",
+    "\\end{align*}\n",
     "$\n",
     "\n",
     "<hr />\n",
@@ -1694,15 +1694,15 @@
    },
    "source": [
     "$\n",
-    "\\begin{split}\n",
-    "f'(x) && = \\underset{\\theta \\to 0}\\lim\\dfrac{f(x+\\theta) - f(x)}{\\theta} && \\quad\\text{by definition}\\\\\n",
-    "&& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(x+\\theta) - \\sin(x)}{\\theta} && \\quad \\text{using }f(x) = \\sin(x)\\\\\n",
-    "&& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(x)\\sin(\\theta) + \\sin(x)\\cos(\\theta) - \\sin(x)}{\\theta} && \\quad \\text{since } cos(a+b)=\\cos(a)\\sin(b)+\\sin(a)\\cos(b)\\\\\n",
-    "&& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(x)\\sin(\\theta)}{\\theta} + \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(x)\\cos(\\theta) - \\sin(x)}{\\theta} && \\quad \\text{since the limit of a sum is the sum of the limits}\\\\\n",
-    "&& = \\cos(x)\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta} + \\sin(x)\\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta} && \\quad \\text{bringing out } \\cos(x) \\text{ and } \\sin(x) \\text{ since they don't depend on }\\theta\\\\\n",
-    "&& = \\cos(x)\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta} && \\quad \\text{since }\\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta}=0\\\\\n",
-    "&& = \\cos(x) && \\quad \\text{since }\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta}=1\\\\\n",
-    "\\end{split}\n",
+    "\\begin{align*}\n",
+    "f'(x) & = \\underset{\\theta \\to 0}\\lim\\dfrac{f(x+\\theta) - f(x)}{\\theta} & \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(x+\\theta) - \\sin(x)}{\\theta} & \\quad \\text{using }f(x) = \\sin(x)\\\\\n",
+    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(x)\\sin(\\theta) + \\sin(x)\\cos(\\theta) - \\sin(x)}{\\theta} & \\quad \\text{since } cos(a+b)=\\cos(a)\\sin(b)+\\sin(a)\\cos(b)\\\\\n",
+    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(x)\\sin(\\theta)}{\\theta} + \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(x)\\cos(\\theta) - \\sin(x)}{\\theta} & \\quad \\text{since the limit of a sum is the sum of the limits}\\\\\n",
+    "& = \\cos(x)\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta} + \\sin(x)\\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta} & \\quad \\text{bringing out } \\cos(x) \\text{ and } \\sin(x) \\text{ since they don't depend on }\\theta\\\\\n",
+    "& = \\cos(x)\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta} & \\quad \\text{since }\\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta}=0\\\\\n",
+    "& = \\cos(x) & \\quad \\text{since }\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta}=1\\\\\n",
+    "\\end{align*}\n",
     "$\n"
    ]
   },
@@ -1718,16 +1718,16 @@
     "Since we have proven that $\\sin'(x)=\\cos(x)$, proving that $\\cos'(x)=-\\sin(x)$ will be much easier.\n",
     "\n",
     "$\n",
-    "\\begin{split}\n",
-    "f'(x) && = \\underset{\\theta \\to 0}\\lim\\dfrac{f(x+\\theta) - f(x)}{\\theta} && \\quad\\text{by definition}\\\\\n",
-    "&& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(x+\\theta) - \\cos(x)}{\\theta} && \\quad \\text{using }f(x) = \\cos(x)\\\\\n",
-    "&& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin\\left(x+\\dfrac{\\pi}{2}+\\theta\\right) - \\sin\\left(x+\\dfrac{\\pi}{2}\\right)}{\\theta} && \\quad \\text{since }\\cos(x) = \\sin\\left(x+\\dfrac{\\pi}{2}\\right)\\\\\n",
-    "&& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(z+\\theta) - \\sin(z)}{\\theta} && \\quad \\text{using }z = x + \\dfrac{\\pi}{2}\\\\\n",
-    "&& = \\sin'(z) && \\quad \\text{using the definition of }\\sin'(z)\\\\\n",
-    "&& = \\cos(z) && \\quad \\text{since we proved that }\\sin'(z)=\\cos(z)\\\\\n",
-    "&& = \\cos\\left(x + \\dfrac{\\pi}{2}\\right) && \\quad \\text{using the definition of }z\\\\\n",
-    "&& = -\\sin(x) && \\quad \\text{using this well-known rule of trigonometry}\n",
-    "\\end{split}\n",
+    "\\begin{align*}\n",
+    "f'(x) & = \\underset{\\theta \\to 0}\\lim\\dfrac{f(x+\\theta) - f(x)}{\\theta} & \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(x+\\theta) - \\cos(x)}{\\theta} & \\quad \\text{using }f(x) = \\cos(x)\\\\\n",
+    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin\\left(x+\\dfrac{\\pi}{2}+\\theta\\right) - \\sin\\left(x+\\dfrac{\\pi}{2}\\right)}{\\theta} & \\quad \\text{since }\\cos(x) = \\sin\\left(x+\\dfrac{\\pi}{2}\\right)\\\\\n",
+    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(z+\\theta) - \\sin(z)}{\\theta} & \\quad \\text{using }z = x + \\dfrac{\\pi}{2}\\\\\n",
+    "& = \\sin'(z) & \\quad \\text{using the definition of }\\sin'(z)\\\\\n",
+    "& = \\cos(z) & \\quad \\text{since we proved that }\\sin'(z)=\\cos(z)\\\\\n",
+    "& = \\cos\\left(x + \\dfrac{\\pi}{2}\\right) & \\quad \\text{using the definition of }z\\\\\n",
+    "& = -\\sin(x) & \\quad \\text{using this well-known rule of trigonometry}\n",
+    "\\end{align*}\n",
     "$\n"
    ]
   },

From 927aa58a882531cc1942c410507c509630999c75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joaqu=C3=ADn=20Ruales?=
 <1588988+jruales@users.noreply.github.com>
Date: Wed, 6 Oct 2021 15:48:12 -0700
Subject: [PATCH 11/16] Keep left-alignment of math step column

---
 math_differential_calculus.ipynb | 162 +++++++++++++++----------------
 1 file changed, 81 insertions(+), 81 deletions(-)

diff --git a/math_differential_calculus.ipynb b/math_differential_calculus.ipynb
index 9310834..c118d7c 100644
--- a/math_differential_calculus.ipynb
+++ b/math_differential_calculus.ipynb
@@ -441,12 +441,12 @@
     "$\n",
     "\\begin{align*}\n",
     "f'(x_\\mathrm{A}) \\, & = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim\\dfrac{f(x_\\mathrm{B}) - f(x_\\mathrm{A})}{x_\\mathrm{B} - x_\\mathrm{A}} \\\\\n",
-    "& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim\\dfrac{{x_\\mathrm{B}}^2 - {x_\\mathrm{A}}^2}{x_\\mathrm{B} - x_\\mathrm{A}} \\quad & \\text{since } f(x) = x^2\\\\\n",
-    "& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim\\dfrac{(x_\\mathrm{B} - x_\\mathrm{A})(x_\\mathrm{B} + x_\\mathrm{A})}{x_\\mathrm{B} - x_\\mathrm{A}}\\quad & \\text{since } {x_\\mathrm{A}}^2 - {x_\\mathrm{B}}^2 = (x_\\mathrm{A}-x_\\mathrm{B})(x_\\mathrm{A}+x_\\mathrm{B})\\\\\n",
-    "& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim(x_\\mathrm{B} + x_\\mathrm{A})\\quad & \\text{since the two } (x_\\mathrm{B} - x_\\mathrm{A}) \\text{ cancel out}\\\\\n",
-    "& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim x_\\mathrm{B} \\, + \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim x_\\mathrm{A}\\quad & \\text{since the limit of a sum is the sum of the limits}\\\\\n",
-    "& = x_\\mathrm{A} \\, + \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim x_\\mathrm{A} \\quad & \\text{since } x_\\mathrm{B}\\text{ approaches } x_\\mathrm{A} \\\\\n",
-    "& = x_\\mathrm{A} + x_\\mathrm{A} \\quad & \\text{since } x_\\mathrm{A} \\text{ remains constant when } x_\\mathrm{B}\\text{ approaches } x_\\mathrm{A} \\\\\n",
+    "& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim\\dfrac{{x_\\mathrm{B}}^2 - {x_\\mathrm{A}}^2}{x_\\mathrm{B} - x_\\mathrm{A}} \\quad && \\text{since } f(x) = x^2\\\\\n",
+    "& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim\\dfrac{(x_\\mathrm{B} - x_\\mathrm{A})(x_\\mathrm{B} + x_\\mathrm{A})}{x_\\mathrm{B} - x_\\mathrm{A}}\\quad && \\text{since } {x_\\mathrm{A}}^2 - {x_\\mathrm{B}}^2 = (x_\\mathrm{A}-x_\\mathrm{B})(x_\\mathrm{A}+x_\\mathrm{B})\\\\\n",
+    "& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim(x_\\mathrm{B} + x_\\mathrm{A})\\quad && \\text{since the two } (x_\\mathrm{B} - x_\\mathrm{A}) \\text{ cancel out}\\\\\n",
+    "& = \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim x_\\mathrm{B} \\, + \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim x_\\mathrm{A}\\quad && \\text{since the limit of a sum is the sum of the limits}\\\\\n",
+    "& = x_\\mathrm{A} \\, + \\underset{x_\\mathrm{B} \\to x_\\mathrm{A}}\\lim x_\\mathrm{A} \\quad && \\text{since } x_\\mathrm{B}\\text{ approaches } x_\\mathrm{A} \\\\\n",
+    "& = x_\\mathrm{A} + x_\\mathrm{A} \\quad && \\text{since } x_\\mathrm{A} \\text{ remains constant when } x_\\mathrm{B}\\text{ approaches } x_\\mathrm{A} \\\\\n",
     "& = 2 x_\\mathrm{A}\n",
     "\\end{align*}\n",
     "$\n",
@@ -519,10 +519,10 @@
     "$\n",
     "\\begin{align*}\n",
     "f'(x) \\, & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x + \\epsilon) - f(x)}{\\epsilon} \\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{(x + \\epsilon)^2 - {x}^2}{\\epsilon} \\quad & \\text{since } f(x) = x^2\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{{x}^2 + 2x\\epsilon + \\epsilon^2 - {x}^2}{\\epsilon}\\quad & \\text{since } (x + \\epsilon)^2 = {x}^2 + 2x\\epsilon + \\epsilon^2\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{2x\\epsilon + \\epsilon^2}{\\epsilon}\\quad & \\text{since the two } {x}^2 \\text{ cancel out}\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim \\, (2x + \\epsilon)\\quad & \\text{since } 2x\\epsilon \\text{ and } \\epsilon^2 \\text{ can both be divided by } \\epsilon\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{(x + \\epsilon)^2 - {x}^2}{\\epsilon} \\quad && \\text{since } f(x) = x^2\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{{x}^2 + 2x\\epsilon + \\epsilon^2 - {x}^2}{\\epsilon}\\quad && \\text{since } (x + \\epsilon)^2 = {x}^2 + 2x\\epsilon + \\epsilon^2\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{2x\\epsilon + \\epsilon^2}{\\epsilon}\\quad && \\text{since the two } {x}^2 \\text{ cancel out}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim \\, (2x + \\epsilon)\\quad && \\text{since } 2x\\epsilon \\text{ and } \\epsilon^2 \\text{ can both be divided by } \\epsilon\\\\\n",
     "& = 2 x\n",
     "\\end{align*}\n",
     "$\n",
@@ -706,11 +706,11 @@
     "\n",
     "$\n",
     "\\begin{align*}\n",
-    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} & \\quad\\text{by definition}\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon) + h(x+\\epsilon) - g(x) - h(x)}{\\epsilon} & \\quad \\text{using }f(x) = g(x) + h(x) \\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon) - g(x) + h(x+\\epsilon) - h(x)}{\\epsilon} & \\quad \\text{just moving terms around}\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon} + \\underset{\\epsilon \\to 0}\\lim\\dfrac{h(x+\\epsilon) - h(x)}{\\epsilon} & \\quad \\text{since the limit of a sum is the sum of the limits}\\\\\n",
-    "& = g'(x) + h'(x) & \\quad \\text{using the definitions of }g'(x) \\text{ and } h'(x)\n",
+    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} && \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon) + h(x+\\epsilon) - g(x) - h(x)}{\\epsilon} && \\quad \\text{using }f(x) = g(x) + h(x) \\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon) - g(x) + h(x+\\epsilon) - h(x)}{\\epsilon} && \\quad \\text{just moving terms around}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon} + \\underset{\\epsilon \\to 0}\\lim\\dfrac{h(x+\\epsilon) - h(x)}{\\epsilon} && \\quad \\text{since the limit of a sum is the sum of the limits}\\\\\n",
+    "& = g'(x) + h'(x) && \\quad \\text{using the definitions of }g'(x) \\text{ and } h'(x)\n",
     "\\end{align*}\n",
     "$"
    ]
@@ -1220,7 +1220,7 @@
     "& \\dfrac{\\partial f_2}{\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
     "& \\dots\n",
     "& \\dfrac{\\partial f_2}{\\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
-    "\\vdots & \\vdots & \\ddots & \\vdots \\\\\n",
+    "\\vdots & \\vdots && \\ddots && \\vdots \\\\\n",
     "\\dfrac{\\partial f_m}{\\partial x_1}(\\mathbf{x}_\\mathbf{A})\n",
     "& \\dfrac{\\partial f_m}{\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
     "& \\dots\n",
@@ -1264,7 +1264,7 @@
     "& \\dfrac{\\partial^2 f}{\\partial {x_2}^2}(\\mathbf{x}_\\mathbf{A})\n",
     "& \\dots\n",
     "& \\dfrac{\\partial^2 f}{\\partial x_2\\, \\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
-    "\\vdots & \\vdots & \\ddots & \\vdots \\\\\n",
+    "\\vdots & \\vdots && \\ddots && \\vdots \\\\\n",
     "\\dfrac{\\partial^2 f}{\\partial x_n\\,\\partial x_1}(\\mathbf{x}_\\mathbf{A})\n",
     "& \\dfrac{\\partial^2 f}{\\partial x_n\\,\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
     "& \\dots\n",
@@ -1306,10 +1306,10 @@
     "\n",
     "$\n",
     "\\begin{align*}\n",
-    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} & \\quad\\text{by definition}\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{c - c}{\\epsilon} & \\quad \\text{using }f(x) = c \\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim 0 & \\quad \\text{since }c - c = 0\\\\\n",
-    "& = 0 & \\quad \\text{since the limit of a constant is that constant}\n",
+    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} && \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{c - c}{\\epsilon} && \\quad \\text{using }f(x) = c \\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim 0 && \\quad \\text{since }c - c = 0\\\\\n",
+    "& = 0 && \\quad \\text{since the limit of a constant is that constant}\n",
     "\\end{align*}\n",
     "$\n"
    ]
@@ -1325,16 +1325,16 @@
     "\n",
     "$\n",
     "\\begin{align*}\n",
-    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} & \\quad\\text{by definition}\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon)h(x+\\epsilon) - g(x)h(x)}{\\epsilon} & \\quad \\text{using }f(x) = g(x)h(x) \\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon)h(x+\\epsilon) - g(x)h(x+\\epsilon) + g(x)h(x + \\epsilon) - g(x)h(x)}{\\epsilon} & \\quad \\text{subtracting and adding }g(x)h(x + \\epsilon)\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon)h(x+\\epsilon) - g(x)h(x+\\epsilon)}{\\epsilon} + \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x)h(x + \\epsilon) - g(x)h(x)}{\\epsilon} & \\quad \\text{since the limit of a sum is the sum of the limits}\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}h(x+\\epsilon)\\right]} \\,+\\, \\underset{\\epsilon \\to 0}\\lim{\\left[g(x)\\dfrac{h(x + \\epsilon) - h(x)}{\\epsilon}\\right]} & \\quad \\text{factorizing }h(x+\\epsilon) \\text{ and } g(x)\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}h(x+\\epsilon)\\right]} \\,+\\, g(x)\\underset{\\epsilon \\to 0}\\lim{\\dfrac{h(x + \\epsilon) - h(x)}{\\epsilon}} & \\quad \\text{taking } g(x) \\text{ out of the limit since it does not depend on }\\epsilon\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}h(x+\\epsilon)\\right]} \\,+\\, g(x)h'(x) & \\quad \\text{using the definition of h'(x)}\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}\\right]}\\underset{\\epsilon \\to 0}\\lim{h(x+\\epsilon)} + g(x)h'(x) & \\quad \\text{since the limit of a product is the product of the limits}\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}\\right]}h(x) + h(x)g'(x) & \\quad \\text{since } h(x) \\text{ is continuous}\\\\\n",
-    "& = g'(x)h(x) + g(x)h'(x) & \\quad \\text{using the definition of }g'(x)\n",
+    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} && \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon)h(x+\\epsilon) - g(x)h(x)}{\\epsilon} && \\quad \\text{using }f(x) = g(x)h(x) \\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon)h(x+\\epsilon) - g(x)h(x+\\epsilon) + g(x)h(x + \\epsilon) - g(x)h(x)}{\\epsilon} && \\quad \\text{subtracting and adding }g(x)h(x + \\epsilon)\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x+\\epsilon)h(x+\\epsilon) - g(x)h(x+\\epsilon)}{\\epsilon} + \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(x)h(x + \\epsilon) - g(x)h(x)}{\\epsilon} && \\quad \\text{since the limit of a sum is the sum of the limits}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}h(x+\\epsilon)\\right]} \\,+\\, \\underset{\\epsilon \\to 0}\\lim{\\left[g(x)\\dfrac{h(x + \\epsilon) - h(x)}{\\epsilon}\\right]} && \\quad \\text{factorizing }h(x+\\epsilon) \\text{ and } g(x)\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}h(x+\\epsilon)\\right]} \\,+\\, g(x)\\underset{\\epsilon \\to 0}\\lim{\\dfrac{h(x + \\epsilon) - h(x)}{\\epsilon}} && \\quad \\text{taking } g(x) \\text{ out of the limit since it does not depend on }\\epsilon\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}h(x+\\epsilon)\\right]} \\,+\\, g(x)h'(x) && \\quad \\text{using the definition of }h'(x)\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}\\right]}\\underset{\\epsilon \\to 0}\\lim{h(x+\\epsilon)} + g(x)h'(x) && \\quad \\text{since the limit of a product is the product of the limits}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(x+\\epsilon) - g(x)}{\\epsilon}\\right]}h(x) + g(x)h'(x) && \\quad \\text{since } h(x) \\text{ is continuous}\\\\\n",
+    "& = g'(x)h(x) + g(x)h'(x) && \\quad \\text{using the definition of }g'(x)\n",
     "\\end{align*}\n",
     "$\n",
     "\n",
@@ -1354,16 +1354,16 @@
     "\n",
     "$\n",
     "\\begin{align*}\n",
-    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} & \\quad\\text{by definition}\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{\\epsilon} & \\quad \\text{using }f(x) = g(h(x))\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{h(x+\\epsilon)-h(x)}{h(x+\\epsilon)-h(x)}\\,\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{\\epsilon}\\right]} & \\quad \\text{multiplying and dividing by }h(x+\\epsilon) - h(x)\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{h(x+\\epsilon)-h(x)}{\\epsilon}\\,\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{h(x+\\epsilon)-h(x)}\\right]} & \\quad \\text{swapping the denominators}\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{h(x+\\epsilon)-h(x)}{\\epsilon}\\right]} \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{h(x+\\epsilon)-h(x)}\\right]} & \\quad \\text{the limit of a product is the product of the limits}\\\\\n",
-    "& = h'(x) \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{h(x+\\epsilon)-h(x)}\\right]} & \\quad \\text{using the definition of }h'(x)\\\\\n",
-    "& = h'(x) \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(u) - g(v)}{u-v}\\right]} & \\quad \\text{using }u=h(x+\\epsilon) \\text{ and } v=h(x)\\\\\n",
-    "& = h'(x) \\underset{u \\to v}\\lim{\\left[\\dfrac{g(u) - g(v)}{u-v}\\right]} & \\quad \\text{ since } h \\text{ is continuous, so } \\underset{\\epsilon \\to 0}\\lim{u}=v\\\\\n",
-    "& = h'(x)g'(v) & \\quad \\text{ using the definition of } g'(v)\\\\\n",
-    "& = h'(x)g'(h(x)) & \\quad \\text{ since } v = h(x)\n",
+    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} && \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{\\epsilon} && \\quad \\text{using }f(x) = g(h(x))\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{h(x+\\epsilon)-h(x)}{h(x+\\epsilon)-h(x)}\\,\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{\\epsilon}\\right]} && \\quad \\text{multiplying and dividing by }h(x+\\epsilon) - h(x)\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{h(x+\\epsilon)-h(x)}{\\epsilon}\\,\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{h(x+\\epsilon)-h(x)}\\right]} && \\quad \\text{swapping the denominators}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{h(x+\\epsilon)-h(x)}{\\epsilon}\\right]} \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{h(x+\\epsilon)-h(x)}\\right]} && \\quad \\text{the limit of a product is the product of the limits}\\\\\n",
+    "& = h'(x) \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(h(x+\\epsilon)) - g(h(x))}{h(x+\\epsilon)-h(x)}\\right]} && \\quad \\text{using the definition of }h'(x)\\\\\n",
+    "& = h'(x) \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{g(u) - g(v)}{u-v}\\right]} && \\quad \\text{using }u=h(x+\\epsilon) \\text{ and } v=h(x)\\\\\n",
+    "& = h'(x) \\underset{u \\to v}\\lim{\\left[\\dfrac{g(u) - g(v)}{u-v}\\right]} && \\quad \\text{ since } h \\text{ is continuous, so } \\underset{\\epsilon \\to 0}\\lim{u}=v\\\\\n",
+    "& = h'(x)g'(v) && \\quad \\text{ using the definition of } g'(v)\\\\\n",
+    "& = h'(x)g'(h(x)) && \\quad \\text{ since } v = h(x)\n",
     "\\end{align*}\n",
     "$"
    ]
@@ -1381,13 +1381,13 @@
     "\n",
     "$\n",
     "\\begin{align*}\n",
-    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} & \\quad\\text{by definition}\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{e^{x+\\epsilon} - e^x}{\\epsilon} & \\quad \\text{using }f(x) = e^x\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{e^x e^\\epsilon - e^x}{\\epsilon} & \\quad \\text{using the fact that } x^{a+b}=x^a x^b\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[e^x\\dfrac{e^\\epsilon - 1}{\\epsilon}\\right]} & \\quad \\text{factoring out }e^x\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim{e^x} \\, \\underset{\\epsilon \\to 0}\\lim{\\dfrac{e^\\epsilon - 1}{\\epsilon}} & \\quad \\text{the limit of a product is the product of the limits}\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim{e^x} & \\quad \\text{since }\\underset{\\epsilon \\to 0}\\lim{\\dfrac{e^\\epsilon - 1}{\\epsilon}}=1\\\\\n",
-    "& = e^x & \\quad \\text{since } e^x \\text{ does not depend on }\\epsilon\n",
+    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} && \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{e^{x+\\epsilon} - e^x}{\\epsilon} && \\quad \\text{using }f(x) = e^x\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{e^x e^\\epsilon - e^x}{\\epsilon} && \\quad \\text{using the fact that } x^{a+b}=x^a x^b\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[e^x\\dfrac{e^\\epsilon - 1}{\\epsilon}\\right]} && \\quad \\text{factoring out }e^x\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{e^x} \\, \\underset{\\epsilon \\to 0}\\lim{\\dfrac{e^\\epsilon - 1}{\\epsilon}} && \\quad \\text{the limit of a product is the product of the limits}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{e^x} && \\quad \\text{since }\\underset{\\epsilon \\to 0}\\lim{\\dfrac{e^\\epsilon - 1}{\\epsilon}}=1\\\\\n",
+    "& = e^x && \\quad \\text{since } e^x \\text{ does not depend on }\\epsilon\n",
     "\\end{align*}\n",
     "$\n"
    ]
@@ -1413,17 +1413,17 @@
     "\n",
     "$\n",
     "\\begin{align*}\n",
-    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} & \\quad\\text{by definition}\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{\\ln(x+\\epsilon) - \\ln(x)}{\\epsilon} & \\quad \\text{using }f(x) = \\ln(x)\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{\\ln\\left(\\dfrac{x+\\epsilon}{x}\\right)}{\\epsilon} & \\quad \\text{since }\\ln(a)-\\ln(b)=\\ln\\left(\\dfrac{a}{b}\\right)\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{1}{\\epsilon} \\, \\ln\\left(1 + \\dfrac{\\epsilon}{x}\\right)\\right]} & \\quad \\text{just moving things around a bit}\\\\\n",
-    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{1}{xu} \\, \\ln\\left(1 + u\\right)\\right]} & \\quad \\text{defining }u=\\dfrac{\\epsilon}{x} \\text{ and thus } \\epsilon=xu\\\\\n",
-    "& = \\underset{u \\to 0}\\lim{\\left[\\dfrac{1}{xu} \\, \\ln\\left(1 + u\\right)\\right]} & \\quad \\text{replacing } \\underset{\\epsilon \\to 0}\\lim \\text{ with } \\underset{u \\to 0}\\lim \\text{ since }\\underset{\\epsilon \\to 0}\\lim u=0\\\\\n",
-    "& = \\underset{u \\to 0}\\lim{\\left[\\dfrac{1}{x} \\, \\ln\\left((1 + u)^{1/u}\\right)\\right]} & \\quad \\text{since }a\\ln(b)=\\ln(a^b)\\\\\n",
-    "& = \\dfrac{1}{x}\\underset{u \\to 0}\\lim{\\left[\\ln\\left((1 + u)^{1/u}\\right)\\right]} & \\quad \\text{taking }\\dfrac{1}{x} \\text{ out since it does not depend on }\\epsilon\\\\\n",
-    "& = \\dfrac{1}{x}\\ln\\left(\\underset{u \\to 0}\\lim{(1 + u)^{1/u}}\\right) & \\quad \\text{taking }\\ln\\text{ out since it is a continuous function}\\\\\n",
-    "& = \\dfrac{1}{x}\\ln(e) & \\quad \\text{since }e=\\underset{u \\to 0}\\lim{(1 + u)^{1/u}}\\\\\n",
-    "& = \\dfrac{1}{x} & \\quad \\text{since }\\ln(e)=1\n",
+    "f'(x) & = \\underset{\\epsilon \\to 0}\\lim\\dfrac{f(x+\\epsilon) - f(x)}{\\epsilon} && \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{\\ln(x+\\epsilon) - \\ln(x)}{\\epsilon} && \\quad \\text{using }f(x) = \\ln(x)\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim\\dfrac{\\ln\\left(\\dfrac{x+\\epsilon}{x}\\right)}{\\epsilon} && \\quad \\text{since }\\ln(a)-\\ln(b)=\\ln\\left(\\dfrac{a}{b}\\right)\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{1}{\\epsilon} \\, \\ln\\left(1 + \\dfrac{\\epsilon}{x}\\right)\\right]} && \\quad \\text{just moving things around a bit}\\\\\n",
+    "& = \\underset{\\epsilon \\to 0}\\lim{\\left[\\dfrac{1}{xu} \\, \\ln\\left(1 + u\\right)\\right]} && \\quad \\text{defining }u=\\dfrac{\\epsilon}{x} \\text{ and thus } \\epsilon=xu\\\\\n",
+    "& = \\underset{u \\to 0}\\lim{\\left[\\dfrac{1}{xu} \\, \\ln\\left(1 + u\\right)\\right]} && \\quad \\text{replacing } \\underset{\\epsilon \\to 0}\\lim \\text{ with } \\underset{u \\to 0}\\lim \\text{ since }\\underset{\\epsilon \\to 0}\\lim u=0\\\\\n",
+    "& = \\underset{u \\to 0}\\lim{\\left[\\dfrac{1}{x} \\, \\ln\\left((1 + u)^{1/u}\\right)\\right]} && \\quad \\text{since }a\\ln(b)=\\ln(a^b)\\\\\n",
+    "& = \\dfrac{1}{x}\\underset{u \\to 0}\\lim{\\left[\\ln\\left((1 + u)^{1/u}\\right)\\right]} && \\quad \\text{taking }\\dfrac{1}{x} \\text{ out since it does not depend on }u\\\\\n",
+    "& = \\dfrac{1}{x}\\ln\\left(\\underset{u \\to 0}\\lim{(1 + u)^{1/u}}\\right) && \\quad \\text{taking }\\ln\\text{ out since it is a continuous function}\\\\\n",
+    "& = \\dfrac{1}{x}\\ln(e) && \\quad \\text{since }e=\\underset{u \\to 0}\\lim{(1 + u)^{1/u}}\\\\\n",
+    "& = \\dfrac{1}{x} && \\quad \\text{since }\\ln(e)=1\n",
     "\\end{align*}\n",
     "$\n"
    ]
@@ -1656,13 +1656,13 @@
     "\n",
     "$\n",
     "\\begin{align*}\n",
-    "\\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta} & =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta}\\frac{\\cos(\\theta) + 1}{\\cos(\\theta) + 1} & \\quad \\text{ multiplying and dividing by }\\cos(\\theta)+1\\\\\n",
-    "& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos^2(\\theta) - 1}{\\theta(\\cos(\\theta) + 1)} & \\quad \\text{ since }(a-1)(a+1)=a^2-1\\\\\n",
-    "& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin^2(\\theta)}{\\theta(\\cos(\\theta) + 1)} & \\quad \\text{ since }\\cos^2(\\theta) - 1 = \\sin^2(\\theta)\\\\\n",
-    "& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta}\\dfrac{\\sin(\\theta)}{\\cos(\\theta) + 1} & \\quad \\text{ just rearranging the terms}\\\\\n",
-    "& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta} \\, \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\cos(\\theta) + 1} & \\quad \\text{ since the limit of a product is the product of the limits}\\\\\n",
-    "& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\cos(\\theta) + 1} & \\quad \\text{ since } \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta}=1\\\\\n",
-    "& =  \\dfrac{0}{1+1} & \\quad \\text{ since } \\underset{\\theta \\to 0}\\lim\\sin(\\theta)=0 \\text{ and } \\underset{\\theta \\to 0}\\lim\\cos(\\theta)=1\\\\\n",
+    "\\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta} & =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta}\\frac{\\cos(\\theta) + 1}{\\cos(\\theta) + 1} && \\quad \\text{ multiplying and dividing by }\\cos(\\theta)+1\\\\\n",
+    "& =  \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos^2(\\theta) - 1}{\\theta(\\cos(\\theta) + 1)} && \\quad \\text{ since }(a-1)(a+1)=a^2-1\\\\\n",
+    "& =  \\underset{\\theta \\to 0}\\lim\\dfrac{-\\sin^2(\\theta)}{\\theta(\\cos(\\theta) + 1)} && \\quad \\text{ since }\\cos^2(\\theta) - 1 = -\\sin^2(\\theta)\\\\\n",
+    "& =  -\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta}\\dfrac{\\sin(\\theta)}{\\cos(\\theta) + 1} && \\quad \\text{ just rearranging the terms}\\\\\n",
+    "& =  -\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta} \\, \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\cos(\\theta) + 1} && \\quad \\text{ since the limit of a product is the product of the limits}\\\\\n",
+    "& =  -\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\cos(\\theta) + 1} && \\quad \\text{ since } \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta}=1\\\\\n",
+    "& =  -\\dfrac{0}{1+1} && \\quad \\text{ since } \\underset{\\theta \\to 0}\\lim\\sin(\\theta)=0 \\text{ and } \\underset{\\theta \\to 0}\\lim\\cos(\\theta)=1\\\\\n",
     "& =  0\\\\\n",
     "\\end{align*}\n",
     "$\n",
@@ -1695,13 +1695,13 @@
    "source": [
     "$\n",
     "\\begin{align*}\n",
-    "f'(x) & = \\underset{\\theta \\to 0}\\lim\\dfrac{f(x+\\theta) - f(x)}{\\theta} & \\quad\\text{by definition}\\\\\n",
-    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(x+\\theta) - \\sin(x)}{\\theta} & \\quad \\text{using }f(x) = \\sin(x)\\\\\n",
-    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(x)\\sin(\\theta) + \\sin(x)\\cos(\\theta) - \\sin(x)}{\\theta} & \\quad \\text{since } cos(a+b)=\\cos(a)\\sin(b)+\\sin(a)\\cos(b)\\\\\n",
-    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(x)\\sin(\\theta)}{\\theta} + \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(x)\\cos(\\theta) - \\sin(x)}{\\theta} & \\quad \\text{since the limit of a sum is the sum of the limits}\\\\\n",
-    "& = \\cos(x)\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta} + \\sin(x)\\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta} & \\quad \\text{bringing out } \\cos(x) \\text{ and } \\sin(x) \\text{ since they don't depend on }\\theta\\\\\n",
-    "& = \\cos(x)\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta} & \\quad \\text{since }\\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta}=0\\\\\n",
-    "& = \\cos(x) & \\quad \\text{since }\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta}=1\\\\\n",
+    "f'(x) & = \\underset{\\theta \\to 0}\\lim\\dfrac{f(x+\\theta) - f(x)}{\\theta} && \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(x+\\theta) - \\sin(x)}{\\theta} && \\quad \\text{using }f(x) = \\sin(x)\\\\\n",
+    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(x)\\sin(\\theta) + \\sin(x)\\cos(\\theta) - \\sin(x)}{\\theta} && \\quad \\text{since } \\sin(a+b)=\\cos(a)\\sin(b)+\\sin(a)\\cos(b)\\\\\n",
+    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(x)\\sin(\\theta)}{\\theta} + \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(x)\\cos(\\theta) - \\sin(x)}{\\theta} && \\quad \\text{since the limit of a sum is the sum of the limits}\\\\\n",
+    "& = \\cos(x)\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta} + \\sin(x)\\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta} && \\quad \\text{bringing out } \\cos(x) \\text{ and } \\sin(x) \\text{ since they don't depend on }\\theta\\\\\n",
+    "& = \\cos(x)\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta} && \\quad \\text{since }\\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(\\theta) - 1}{\\theta}=0\\\\\n",
+    "& = \\cos(x) && \\quad \\text{since }\\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(\\theta)}{\\theta}=1\\\\\n",
     "\\end{align*}\n",
     "$\n"
    ]
@@ -1719,14 +1719,14 @@
     "\n",
     "$\n",
     "\\begin{align*}\n",
-    "f'(x) & = \\underset{\\theta \\to 0}\\lim\\dfrac{f(x+\\theta) - f(x)}{\\theta} & \\quad\\text{by definition}\\\\\n",
-    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(x+\\theta) - \\cos(x)}{\\theta} & \\quad \\text{using }f(x) = \\cos(x)\\\\\n",
-    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin\\left(x+\\dfrac{\\pi}{2}+\\theta\\right) - \\sin\\left(x+\\dfrac{\\pi}{2}\\right)}{\\theta} & \\quad \\text{since }\\cos(x) = \\sin\\left(x+\\dfrac{\\pi}{2}\\right)\\\\\n",
-    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(z+\\theta) - \\sin(z)}{\\theta} & \\quad \\text{using }z = x + \\dfrac{\\pi}{2}\\\\\n",
-    "& = \\sin'(z) & \\quad \\text{using the definition of }\\sin'(z)\\\\\n",
-    "& = \\cos(z) & \\quad \\text{since we proved that }\\sin'(z)=\\cos(z)\\\\\n",
-    "& = \\cos\\left(x + \\dfrac{\\pi}{2}\\right) & \\quad \\text{using the definition of }z\\\\\n",
-    "& = -\\sin(x) & \\quad \\text{using this well-known rule of trigonometry}\n",
+    "f'(x) & = \\underset{\\theta \\to 0}\\lim\\dfrac{f(x+\\theta) - f(x)}{\\theta} && \\quad\\text{by definition}\\\\\n",
+    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\cos(x+\\theta) - \\cos(x)}{\\theta} && \\quad \\text{using }f(x) = \\cos(x)\\\\\n",
+    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin\\left(x+\\dfrac{\\pi}{2}+\\theta\\right) - \\sin\\left(x+\\dfrac{\\pi}{2}\\right)}{\\theta} && \\quad \\text{since }\\cos(x) = \\sin\\left(x+\\dfrac{\\pi}{2}\\right)\\\\\n",
+    "& = \\underset{\\theta \\to 0}\\lim\\dfrac{\\sin(z+\\theta) - \\sin(z)}{\\theta} && \\quad \\text{using }z = x + \\dfrac{\\pi}{2}\\\\\n",
+    "& = \\sin'(z) && \\quad \\text{using the definition of }\\sin'(z)\\\\\n",
+    "& = \\cos(z) && \\quad \\text{since we proved that }\\sin'(z)=\\cos(z)\\\\\n",
+    "& = \\cos\\left(x + \\dfrac{\\pi}{2}\\right) && \\quad \\text{using the definition of }z\\\\\n",
+    "& = -\\sin(x) && \\quad \\text{using this well-known rule of trigonometry}\n",
     "\\end{align*}\n",
     "$\n"
    ]

From 3831acbcd2a1e7790cb0604592a563cd29baa2de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joaqu=C3=ADn=20Ruales?=
 <1588988+jruales@users.noreply.github.com>
Date: Wed, 6 Oct 2021 17:28:41 -0700
Subject: [PATCH 12/16] Revert changes to pmatrix, since it's unrelated

---
 math_differential_calculus.ipynb | 40 ++++++++++++++++----------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/math_differential_calculus.ipynb b/math_differential_calculus.ipynb
index c118d7c..c13d633 100644
--- a/math_differential_calculus.ipynb
+++ b/math_differential_calculus.ipynb
@@ -1213,18 +1213,18 @@
     "$\n",
     "\\mathbf{J}_\\mathbf{f}(\\mathbf{x}_\\mathbf{A}) = \\begin{pmatrix}\n",
     "\\dfrac{\\partial f_1}{\\partial x_1}(\\mathbf{x}_\\mathbf{A})\n",
-    "& \\dfrac{\\partial f_1}{\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
-    "& \\dots\n",
-    "& \\dfrac{\\partial f_1}{\\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
+    "&& \\dfrac{\\partial f_1}{\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
+    "&& \\dots\n",
+    "&& \\dfrac{\\partial f_1}{\\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
     "\\dfrac{\\partial f_2}{\\partial x_1}(\\mathbf{x}_\\mathbf{A})\n",
-    "& \\dfrac{\\partial f_2}{\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
-    "& \\dots\n",
-    "& \\dfrac{\\partial f_2}{\\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
-    "\\vdots & \\vdots && \\ddots && \\vdots \\\\\n",
+    "&& \\dfrac{\\partial f_2}{\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
+    "&& \\dots\n",
+    "&& \\dfrac{\\partial f_2}{\\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
+    "\\vdots && \\vdots && \\ddots && \\vdots \\\\\n",
     "\\dfrac{\\partial f_m}{\\partial x_1}(\\mathbf{x}_\\mathbf{A})\n",
-    "& \\dfrac{\\partial f_m}{\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
-    "& \\dots\n",
-    "& \\dfrac{\\partial f_m}{\\partial x_n}(\\mathbf{x}_\\mathbf{A})\n",
+    "&& \\dfrac{\\partial f_m}{\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
+    "&& \\dots\n",
+    "&& \\dfrac{\\partial f_m}{\\partial x_n}(\\mathbf{x}_\\mathbf{A})\n",
     "\\end{pmatrix}\n",
     "$\n",
     "\n",
@@ -1257,18 +1257,18 @@
     "$\n",
     "\\mathbf{H}_f(\\mathbf{x}_\\mathbf{A}) = \\begin{pmatrix}\n",
     "\\dfrac{\\partial^2 f}{\\partial {x_1}^2}(\\mathbf{x}_\\mathbf{A})\n",
-    "& \\dfrac{\\partial^2 f}{\\partial x_1\\, \\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
-    "& \\dots\n",
-    "& \\dfrac{\\partial^2 f}{\\partial x_1\\, \\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
+    "&& \\dfrac{\\partial^2 f}{\\partial x_1\\, \\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
+    "&& \\dots\n",
+    "&& \\dfrac{\\partial^2 f}{\\partial x_1\\, \\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
     "\\dfrac{\\partial^2 f}{\\partial x_2\\,\\partial x_1}(\\mathbf{x}_\\mathbf{A})\n",
-    "& \\dfrac{\\partial^2 f}{\\partial {x_2}^2}(\\mathbf{x}_\\mathbf{A})\n",
-    "& \\dots\n",
-    "& \\dfrac{\\partial^2 f}{\\partial x_2\\, \\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
-    "\\vdots & \\vdots && \\ddots && \\vdots \\\\\n",
+    "&& \\dfrac{\\partial^2 f}{\\partial {x_2}^2}(\\mathbf{x}_\\mathbf{A})\n",
+    "&& \\dots\n",
+    "&& \\dfrac{\\partial^2 f}{\\partial x_2\\, \\partial x_n}(\\mathbf{x}_\\mathbf{A})\\\\\n",
+    "\\vdots && \\vdots && \\ddots && \\vdots \\\\\n",
     "\\dfrac{\\partial^2 f}{\\partial x_n\\,\\partial x_1}(\\mathbf{x}_\\mathbf{A})\n",
-    "& \\dfrac{\\partial^2 f}{\\partial x_n\\,\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
-    "& \\dots\n",
-    "& \\dfrac{\\partial^2 f}{\\partial {x_n}^2}(\\mathbf{x}_\\mathbf{A})\\\\\n",
+    "&& \\dfrac{\\partial^2 f}{\\partial x_n\\,\\partial x_2}(\\mathbf{x}_\\mathbf{A})\n",
+    "&& \\dots\n",
+    "&& \\dfrac{\\partial^2 f}{\\partial {x_n}^2}(\\mathbf{x}_\\mathbf{A})\\\\\n",
     "\\end{pmatrix}\n",
     "$"
    ]

From aad6e5186a5f347a750e99e7cc6d656beb3145e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= <ageron@users.noreply.github.com>
Date: Thu, 7 Oct 2021 16:39:37 +1300
Subject: [PATCH 13/16] Target=blue dot, prediction=red cross, fixes #472

---
 ...essing_sequences_using_rnns_and_cnns.ipynb | 22 ++++++++++++++-----
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/15_processing_sequences_using_rnns_and_cnns.ipynb b/15_processing_sequences_using_rnns_and_cnns.ipynb
index 99dba80..d1410b3 100644
--- a/15_processing_sequences_using_rnns_and_cnns.ipynb
+++ b/15_processing_sequences_using_rnns_and_cnns.ipynb
@@ -162,12 +162,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def plot_series(series, y=None, y_pred=None, x_label=\"$t$\", y_label=\"$x(t)$\"):\n",
+    "def plot_series(series, y=None, y_pred=None, x_label=\"$t$\", y_label=\"$x(t)$\", legend=True):\n",
     "    plt.plot(series, \".-\")\n",
     "    if y is not None:\n",
-    "        plt.plot(n_steps, y, \"bx\", markersize=10)\n",
+    "        plt.plot(n_steps, y, \"bo\", label=\"Target\")\n",
     "    if y_pred is not None:\n",
-    "        plt.plot(n_steps, y_pred, \"ro\")\n",
+    "        plt.plot(n_steps, y_pred, \"rx\", markersize=10, label=\"Prediction\")\n",
     "    plt.grid(True)\n",
     "    if x_label:\n",
     "        plt.xlabel(x_label, fontsize=16)\n",
@@ -175,16 +175,26 @@
     "        plt.ylabel(y_label, fontsize=16, rotation=0)\n",
     "    plt.hlines(0, 0, 100, linewidth=1)\n",
     "    plt.axis([0, n_steps + 1, -1, 1])\n",
+    "    if legend and (y is not None or y_pred is not None):\n",
+    "        plt.legend(fontsize=14, loc=\"upper left\")\n",
     "\n",
     "fig, axes = plt.subplots(nrows=1, ncols=3, sharey=True, figsize=(12, 4))\n",
     "for col in range(3):\n",
     "    plt.sca(axes[col])\n",
     "    plot_series(X_valid[col, :, 0], y_valid[col, 0],\n",
-    "                y_label=(\"$x(t)$\" if col==0 else None))\n",
+    "                y_label=(\"$x(t)$\" if col==0 else None),\n",
+    "                legend=(col == 0))\n",
     "save_fig(\"time_series_plot\")\n",
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Note**: in this notebook, the blue dots represent targets, and red crosses represent predictions. In the book, I first used blue crosses for targets and red dots for predictions, then I reversed this later in the chapter. Sorry if this caused some confusion."
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -499,8 +509,8 @@
     "    n_steps = X.shape[1]\n",
     "    ahead = Y.shape[1]\n",
     "    plot_series(X[0, :, 0])\n",
-    "    plt.plot(np.arange(n_steps, n_steps + ahead), Y[0, :, 0], \"ro-\", label=\"Actual\")\n",
-    "    plt.plot(np.arange(n_steps, n_steps + ahead), Y_pred[0, :, 0], \"bx-\", label=\"Forecast\", markersize=10)\n",
+    "    plt.plot(np.arange(n_steps, n_steps + ahead), Y[0, :, 0], \"bo-\", label=\"Actual\")\n",
+    "    plt.plot(np.arange(n_steps, n_steps + ahead), Y_pred[0, :, 0], \"rx-\", label=\"Forecast\", markersize=10)\n",
     "    plt.axis([0, n_steps + ahead, -1, 1])\n",
     "    plt.legend(fontsize=14)\n",
     "\n",

From b3081ceab6a29496655197370d7d0c4e77f0385d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= <ageron@users.noreply.github.com>
Date: Thu, 7 Oct 2021 17:41:46 +1300
Subject: [PATCH 14/16] Use cloned model when reusing a pretrained model, fixes
 #454

---
 11_training_deep_neural_networks.ipynb | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/11_training_deep_neural_networks.ipynb b/11_training_deep_neural_networks.ipynb
index e9fdc9f..4fb717f 100644
--- a/11_training_deep_neural_networks.ipynb
+++ b/11_training_deep_neural_networks.ipynb
@@ -970,6 +970,13 @@
     "model_B_on_A.add(keras.layers.Dense(1, activation=\"sigmoid\"))"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that `model_B_on_A` and `model_A` actually share layers now, so when we train one, it will update both models. If we want to avoid that, we need to build `model_B_on_A` on top of a *clone* of `model_A`:"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 61,
@@ -977,7 +984,9 @@
    "outputs": [],
    "source": [
     "model_A_clone = keras.models.clone_model(model_A)\n",
-    "model_A_clone.set_weights(model_A.get_weights())"
+    "model_A_clone.set_weights(model_A.get_weights())\n",
+    "model_B_on_A = keras.models.Sequential(model_A_clone.layers[:-1])\n",
+    "model_B_on_A.add(keras.layers.Dense(1, activation=\"sigmoid\"))"
    ]
   },
   {
@@ -1042,7 +1051,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Great! We got quite a bit of transfer: the error rate dropped by a factor of 4.5!"
+    "Great! We got quite a bit of transfer: the error rate dropped by a factor of 4.9!"
    ]
   },
   {
@@ -1051,7 +1060,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "(100 - 97.05) / (100 - 99.35)"
+    "(100 - 97.05) / (100 - 99.40)"
    ]
   },
   {

From fdb5d1695e3e5d06a6541ddae3d068f7394e3d3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= <ageron@users.noreply.github.com>
Date: Thu, 7 Oct 2021 18:10:34 +1300
Subject: [PATCH 15/16] Replace pip install mcpi with %pip which always
 installs in the right environment

---
 03_classification.ipynb                  | 9 ++++++++-
 13_loading_and_preprocessing_data.ipynb  | 2 +-
 16_nlp_with_rnns_and_attention.ipynb     | 4 ++--
 18_reinforcement_learning.ipynb          | 8 ++++----
 19_training_and_deploying_at_scale.ipynb | 2 +-
 5 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/03_classification.ipynb b/03_classification.ipynb
index 6c8e0db..6c8c7fc 100644
--- a/03_classification.ipynb
+++ b/03_classification.ipynb
@@ -2374,7 +2374,14 @@
    "source": [
     "# if running this notebook on Colab or Kaggle, we just pip install urlextract\n",
     "if IS_COLAB or IS_KAGGLE:\n",
-    "    !pip install -q -U urlextract"
+    "    %pip install -q -U urlextract"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Note:** inside a Jupyter notebook, always use `%pip` instead of `!pip`, as `!pip` may install the library inside the wrong environment, while `%pip` makes sure it's installed inside the currently running environment."
    ]
   },
   {
diff --git a/13_loading_and_preprocessing_data.ipynb b/13_loading_and_preprocessing_data.ipynb
index 8bb30cb..bd65f6b 100644
--- a/13_loading_and_preprocessing_data.ipynb
+++ b/13_loading_and_preprocessing_data.ipynb
@@ -52,7 +52,7 @@
     "IS_KAGGLE = \"kaggle_secrets\" in sys.modules\n",
     "\n",
     "if IS_COLAB or IS_KAGGLE:\n",
-    "    !pip install -q -U tfx==0.21.2\n",
+    "    %pip install -q -U tfx==0.21.2\n",
     "    print(\"You can safely ignore the package incompatibility errors.\")\n",
     "\n",
     "# Scikit-Learn ≥0.20 is required\n",
diff --git a/16_nlp_with_rnns_and_attention.ipynb b/16_nlp_with_rnns_and_attention.ipynb
index 3c03bc3..e20a868 100644
--- a/16_nlp_with_rnns_and_attention.ipynb
+++ b/16_nlp_with_rnns_and_attention.ipynb
@@ -57,8 +57,8 @@
     "IS_KAGGLE = \"kaggle_secrets\" in sys.modules\n",
     "\n",
     "if IS_COLAB:\n",
-    "    !pip install -q -U tensorflow-addons\n",
-    "    !pip install -q -U transformers\n",
+    "    %pip install -q -U tensorflow-addons\n",
+    "    %pip install -q -U transformers\n",
     "\n",
     "# Scikit-Learn ≥0.20 is required\n",
     "import sklearn\n",
diff --git a/18_reinforcement_learning.ipynb b/18_reinforcement_learning.ipynb
index 5e188d1..787dd6b 100644
--- a/18_reinforcement_learning.ipynb
+++ b/18_reinforcement_learning.ipynb
@@ -52,8 +52,8 @@
     "\n",
     "if IS_COLAB or IS_KAGGLE:\n",
     "    !apt update && apt install -y libpq-dev libsdl2-dev swig xorg-dev xvfb\n",
-    "    !pip install -q -U tf-agents pyvirtualdisplay gym[box2d]\n",
-    "    !pip install -q -U atari_py==0.2.5\n",
+    "    %pip install -q -U tf-agents pyvirtualdisplay gym[box2d]\n",
+    "    %pip install -q -U atari_py==0.2.5\n",
     "\n",
     "# Scikit-Learn ≥0.20 is required\n",
     "import sklearn\n",
@@ -220,7 +220,7 @@
     "Alternatively, you can install the [pyvirtualdisplay](https://github.com/ponty/pyvirtualdisplay) Python library which wraps Xvfb:\n",
     "\n",
     "```bash\n",
-    "python3 -m pip install -U pyvirtualdisplay\n",
+    "%pip install -U pyvirtualdisplay\n",
     "```\n",
     "\n",
     "And run the following code:"
@@ -2812,7 +2812,7 @@
    "metadata": {},
    "source": [
     "## 8.\n",
-    "_Exercise: Use policy gradients to solve OpenAI Gym's LunarLander-v2 environment. You will need to install the Box2D dependencies (`python3 -m pip install -U gym[box2d]`)._"
+    "_Exercise: Use policy gradients to solve OpenAI Gym's LunarLander-v2 environment. You will need to install the Box2D dependencies (`%pip install -U gym[box2d]`)._"
    ]
   },
   {
diff --git a/19_training_and_deploying_at_scale.ipynb b/19_training_and_deploying_at_scale.ipynb
index 526c725..a0cb2b3 100644
--- a/19_training_and_deploying_at_scale.ipynb
+++ b/19_training_and_deploying_at_scale.ipynb
@@ -54,7 +54,7 @@
     "    !echo \"deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal\" > /etc/apt/sources.list.d/tensorflow-serving.list\n",
     "    !curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | apt-key add -\n",
     "    !apt update && apt-get install -y tensorflow-model-server\n",
-    "    !pip install -q -U tensorflow-serving-api\n",
+    "    %pip install -q -U tensorflow-serving-api\n",
     "\n",
     "# Scikit-Learn ≥0.20 is required\n",
     "import sklearn\n",

From 495de15361516868dfe6e45c2e993ec957f976fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= <ageron@users.noreply.github.com>
Date: Thu, 7 Oct 2021 19:08:15 +1300
Subject: [PATCH 16/16] TF mostly fixed an issue so remove workaround for
 ReconstructingRegressor

---
 ...tom_models_and_training_with_tensorflow.ipynb | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/12_custom_models_and_training_with_tensorflow.ipynb b/12_custom_models_and_training_with_tensorflow.ipynb
index 723c20f..e225660 100644
--- a/12_custom_models_and_training_with_tensorflow.ipynb
+++ b/12_custom_models_and_training_with_tensorflow.ipynb
@@ -2209,7 +2209,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "**Note**: due to an issue introduced in TF 2.2 ([#46858](https://github.com/tensorflow/tensorflow/issues/46858)), it is currently not possible to use `add_loss()` along with the `build()` method. So the following code differs from the book: I create the `reconstruct` layer in the constructor instead of the `build()` method. Unfortunately, this means that the number of units in this layer must be hard-coded (alternatively, it could be passed as an argument to the constructor)."
+    "**Note**: the following code differs from the code in the book in two ways:\n",
+    "1. It creates a `keras.metrics.Mean()` metric in the constructor and uses it in the `call()` method to track the mean reconstruction loss. Since we only want to do this during training, we add a `training` argument to the `call()` method, and if `training` is `True`, then we update `reconstruction_mean` and we call `self.add_metric()` to ensure it's displayed properly.\n",
+    "2. Due to an issue introduced in TF 2.2 ([#46858](https://github.com/tensorflow/tensorflow/issues/46858)), we must not call `super().build()` inside the `build()` method."
    ]
   },
   {
@@ -2218,21 +2220,19 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "class ReconstructingRegressor(keras.models.Model):\n",
+    "class ReconstructingRegressor(keras.Model):\n",
     "    def __init__(self, output_dim, **kwargs):\n",
     "        super().__init__(**kwargs)\n",
     "        self.hidden = [keras.layers.Dense(30, activation=\"selu\",\n",
     "                                          kernel_initializer=\"lecun_normal\")\n",
     "                       for _ in range(5)]\n",
     "        self.out = keras.layers.Dense(output_dim)\n",
-    "        self.reconstruct = keras.layers.Dense(8) # workaround for TF issue #46858\n",
     "        self.reconstruction_mean = keras.metrics.Mean(name=\"reconstruction_error\")\n",
     "\n",
-    "    #Commented out due to TF issue #46858, see the note above\n",
-    "    #def build(self, batch_input_shape):\n",
-    "    #    n_inputs = batch_input_shape[-1]\n",
-    "    #    self.reconstruct = keras.layers.Dense(n_inputs)\n",
-    "    #    super().build(batch_input_shape)\n",
+    "    def build(self, batch_input_shape):\n",
+    "        n_inputs = batch_input_shape[-1]\n",
+    "        self.reconstruct = keras.layers.Dense(n_inputs)\n",
+    "        #super().build(batch_input_shape)\n",
     "\n",
     "    def call(self, inputs, training=None):\n",
     "        Z = inputs\n",