Sync notebook with book's code examples, and better identify extra code

main
Aurélien Geron 2022-02-19 18:17:36 +13:00
parent 1c2421fc88
commit b63019fd28
9 changed files with 318 additions and 301 deletions

View File

@ -177,7 +177,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book – code to save the figures as high-res PNGs for the book\n",
"# extra code – code to save the figures as high-res PNGs for the book\n",
"\n",
"IMAGES_PATH = Path() / \"images\" / \"end_to_end_project\"\n",
"IMAGES_PATH.mkdir(parents=True, exist_ok=True)\n",
@ -197,7 +197,7 @@
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# not in the book the next 5 lines define the default font sizes\n",
"# extra code the next 5 lines define the default font sizes\n",
"plt.rc('font', size=14)\n",
"plt.rc('axes', labelsize=14, titlesize=14)\n",
"plt.rc('legend', fontsize=14)\n",
@ -205,7 +205,7 @@
"plt.rc('ytick', labelsize=10)\n",
"\n",
"housing.hist(bins=50, figsize=(12, 8))\n",
"save_fig(\"attribute_histogram_plots\") # not in the book\n",
"save_fig(\"attribute_histogram_plots\") # extra code\n",
"plt.show()"
]
},
@ -351,7 +351,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book shows how to compute the 10.7% proba of getting a bad sample\n",
"# extra code shows how to compute the 10.7% proba of getting a bad sample\n",
"\n",
"from scipy.stats import binom\n",
"\n",
@ -375,7 +375,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book shows another way to estimate the probability of bad sample\n",
"# extra code shows another way to estimate the probability of bad sample\n",
"\n",
"np.random.seed(42)\n",
"\n",
@ -403,7 +403,7 @@
"housing[\"income_cat\"].value_counts().sort_index().plot.bar(rot=0, grid=True)\n",
"plt.xlabel(\"Income category\")\n",
"plt.ylabel(\"Number of districts\")\n",
"save_fig(\"housing_income_cat_bar_plot\") # not in the book\n",
"save_fig(\"housing_income_cat_bar_plot\") # extra code\n",
"plt.show()"
]
},
@ -464,7 +464,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code computes the data for Figure 2–10\n",
"# extra code computes the data for Figure 2–10\n",
"\n",
"def income_cat_proportions(data):\n",
" return data[\"income_cat\"].value_counts() / len(data)\n",
@ -524,7 +524,7 @@
"outputs": [],
"source": [
"housing.plot(kind=\"scatter\", x=\"longitude\", y=\"latitude\", grid=True)\n",
"save_fig(\"bad_visualization_plot\") # not in the book\n",
"save_fig(\"bad_visualization_plot\") # extra code\n",
"plt.show()"
]
},
@ -535,7 +535,7 @@
"outputs": [],
"source": [
"housing.plot(kind=\"scatter\", x=\"longitude\", y=\"latitude\", grid=True, alpha=0.2)\n",
"save_fig(\"better_visualization_plot\") # not in the book\n",
"save_fig(\"better_visualization_plot\") # extra code\n",
"plt.show()"
]
},
@ -549,7 +549,7 @@
" s=housing[\"population\"] / 100, label=\"population\",\n",
" c=\"median_house_value\", cmap=\"jet\", colorbar=True,\n",
" legend=True, sharex=False, figsize=(10, 7))\n",
"save_fig(\"housing_prices_scatterplot\") # not in the book\n",
"save_fig(\"housing_prices_scatterplot\") # extra code\n",
"plt.show()"
]
},
@ -573,7 +573,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates the first figure in the chapter\n",
"# extra code this cell generates the first figure in the chapter\n",
"\n",
"# Download the California image\n",
"filename = \"california.png\"\n",
@ -638,7 +638,7 @@
"attributes = [\"median_house_value\", \"median_income\", \"total_rooms\",\n",
" \"housing_median_age\"]\n",
"scatter_matrix(housing[attributes], figsize=(12, 8))\n",
"save_fig(\"scatter_matrix_plot\") # not in the book\n",
"save_fig(\"scatter_matrix_plot\") # extra code\n",
"plt.show()"
]
},
@ -650,7 +650,7 @@
"source": [
"housing.plot(kind=\"scatter\", x=\"median_income\", y=\"median_house_value\",\n",
" alpha=0.1, grid=True)\n",
"save_fig(\"income_vs_house_value_scatterplot\") # not in the book\n",
"save_fig(\"income_vs_house_value_scatterplot\") # extra code\n",
"plt.show()"
]
},
@ -1195,7 +1195,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates Figure 2–17\n",
"# extra code this cell generates Figure 2–17\n",
"fig, axs = plt.subplots(1, 2, figsize=(8, 3), sharey=True)\n",
"housing[\"population\"].hist(ax=axs[0], bins=50)\n",
"housing[\"population\"].apply(np.log).hist(ax=axs[1], bins=50)\n",
@ -1219,7 +1219,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code just shows that we get a uniform distribution\n",
"# extra code just shows that we get a uniform distribution\n",
"percentiles = [np.percentile(housing[\"median_income\"], p)\n",
" for p in range(1, 100)]\n",
"flattened_median_income = pd.cut(housing[\"median_income\"],\n",
@ -1251,7 +1251,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates Figure 2–18\n",
"# extra code this cell generates Figure 2–18\n",
"\n",
"ages = np.linspace(housing[\"housing_median_age\"].min(),\n",
" housing[\"housing_median_age\"].max(),\n",
@ -1488,7 +1488,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates Figure 2–19\n",
"# extra code this cell generates Figure 2–19\n",
"\n",
"housing_renamed = housing.rename(columns={\n",
" \"latitude\": \"Latitude\", \"longitude\": \"Longitude\",\n",
@ -1638,7 +1638,7 @@
"metadata": {},
"outputs": [],
"source": [
"df_housing_num_prepared.head(2) # not in the book"
"df_housing_num_prepared.head(2) # extra code"
]
},
{
@ -1737,7 +1737,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code shows that we can get a DataFrame out if we want\n",
"# extra code shows that we can get a DataFrame out if we want\n",
"housing_prepared_fr = pd.DataFrame(\n",
" housing_prepared,\n",
" columns=preprocessing.get_feature_names_out(),\n",
@ -1866,7 +1866,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code computes the error ratios discussed in the book\n",
"# extra code computes the error ratios discussed in the book\n",
"error_ratios = housing_predictions[:5].round(-2) / housing_labels.iloc[:5].values - 1\n",
"print(\", \".join([f\"{100 * ratio:.1f}%\" for ratio in error_ratios]))"
]
@ -1942,7 +1942,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code computes the error stats for the linear model\n",
"# extra code computes the error stats for the linear model\n",
"lin_rmses = -cross_val_score(lin_reg, housing, housing_labels,\n",
" scoring=\"neg_root_mean_squared_error\", cv=10)\n",
"pd.Series(lin_rmses).describe()"
@ -2062,7 +2062,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code shows part of the output of get_params().keys()\n",
"# extra code shows part of the output of get_params().keys()\n",
"print(str(full_pipeline.get_params().keys())[:1000] + \"...\")"
]
},
@ -2107,7 +2107,7 @@
"cv_res = pd.DataFrame(grid_search.cv_results_)\n",
"cv_res.sort_values(by=\"mean_test_score\", ascending=False, inplace=True)\n",
"\n",
"# not in the book these few lines of code just make the DataFrame look nicer\n",
"# extra code these few lines of code just make the DataFrame look nicer\n",
"cv_res = cv_res[[\"param_preprocessing__geo__n_clusters\",\n",
" \"param_random_forest__max_features\", \"split0_test_score\",\n",
" \"split1_test_score\", \"split2_test_score\", \"mean_test_score\"]]\n",
@ -2174,7 +2174,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code displays the random search results\n",
"# extra code displays the random search results\n",
"cv_res = pd.DataFrame(rnd_search.cv_results_)\n",
"cv_res.sort_values(by=\"mean_test_score\", ascending=False, inplace=True)\n",
"cv_res = cv_res[[\"param_preprocessing__geo__n_clusters\",\n",
@ -2213,7 +2213,7 @@
},
"outputs": [],
"source": [
"# not in the book plots a few distributions you can use in randomized search\n",
"# extra code plots a few distributions you can use in randomized search\n",
"\n",
"from scipy.stats import randint, uniform, geom, expon\n",
"\n",
@ -2275,7 +2275,7 @@
},
"outputs": [],
"source": [
"# not in the book shows the difference between expon and reciprocal\n",
"# extra code shows the difference between expon and reciprocal\n",
"\n",
"from scipy.stats import reciprocal\n",
"\n",
@ -2410,7 +2410,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book shows how to compute a confidence interval for the RMSE\n",
"# extra code shows how to compute a confidence interval for the RMSE\n",
"m = len(squared_errors)\n",
"mean = squared_errors.mean()\n",
"tscore = stats.t.ppf((1 + confidence) / 2, df=m - 1)\n",
@ -2431,7 +2431,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book computes a confidence interval again using z-score\n",
"# extra code computes a confidence interval again using z-score\n",
"zscore = stats.norm.ppf((1 + confidence) / 2)\n",
"zmargin = zscore * squared_errors.std(ddof=1) / np.sqrt(m)\n",
"np.sqrt(mean - zmargin), np.sqrt(mean + zmargin)"
@ -2477,7 +2477,7 @@
"source": [
"import joblib\n",
"\n",
"# not in the book excluded for conciseness\n",
"# extra code excluded for conciseness\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.base import BaseEstimator, TransformerMixin\n",
"from sklearn.metrics.pairwise import rbf_kernel\n",

View File

@ -142,7 +142,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book it's a bit too long\n",
"# extra code it's a bit too long\n",
"print(mnist.DESCR)"
]
},
@ -152,7 +152,7 @@
"metadata": {},
"outputs": [],
"source": [
"mnist.keys() # not in the book we only use data and target in this notebook"
"mnist.keys() # extra code we only use data and target in this notebook"
]
},
{
@ -216,7 +216,7 @@
"\n",
"some_digit = X[0]\n",
"plot_digit(some_digit)\n",
"save_fig(\"some_digit_plot\") # not in the book\n",
"save_fig(\"some_digit_plot\") # extra code\n",
"plt.show()"
]
},
@ -235,7 +235,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates Figure 3–2\n",
"# extra code this cell generates and saves Figure 3–2\n",
"plt.figure(figsize=(9, 9))\n",
"for idx, image_data in enumerate(X[:100]):\n",
" plt.subplot(10, 10, idx + 1)\n",
@ -427,7 +427,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code also computes the precision: TP / (FP + TP)\n",
"# extra code this cell also computes the precision: TP / (FP + TP)\n",
"cm[1, 1] / (cm[0, 1] + cm[1, 1])"
]
},
@ -446,7 +446,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code also computes the recall: TP / (FN + TP)\n",
"# extra code this cell also computes the recall: TP / (FN + TP)\n",
"cm[1, 1] / (cm[1, 0] + cm[1, 1])"
]
},
@ -467,7 +467,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code also computes the f1 score\n",
"# extra code this cell also computes the f1 score\n",
"cm[1, 1] / (cm[1, 1] + (cm[1, 0] + cm[0, 1]) / 2)"
]
},
@ -513,8 +513,8 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code just shows that y_scores > 0 produces the same\n",
"# result as calling predict()\n",
"# extra code just shows that y_scores > 0 produces the same result as\n",
"# calling predict()\n",
"y_scores > 0"
]
},
@ -556,12 +556,12 @@
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(8, 4)) # not in the book it's not needed, just formatting\n",
"plt.figure(figsize=(8, 4)) # extra code it's not needed, just formatting\n",
"plt.plot(thresholds, precisions[:-1], \"b--\", label=\"Precision\", linewidth=2)\n",
"plt.plot(thresholds, recalls[:-1], \"g-\", label=\"Recall\", linewidth=2)\n",
"plt.vlines(threshold, 0, 1.0, \"k\", \"dotted\", label=\"threshold\")\n",
"\n",
"# not in the book this section just beautifies and saves Figure 3–5\n",
"# extra code this section just beautifies and saves Figure 3–5\n",
"idx = (thresholds >= threshold).argmax() # first index ≥ threshold\n",
"plt.plot(thresholds[idx], precisions[idx], \"bo\")\n",
"plt.plot(thresholds[idx], recalls[idx], \"go\")\n",
@ -580,13 +580,13 @@
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.patches as patches # not in the book for the curved arrow\n",
"import matplotlib.patches as patches # extra code for the curved arrow\n",
"\n",
"plt.figure(figsize=(6, 5)) # not in the book not needed, just formatting\n",
"plt.figure(figsize=(6, 5)) # extra code not needed, just formatting\n",
"\n",
"plt.plot(recalls, precisions, linewidth=2, label=\"Precision/Recall curve\")\n",
"\n",
"# not in the book just beautifies and saves Figure 3–6\n",
"# extra code just beautifies and saves Figure 3–6\n",
"plt.plot([recalls[idx], recalls[idx]], [0., precisions[idx]], \"k:\")\n",
"plt.plot([0.0, recalls[idx]], [precisions[idx], precisions[idx]], \"k:\")\n",
"plt.plot([recalls[idx]], [precisions[idx]], \"ko\",\n",
@ -673,12 +673,12 @@
"idx_for_threshold_at_90 = (thresholds <= threshold_for_90_precision).argmax()\n",
"tpr_90, fpr_90 = tpr[idx_for_threshold_at_90], fpr[idx_for_threshold_at_90]\n",
"\n",
"plt.figure(figsize=(6, 5)) # not in the book not needed, just formatting\n",
"plt.figure(figsize=(6, 5)) # extra code not needed, just formatting\n",
"plt.plot(fpr, tpr, linewidth=2, label=\"ROC curve\")\n",
"plt.plot([0, 1], [0, 1], 'k:', label=\"Random classifier's ROC curve\")\n",
"plt.plot([fpr_90], [tpr_90], \"ko\", label=\"Threshold for 90% precision\")\n",
"\n",
"# not in the book just beautifies and saves Figure 3–7\n",
"# extra code just beautifies and saves Figure 3–7\n",
"plt.gca().add_patch(patches.FancyArrowPatch(\n",
" (0.20, 0.89), (0.07, 0.70),\n",
" connectionstyle=\"arc3,rad=.4\",\n",
@ -778,13 +778,13 @@
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(6, 5)) # not in the book not needed, just formatting\n",
"plt.figure(figsize=(6, 5)) # extra code not needed, just formatting\n",
"\n",
"plt.plot(recalls_forest, precisions_forest, \"b-\", linewidth=2,\n",
" label=\"Random Forest\")\n",
"plt.plot(recalls, precisions, \"--\", linewidth=2, label=\"SGD\")\n",
"\n",
"# not in the book just beautifies and saves Figure 3–8\n",
"# extra code just beautifies and saves Figure 3–8\n",
"plt.xlabel(\"Recall\")\n",
"plt.ylabel(\"Precision\")\n",
"plt.axis([0, 1, 0, 1])\n",
@ -925,7 +925,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code shows how to get all 45 OvO scores if needed\n",
"# extra code shows how to get all 45 OvO scores if needed\n",
"svm_clf.decision_function_shape = \"ovo\"\n",
"some_digit_scores_ovo = svm_clf.decision_function([some_digit])\n",
"some_digit_scores_ovo.round(2)"
@ -1033,7 +1033,7 @@
"from sklearn.metrics import ConfusionMatrixDisplay\n",
"\n",
"y_train_pred = cross_val_predict(sgd_clf, X_train_scaled, y_train, cv=3)\n",
"plt.rc('font', size=9) # not in the book make the text smaller\n",
"plt.rc('font', size=9) # extra code make the text smaller\n",
"ConfusionMatrixDisplay.from_predictions(y_train, y_train_pred)\n",
"plt.show()"
]
@ -1044,7 +1044,7 @@
"metadata": {},
"outputs": [],
"source": [
"plt.rc('font', size=10) # not in the book\n",
"plt.rc('font', size=10) # extra code\n",
"ConfusionMatrixDisplay.from_predictions(y_train, y_train_pred,\n",
" normalize=\"true\", values_format=\".0%\")\n",
"plt.show()"
@ -1057,7 +1057,7 @@
"outputs": [],
"source": [
"sample_weight = (y_train_pred != y_train)\n",
"plt.rc('font', size=10) # not in the book\n",
"plt.rc('font', size=10) # extra code\n",
"ConfusionMatrixDisplay.from_predictions(y_train, y_train_pred,\n",
" sample_weight=sample_weight,\n",
" normalize=\"true\", values_format=\".0%\")\n",
@ -1077,7 +1077,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates Figure 3–9\n",
"# extra code this cell generates and saves Figure 3–9\n",
"fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(9, 4))\n",
"plt.rc('font', size=9)\n",
"ConfusionMatrixDisplay.from_predictions(y_train, y_train_pred, ax=axs[0])\n",
@ -1096,7 +1096,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates Figure 3–10\n",
"# extra code this cell generates and saves Figure 3–10\n",
"fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(9, 4))\n",
"plt.rc('font', size=10)\n",
"ConfusionMatrixDisplay.from_predictions(y_train, y_train_pred, ax=axs[0],\n",
@ -1131,7 +1131,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates Figure 3–11\n",
"# extra code this cell generates and saves Figure 3–11\n",
"size = 5\n",
"pad = 0.2\n",
"plt.figure(figsize=(size, size))\n",
@ -1224,9 +1224,9 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code shows that we get a negligible performance\n",
"# improvement when we set average=\"weighted\" because the\n",
"# classes are already pretty well balanced.\n",
"# extra code shows that we get a negligible performance improvement when we\n",
"# set average=\"weighted\" because the classes are already pretty\n",
"# well balanced.\n",
"f1_score(y_multilabel, y_train_knn_pred, average=\"weighted\")"
]
},
@ -1279,7 +1279,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates Figure 3–12\n",
"# extra code this cell generates and saves Figure 3–12\n",
"plt.subplot(121); plot_digit(X_test_mod[0])\n",
"plt.subplot(122); plot_digit(y_test_mod[0])\n",
"save_fig(\"noisy_digit_example_plot\")\n",
@ -1296,7 +1296,7 @@
"knn_clf.fit(X_train_mod, y_train_mod)\n",
"clean_digit = knn_clf.predict([X_test_mod[0]])\n",
"plot_digit(clean_digit)\n",
"save_fig(\"cleaned_digit_example_plot\") # not in the book saves Figure 3–13\n",
"save_fig(\"cleaned_digit_example_plot\") # extra code saves Figure 3–13\n",
"plt.show()"
]
},

View File

@ -91,8 +91,8 @@
"plt.rc('font', size=14)\n",
"plt.rc('axes', labelsize=14, titlesize=14)\n",
"plt.rc('legend', fontsize=14)\n",
"plt.rc('xtick',labelsize=10)\n",
"plt.rc('ytick',labelsize=10)"
"plt.rc('xtick', labelsize=10)\n",
"plt.rc('ytick', labelsize=10)"
]
},
{
@ -154,7 +154,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book generates and saves Figure 4–1\n",
"# extra code generates and saves Figure 4–1\n",
"\n",
"import matplotlib.pyplot as plt\n",
"\n",
@ -209,11 +209,11 @@
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"plt.figure(figsize=(6, 4)) # not in the book not needed, just formatting\n",
"plt.figure(figsize=(6, 4)) # extra code not needed, just formatting\n",
"plt.plot(X_new, y_predict, \"r-\", label=\"Predictions\")\n",
"plt.plot(X, y, \"b.\")\n",
"\n",
"# not in the book beautifies and saves Figure 4–2\n",
"# extra code beautifies and saves Figure 4–2\n",
"plt.xlabel(\"$x_1$\")\n",
"plt.ylabel(\"$y$\", rotation=0)\n",
"plt.axis([0, 2, 0, 15])\n",
@ -327,7 +327,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book generates and saves Figure 4–8\n",
"# extra code generates and saves Figure 4–8\n",
"\n",
"import matplotlib as mpl\n",
"\n",
@ -352,9 +352,9 @@
" return theta_path\n",
"\n",
"np.random.seed(42)\n",
"theta = np.random.randn(2,1) # random initialization\n",
"theta = np.random.randn(2, 1) # random initialization\n",
"\n",
"plt.figure(figsize=(10,4))\n",
"plt.figure(figsize=(10, 4))\n",
"plt.subplot(131)\n",
"plot_gradient_descent(theta, eta=0.02)\n",
"plt.ylabel(\"$y$\", rotation=0)\n",
@ -381,8 +381,8 @@
"metadata": {},
"outputs": [],
"source": [
"theta_path_sgd = [] # not in the book we need to store the path of theta in\n",
" # the parameter space to plot the next figure"
"theta_path_sgd = [] # extra code we need to store the path of theta in the\n",
" # parameter space to plot the next figure"
]
},
{
@ -400,13 +400,13 @@
"np.random.seed(42)\n",
"theta = np.random.randn(2, 1) # random initialization\n",
"\n",
"n_shown = 20 # not in the book just needed to generate the figure below\n",
"plt.figure(figsize=(6, 4)) # not in the book not needed, just formatting\n",
"n_shown = 20 # extra code just needed to generate the figure below\n",
"plt.figure(figsize=(6, 4)) # extra code not needed, just formatting\n",
"\n",
"for epoch in range(n_epochs):\n",
" for iteration in range(m):\n",
"\n",
" # not in the book these 4 lines are used to generate the figure\n",
" # extra code these 4 lines are used to generate the figure\n",
" if epoch == 0 and iteration < n_shown:\n",
" y_predict = X_new_b @ theta\n",
" color = mpl.colors.rgb2hex(plt.cm.OrRd(iteration / n_shown + 0.15))\n",
@ -415,12 +415,12 @@
" random_index = np.random.randint(m)\n",
" xi = X_b[random_index : random_index + 1]\n",
" yi = y[random_index : random_index + 1]\n",
" gradients = 2 / 1 * xi.T @ (xi @ theta - yi)\n",
" gradients = 2 * xi.T @ (xi @ theta - yi) # for SGD, do not divide by m\n",
" eta = learning_schedule(epoch * m + iteration)\n",
" theta = theta - eta * gradients\n",
" theta_path_sgd.append(theta) # not in the book to generate the figure\n",
" theta_path_sgd.append(theta) # extra code to generate the figure\n",
"\n",
"# not in the book this section beautifies and saves Figure 4–10\n",
"# extra code this section beautifies and saves Figure 4–10\n",
"plt.plot(X, y, \"b.\")\n",
"plt.xlabel(\"$x_1$\")\n",
"plt.ylabel(\"$y$\", rotation=0)\n",
@ -449,9 +449,9 @@
"source": [
"from sklearn.linear_model import SGDRegressor\n",
"\n",
"sgd_reg = SGDRegressor(max_iter=1000, tol=1e-3, penalty=None, eta0=0.1,\n",
" random_state=42)\n",
"sgd_reg.fit(X, y.ravel()) # y.ravel() because fit() expects 1D targets"
"sgd_reg = SGDRegressor(max_iter=1000, tol=1e-5, penalty=None, eta0=0.01,\n",
" n_iter_no_change=100, random_state=42)\n",
"sgd_reg.fit(X, y.ravel()) # y.ravel() because fit() expects 1D targets\n"
]
},
{
@ -483,7 +483,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 4–11\n",
"# extra code this cell generates and saves Figure 4–11\n",
"\n",
"from math import ceil\n",
"\n",
@ -558,7 +558,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 4–12\n",
"# extra code this cell generates and saves Figure 4–12\n",
"plt.figure(figsize=(6, 4))\n",
"plt.plot(X, y, \"b.\")\n",
"plt.xlabel(\"$x_1$\")\n",
@ -608,7 +608,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 4–13\n",
"# extra code this cell generates and saves Figure 4–13\n",
"\n",
"X_new = np.linspace(-3, 3, 100).reshape(100, 1)\n",
"X_new_poly = poly_features.transform(X_new)\n",
@ -632,7 +632,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 4–14\n",
"# extra code this cell generates and saves Figure 4–14\n",
"\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.pipeline import make_pipeline\n",
@ -680,11 +680,11 @@
"train_errors = -train_scores.mean(axis=1)\n",
"valid_errors = -valid_scores.mean(axis=1)\n",
"\n",
"plt.figure(figsize=(6, 4)) # not in the book not needed, just formatting\n",
"plt.figure(figsize=(6, 4)) # extra code not needed, just formatting\n",
"plt.plot(train_sizes, train_errors, \"r-+\", linewidth=2, label=\"train\")\n",
"plt.plot(train_sizes, valid_errors, \"b-\", linewidth=3, label=\"valid\")\n",
"\n",
"# not in the book beautifies and saves Figure 4–15\n",
"# extra code beautifies and saves Figure 4–15\n",
"plt.xlabel(\"Training set size\")\n",
"plt.ylabel(\"RMSE\")\n",
"plt.grid()\n",
@ -718,7 +718,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book generates and saves Figure 4–16\n",
"# extra code generates and saves Figure 4–16\n",
"\n",
"train_errors = -train_scores.mean(axis=1)\n",
"valid_errors = -valid_scores.mean(axis=1)\n",
@ -762,7 +762,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book we've done this type of generation several times before\n",
"# extra code we've done this type of generation several times before\n",
"np.random.seed(42)\n",
"m = 20\n",
"X = 3 * np.random.rand(m, 1)\n",
@ -776,7 +776,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book a quick peek at the dataset we just generated\n",
"# extra code a quick peek at the dataset we just generated\n",
"plt.figure(figsize=(6, 4))\n",
"plt.plot(X, y, \".\")\n",
"plt.xlabel(\"$x_1$\")\n",
@ -794,7 +794,7 @@
"source": [
"from sklearn.linear_model import Ridge\n",
"\n",
"ridge_reg = Ridge(alpha=1, solver=\"cholesky\")\n",
"ridge_reg = Ridge(alpha=0.1, solver=\"cholesky\")\n",
"ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])"
]
@ -805,7 +805,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 4–17\n",
"# extra code this cell generates and saves Figure 4–17\n",
"\n",
"def plot_model(model_class, polynomial, alphas, **model_kargs):\n",
" plt.plot(X, y, \"b.\", linewidth=3)\n",
@ -845,8 +845,9 @@
"metadata": {},
"outputs": [],
"source": [
"sgd_reg = SGDRegressor(penalty=\"l2\", random_state=42)\n",
"sgd_reg.fit(X, y.ravel())\n",
"sgd_reg = SGDRegressor(penalty=\"l2\", alpha=0.1 / m, tol=None,\n",
" max_iter=1000, eta0=0.01, random_state=42)\n",
"sgd_reg.fit(X, y.ravel()) # y.ravel() because fit() expects 1D targets\n",
"sgd_reg.predict([[1.5]])"
]
},
@ -856,13 +857,36 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book show that we get roughly the same solution as earlier when\n",
"# we use Stochastic Average GD (solver=\"sag\")\n",
"ridge_reg = Ridge(alpha=1, solver=\"sag\", random_state=42)\n",
"# extra code shows that we get roughly the same solution as earlier when\n",
"# we use Stochastic Average GD (solver=\"sag\")\n",
"ridge_reg = Ridge(alpha=0.1, solver=\"sag\", random_state=42)\n",
"ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"# extra code shows the closed form solution of Ridge regression,\n",
"# compare with the next Ridge model's learned parameters below\n",
"alpha = 0.1\n",
"A = np.array([[0., 0.], [0., 1.]])\n",
"X_b = np.c_[np.ones(m), X]\n",
"np.linalg.inv(X_b.T @ X_b + alpha * A) @ X_b.T @ y"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"ridge_reg.intercept_, ridge_reg.coef_ # extra code"
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -872,7 +896,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
@ -885,11 +909,11 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 4–18\n",
"# extra code this cell generates and saves Figure 4–18\n",
"plt.figure(figsize=(9, 3.5))\n",
"plt.subplot(121)\n",
"plot_model(Lasso, polynomial=False, alphas=(0, 0.1, 1), random_state=42)\n",
@ -903,11 +927,11 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"# not in the book this BIG cell generates and saves Figure 4–19\n",
"# extra code this BIG cell generates and saves Figure 4–19\n",
"\n",
"t1a, t1b, t2a, t2b = -1, 3, -1.5, 1.5\n",
"\n",
@ -996,7 +1020,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
@ -1023,22 +1047,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"# not in the book this is the same code as earlier\n",
"np.random.seed(42)\n",
"m = 100\n",
"X = 6 * np.random.rand(m, 1) - 3\n",
"y = 0.5 * X ** 2 + X + 2 + np.random.randn(m, 1)\n",
"X_train, y_train = X[: m // 2], y[: m // 2, 0]\n",
"X_valid, y_valid = X[m // 2 :], y[m // 2 :, 0]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
@ -1046,6 +1055,14 @@
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"# extra code creates the same quadratic dataset as earlier and splits it\n",
"np.random.seed(42)\n",
"m = 100\n",
"X = 6 * np.random.rand(m, 1) - 3\n",
"y = 0.5 * X ** 2 + X + 2 + np.random.randn(m, 1)\n",
"X_train, y_train = X[: m // 2], y[: m // 2, 0]\n",
"X_valid, y_valid = X[m // 2 :], y[m // 2 :, 0]\n",
"\n",
"preprocessing = make_pipeline(PolynomialFeatures(degree=90, include_bias=False),\n",
" StandardScaler())\n",
"X_train_prep = preprocessing.fit_transform(X_train)\n",
@ -1053,7 +1070,7 @@
"sgd_reg = SGDRegressor(penalty=None, eta0=0.002, random_state=42)\n",
"n_epochs = 500\n",
"best_valid_rmse = float('inf')\n",
"train_errors, val_errors = [], [] # not in the book it's for the figure below\n",
"train_errors, val_errors = [], [] # extra code it's for the figure below\n",
"\n",
"for epoch in range(n_epochs):\n",
" sgd_reg.partial_fit(X_train_prep, y_train)\n",
@ -1063,13 +1080,13 @@
" best_valid_rmse = val_error\n",
" best_model = deepcopy(sgd_reg)\n",
"\n",
" # not in the book we evaluate the train error and save it for the figure\n",
" # extra code we evaluate the train error and save it for the figure\n",
" y_train_predict = sgd_reg.predict(X_train_prep)\n",
" train_error = mean_squared_error(y_train, y_train_predict, squared=False)\n",
" val_errors.append(val_error)\n",
" train_errors.append(train_error)\n",
"\n",
"# not in the book this section generates and saves Figure 4–20\n",
"# extra code this section generates and saves Figure 4–20\n",
"best_epoch = np.argmin(val_errors)\n",
"plt.figure(figsize=(6, 4))\n",
"plt.annotate('Best model',\n",
@ -1106,11 +1123,11 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"# not in the book generates and saves Figure 4–21\n",
"# extra code generates and saves Figure 4–21\n",
"\n",
"lim = 6\n",
"t = np.linspace(-lim, lim, 100)\n",
@ -1140,7 +1157,7 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
@ -1150,22 +1167,13 @@
"list(iris)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"print(iris.DESCR) # not in the book it's a bit too long"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"iris.data.head(3)"
"print(iris.DESCR) # extra code it's a bit too long"
]
},
{
@ -1174,7 +1182,7 @@
"metadata": {},
"outputs": [],
"source": [
"iris.target.head(3) # note that the instances are not shuffled"
"iris.data.head(3)"
]
},
{
@ -1183,7 +1191,7 @@
"metadata": {},
"outputs": [],
"source": [
"iris.target_names"
"iris.target.head(3) # note that the instances are not shuffled"
]
},
{
@ -1191,6 +1199,15 @@
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"iris.target_names"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.model_selection import train_test_split\n",
@ -1205,7 +1222,7 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
@ -1213,14 +1230,14 @@
"y_proba = log_reg.predict_proba(X_new)\n",
"decision_boundary = X_new[y_proba[:, 1] >= 0.5][0, 0]\n",
"\n",
"plt.figure(figsize=(8, 3)) # not in the book not needed, just formatting\n",
"plt.figure(figsize=(8, 3)) # extra code not needed, just formatting\n",
"plt.plot(X_new, y_proba[:, 0], \"b--\", linewidth=2,\n",
" label=\"Not Iris virginica proba\")\n",
"plt.plot(X_new, y_proba[:, 1], \"g-\", linewidth=2, label=\"Iris virginica proba\")\n",
"plt.plot([decision_boundary, decision_boundary], [0, 1], \"k:\", linewidth=2,\n",
" label=\"Decision boundary\")\n",
"\n",
"# not in the book this section beautifies and saves Figure 4–21\n",
"# extra code this section beautifies and saves Figure 4–21\n",
"plt.arrow(x=decision_boundary, y=0.08, dx=-0.3, dy=0,\n",
" head_width=0.05, head_length=0.1, fc=\"b\", ec=\"b\")\n",
"plt.arrow(x=decision_boundary, y=0.92, dx=0.3, dy=0,\n",
@ -1239,7 +1256,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
@ -1248,7 +1265,7 @@
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
@ -1257,11 +1274,11 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 422\n",
"# extra code this cell generates and saves Figure 422\n",
"\n",
"X = iris.data[[\"petal length (cm)\", \"petal width (cm)\"]].values\n",
"y = iris.target_names[iris.target] == 'virginica'\n",
@ -1307,7 +1324,7 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
@ -1321,7 +1338,7 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 59,
"metadata": {
"tags": []
},
@ -1332,7 +1349,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 60,
"metadata": {
"tags": []
},
@ -1343,11 +1360,11 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 423\n",
"# extra code this cell generates and saves Figure 423\n",
"\n",
"from matplotlib.colors import ListedColormap\n",
"\n",
@ -1419,7 +1436,7 @@
"metadata": {},
"source": [
"## 12. Batch Gradient Descent with early stopping for Softmax Regression\n",
"Exercise: _Implement Batch Gradient Descent with early stopping for Softmax Regression without using Scikit-Learn, only NumPy._"
"Exercise: _Implement Batch Gradient Descent with early stopping for Softmax Regression without using Scikit-Learn, only NumPy. Use it on a classification task such as the iris dataset._"
]
},
{
@ -1431,7 +1448,7 @@
},
{
"cell_type": "code",
"execution_count": 61,
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
@ -1448,7 +1465,7 @@
},
{
"cell_type": "code",
"execution_count": 62,
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
@ -1464,7 +1481,7 @@
},
{
"cell_type": "code",
"execution_count": 63,
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
@ -1491,12 +1508,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"The targets are currently class indices (0, 1 or 2), but we need target class probabilities to train the Softmax Regression model. Each instance will have target class probabilities equal to 0.0 for all classes except for the target class which will have a probability of 1.0 (in other words, the vector of class probabilities for any given instance is a one-hot vector). Let's write a small function to convert the vector of class indices into a matrix containing a one-hot vector for each instance. To understand this code, you need to know that `np.diag(np.ones(n))` creates an n×n matrix full of 0s except for 1s on the main diagonal. Moreover, if `a` in a NumPy array, then `a[[1,3,2]]` returns an array with 3 rows equal to `a[1]`, `a[3]` and `a[2]` (this is [advanced NumPy indexing](https://numpy.org/doc/stable/reference/arrays.indexing.html#advanced-indexing))."
"The targets are currently class indices (0, 1 or 2), but we need target class probabilities to train the Softmax Regression model. Each instance will have target class probabilities equal to 0.0 for all classes except for the target class which will have a probability of 1.0 (in other words, the vector of class probabilities for any given instance is a one-hot vector). Let's write a small function to convert the vector of class indices into a matrix containing a one-hot vector for each instance. To understand this code, you need to know that `np.diag(np.ones(n))` creates an n×n matrix full of 0s except for 1s on the main diagonal. Moreover, if `a` in a NumPy array, then `a[[1, 3, 2]]` returns an array with 3 rows equal to `a[1]`, `a[3]` and `a[2]` (this is [advanced NumPy indexing](https://numpy.org/doc/stable/reference/arrays.indexing.html#advanced-indexing))."
]
},
{
"cell_type": "code",
"execution_count": 64,
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
@ -1513,7 +1530,7 @@
},
{
"cell_type": "code",
"execution_count": 65,
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
@ -1522,7 +1539,7 @@
},
{
"cell_type": "code",
"execution_count": 66,
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
@ -1538,7 +1555,7 @@
},
{
"cell_type": "code",
"execution_count": 67,
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
@ -1556,7 +1573,7 @@
},
{
"cell_type": "code",
"execution_count": 68,
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
@ -1578,7 +1595,7 @@
},
{
"cell_type": "code",
"execution_count": 69,
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
@ -1597,7 +1614,7 @@
},
{
"cell_type": "code",
"execution_count": 70,
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
@ -1625,7 +1642,7 @@
},
{
"cell_type": "code",
"execution_count": 71,
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
@ -1658,7 +1675,7 @@
},
{
"cell_type": "code",
"execution_count": 72,
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
@ -1674,7 +1691,7 @@
},
{
"cell_type": "code",
"execution_count": 73,
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
@ -1695,7 +1712,7 @@
},
{
"cell_type": "code",
"execution_count": 74,
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
@ -1732,7 +1749,7 @@
},
{
"cell_type": "code",
"execution_count": 75,
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
@ -1760,7 +1777,7 @@
},
{
"cell_type": "code",
"execution_count": 76,
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
@ -1797,7 +1814,7 @@
},
{
"cell_type": "code",
"execution_count": 77,
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
@ -1825,11 +1842,11 @@
},
{
"cell_type": "code",
"execution_count": 78,
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"custom_cmap = mpl.colors.ListedColormap(['#fafab0','#9898ff','#a0faa0'])\n",
"custom_cmap = mpl.colors.ListedColormap(['#fafab0', '#9898ff', '#a0faa0'])\n",
"\n",
"x0, x1 = np.meshgrid(np.linspace(0, 8, 500).reshape(-1, 1),\n",
" np.linspace(0, 3.5, 200).reshape(-1, 1))\n",
@ -1869,7 +1886,7 @@
},
{
"cell_type": "code",
"execution_count": 79,
"execution_count": 80,
"metadata": {},
"outputs": [],
"source": [

View File

@ -140,7 +140,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 51\n",
"# extra code this cell generates and saves Figure 51\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
@ -219,7 +219,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 52\n",
"# extra code this cell generates and saves Figure 52\n",
"\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
@ -269,7 +269,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 53\n",
"# extra code this cell generates and saves Figure 53\n",
"\n",
"X_outliers = np.array([[3.4, 1.3], [3.2, 0.8]])\n",
"y_outliers = np.array([0, 0])\n",
@ -364,7 +364,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 54\n",
"# extra code this cell generates and saves Figure 54\n",
"\n",
"scaler = StandardScaler()\n",
"svm_clf1 = LinearSVC(C=1, max_iter=10_000, random_state=42)\n",
@ -432,7 +432,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 55\n",
"# extra code this cell generates and saves Figure 55\n",
"\n",
"X1D = np.linspace(-4, 4, 9).reshape(-1, 1)\n",
"X2D = np.c_[X1D, X1D**2]\n",
@ -492,7 +492,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 56\n",
"# extra code this cell generates and saves Figure 56\n",
"\n",
"def plot_dataset(X, y, axes):\n",
" plt.plot(X[:, 0][y==0], X[:, 1][y==0], \"bs\")\n",
@ -545,7 +545,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 57\n",
"# extra code this cell generates and saves Figure 57\n",
"\n",
"poly100_kernel_svm_clf = make_pipeline(\n",
" StandardScaler(),\n",
@ -585,7 +585,7 @@
},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 58\n",
"# extra code this cell generates and saves Figure 58\n",
"\n",
"def gaussian_rbf(x, landmark, gamma):\n",
" return np.exp(-gamma * np.linalg.norm(x - landmark, axis=1)**2)\n",
@ -675,7 +675,7 @@
},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 59\n",
"# extra code this cell generates and saves Figure 59\n",
"\n",
"from sklearn.svm import SVC\n",
"\n",
@ -724,7 +724,7 @@
"source": [
"from sklearn.svm import LinearSVR\n",
"\n",
"# not in the book these 3 lines generate a simple linear dataset\n",
"# extra code these 3 lines generate a simple linear dataset\n",
"np.random.seed(42)\n",
"X = 2 * np.random.rand(50, 1)\n",
"y = 4 + 3 * X[:, 0] + np.random.randn(50)\n",
@ -740,7 +740,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 510\n",
"# extra code this cell generates and saves Figure 510\n",
"\n",
"def find_support_vectors(svm_reg, X, y):\n",
" y_pred = svm_reg.predict(X)\n",
@ -800,7 +800,7 @@
"source": [
"from sklearn.svm import SVR\n",
"\n",
"# not in the book these 3 lines generate a simple quadratic dataset\n",
"# extra code these 3 lines generate a simple quadratic dataset\n",
"np.random.seed(42)\n",
"X = 2 * np.random.rand(50, 1) - 1\n",
"y = 0.2 + 0.1 * X[:, 0] + 0.5 * X[:, 0] ** 2 + np.random.randn(50) / 10\n",
@ -816,7 +816,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 511\n",
"# extra code this cell generates and saves Figure 511\n",
"\n",
"svm_poly_reg2 = make_pipeline(StandardScaler(),\n",
" SVR(kernel=\"poly\", degree=2, C=100))\n",
@ -857,7 +857,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 512\n",
"# extra code this cell generates and saves Figure 512\n",
"\n",
"import matplotlib.patches as patches\n",
"\n",
@ -906,7 +906,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 513\n",
"# extra code this cell generates and saves Figure 513\n",
"\n",
"s = np.linspace(-2.5, 2.5, 200)\n",
"hinge_pos = np.where(1 - s < 0, 0, 1 - s) # max(0, 1 - s)\n",
@ -1112,7 +1112,7 @@
"sgd_clf.fit(X, y)\n",
"\n",
"m = len(X)\n",
"t = np.array(y).reshape(-1, 1) * 2 - 1 # -1 if t==0, +1 if t==1\n",
"t = np.array(y).reshape(-1, 1) * 2 - 1 # -1 if y == 0, or +1 if y == 1\n",
"X_b = np.c_[np.ones((m, 1)), X] # Add bias input x0=1\n",
"X_b_t = X_b * t\n",
"sgd_theta = np.r_[sgd_clf.intercept_[0], sgd_clf.coef_[0]]\n",

View File

@ -193,7 +193,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"!dot -Tpng {IMAGES_PATH / \"iris_tree.dot\"} -o {IMAGES_PATH / \"iris_tree.png\"}"
]
},
@ -213,7 +213,7 @@
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# not in the book just formatting details\n",
"# extra code just formatting details\n",
"from matplotlib.colors import ListedColormap\n",
"custom_cmap = ListedColormap(['#fafab0', '#9898ff', '#a0faa0'])\n",
"plt.figure(figsize=(8, 4))\n",
@ -226,7 +226,7 @@
" plt.plot(X_iris[:, 0][y_iris == idx], X_iris[:, 1][y_iris == idx],\n",
" style, label=f\"Iris {name}\")\n",
"\n",
"# not in the book this section beautifies and saves Figure 62\n",
"# extra code this section beautifies and saves Figure 62\n",
"tree_clf_deeper = DecisionTreeClassifier(max_depth=3, random_state=42)\n",
"tree_clf_deeper.fit(X_iris, y_iris)\n",
"th0, th1, th2a, th2b = tree_clf_deeper.tree_.threshold[[0, 2, 3, 6]]\n",
@ -341,7 +341,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 63\n",
"# extra code this cell generates and saves Figure 63\n",
"\n",
"def plot_decision_boundary(clf, X, y, axes, cmap):\n",
" x1, x2 = np.meshgrid(np.linspace(axes[0], axes[1], 100),\n",
@ -437,7 +437,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book we've already seen how to use export_graphviz()\n",
"# extra code we've already seen how to use export_graphviz()\n",
"export_graphviz(\n",
" tree_reg,\n",
" out_file=str(IMAGES_PATH / \"regression_tree.dot\"),\n",
@ -482,7 +482,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 65\n",
"# extra code this cell generates and saves Figure 65\n",
"\n",
"def plot_regression_predictions(tree_reg, X, y, axes=[-0.5, 0.5, -0.05, 0.25]):\n",
" x1 = np.linspace(axes[0], axes[1], 500).reshape(-1, 1)\n",
@ -526,7 +526,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 66\n",
"# extra code this cell generates and saves Figure 66\n",
"\n",
"tree_reg1 = DecisionTreeRegressor(random_state=42)\n",
"tree_reg2 = DecisionTreeRegressor(random_state=42, min_samples_leaf=10)\n",
@ -579,7 +579,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 67\n",
"# extra code this cell generates and saves Figure 67\n",
"\n",
"np.random.seed(6)\n",
"X_square = np.random.rand(100, 2) - 0.5\n",
@ -630,7 +630,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 68\n",
"# extra code this cell generates and saves Figure 68\n",
"\n",
"plt.figure(figsize=(8, 4))\n",
"\n",
@ -693,7 +693,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 69\n",
"# extra code this cell generates and saves Figure 69\n",
"\n",
"plt.figure(figsize=(8, 4))\n",
"y_pred = tree_clf_tweaked.predict(X_iris_all).reshape(lengths.shape)\n",

View File

@ -133,7 +133,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 73\n",
"# extra code this cell generates and saves Figure 73\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
@ -273,7 +273,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 75\n",
"# extra code this cell generates and saves Figure 75\n",
"\n",
"def plot_decision_boundary(clf, X, y, alpha=1.0):\n",
" axes=[-1.5, 2.4, -1, 1.5]\n",
@ -363,7 +363,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code shows how to compute the 63% proba\n",
"# extra code shows how to compute the 63% proba\n",
"print(1 - (1 - 1 / 1000) ** 1000)\n",
"print(1 - np.exp(-1))"
]
@ -413,7 +413,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code verifies that the predictions are identical\n",
"# extra code verifies that the predictions are identical\n",
"bag_clf.fit(X_train, y_train)\n",
"y_pred_bag = bag_clf.predict(X_test)\n",
"np.all(y_pred_bag == y_pred_rf) # same predictions"
@ -447,7 +447,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 76\n",
"# extra code this cell generates and saves Figure 76\n",
"\n",
"from sklearn.datasets import fetch_openml\n",
"\n",
@ -480,7 +480,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 78\n",
"# extra code this cell generates and saves Figure 78\n",
"\n",
"m = len(X_train)\n",
"\n",
@ -534,8 +534,8 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book in case you're curious to see what the decision boundary\n",
"# looks like for the AdaBoost classifier\n",
"# extra code in case you're curious to see what the decision boundary\n",
"# looks like for the AdaBoost classifier\n",
"plot_decision_boundary(ada_clf, X_train, y_train)"
]
},
@ -615,7 +615,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 79\n",
"# extra code this cell generates and saves Figure 79\n",
"\n",
"def plot_predictions(regressors, X, y, axes, style,\n",
" label=None, data_style=\"b.\", data_label=None):\n",
@ -715,7 +715,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 710\n",
"# extra code this cell generates and saves Figure 710\n",
"\n",
"fix, axes = plt.subplots(ncols=2, figsize=(10, 4), sharey=True)\n",
"\n",
@ -743,7 +743,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book at least not in this chapter, it's presented in chapter 2\n",
"# extra code at least not in this chapter, it's presented in chapter 2\n",
"\n",
"import tarfile\n",
"import urllib.request\n",
@ -795,7 +795,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book evaluate the RMSE stats for the hgb_reg model\n",
"# extra code evaluate the RMSE stats for the hgb_reg model\n",
"\n",
"from sklearn.model_selection import cross_val_score\n",
"\n",

View File

@ -91,8 +91,8 @@
"plt.rc('font', size=14)\n",
"plt.rc('axes', labelsize=14, titlesize=14)\n",
"plt.rc('legend', fontsize=14)\n",
"plt.rc('xtick',labelsize=10)\n",
"plt.rc('ytick',labelsize=10)"
"plt.rc('xtick', labelsize=10)\n",
"plt.rc('ytick', labelsize=10)"
]
},
{
@ -147,7 +147,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"\n",
"import numpy as np\n",
"from scipy.spatial.transform import Rotation\n",
@ -177,7 +177,7 @@
},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 82\n",
"# extra code this cell generates and saves Figure 82\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from mpl_toolkits.mplot3d import Axes3D\n",
@ -245,7 +245,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 83\n",
"# extra code this cell generates and saves Figure 83\n",
"\n",
"fig = plt.figure()\n",
"ax = fig.add_subplot(1, 1, 1, aspect='equal')\n",
@ -281,7 +281,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 84\n",
"# extra code this cell generates and saves Figure 84\n",
"\n",
"from matplotlib.colors import ListedColormap\n",
"\n",
@ -305,7 +305,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves plots for Figure 85\n",
"# extra code this cell generates and saves plots for Figure 85\n",
"\n",
"plt.figure(figsize=(10, 4))\n",
"\n",
@ -332,7 +332,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves plots for Figure 86\n",
"# extra code this cell generates and saves plots for Figure 86\n",
" \n",
"axes = [-11.5, 14, -2, 23, -12, 15]\n",
"x2s = np.linspace(axes[2], axes[3], 10)\n",
@ -404,7 +404,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 87\n",
"# extra code this cell generates and saves Figure 87\n",
"\n",
"angle = np.pi / 5\n",
"stretch = 5\n",
@ -507,7 +507,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code shows how to construct Σ from s\n",
"# extra code shows how to construct Σ from s\n",
"m, n = X.shape\n",
"Σ = np.zeros_like(X_centered)\n",
"Σ[:n, :n] = np.diag(s)\n",
@ -616,7 +616,7 @@
"metadata": {},
"outputs": [],
"source": [
"1 - pca.explained_variance_ratio_.sum() # not in the book"
"1 - pca.explained_variance_ratio_.sum() # extra code"
]
},
{
@ -678,7 +678,7 @@
"metadata": {},
"outputs": [],
"source": [
"pca.explained_variance_ratio_.sum() # not in the book"
"pca.explained_variance_ratio_.sum() # extra code"
]
},
{
@ -694,7 +694,7 @@
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(6,4))\n",
"plt.figure(figsize=(6, 4))\n",
"plt.plot(cumsum, linewidth=3)\n",
"plt.axis([0, 400, 0, 1])\n",
"plt.xlabel(\"Dimensions\")\n",
@ -795,7 +795,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 89\n",
"# extra code this cell generates and saves Figure 89\n",
"\n",
"plt.figure(figsize=(7, 4))\n",
"for idx, X in enumerate((X_train[::2100], X_recovered[::2100])):\n",
@ -929,7 +929,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book show the equation computed by johnson_lindenstrauss_min_dim\n",
"# extra code show the equation computed by johnson_lindenstrauss_min_dim\n",
"d = int(4 * np.log(m) / (ε ** 2 / 2 - ε ** 3 / 3))\n",
"d"
]
@ -983,7 +983,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book performance comparison between Gaussian and Sparse RP\n",
"# extra code performance comparison between Gaussian and Sparse RP\n",
"\n",
"from sklearn.random_projection import SparseRandomProjection\n",
"\n",
@ -1027,7 +1027,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 810\n",
"# extra code this cell generates and saves Figure 810\n",
"\n",
"plt.scatter(X_unrolled[:, 0], X_unrolled[:, 1],\n",
" c=t, cmap=darker_hot)\n",
@ -1047,7 +1047,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book shows how well correlated z1 is to t: LLE worked fine\n",
"# extra code shows how well correlated z1 is to t: LLE worked fine\n",
"plt.title(\"$z_1$ vs $t$\")\n",
"plt.scatter(X_unrolled[:, 0], t, c=t, cmap=darker_hot)\n",
"plt.xlabel(\"$z_1$\")\n",
@ -1099,11 +1099,11 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 811\n",
"# extra code this cell generates and saves Figure 811\n",
"\n",
"titles = [\"MDS\", \"Isomap\", \"t-SNE\"]\n",
"\n",
"plt.figure(figsize=(11,4))\n",
"plt.figure(figsize=(11, 4))\n",
"\n",
"for subplot, title, X_reduced in zip((131, 132, 133), titles,\n",
" (X_reduced_mds, X_reduced_isomap, X_reduced_tsne)):\n",

View File

@ -91,8 +91,8 @@
"plt.rc('font', size=14)\n",
"plt.rc('axes', labelsize=14, titlesize=14)\n",
"plt.rc('legend', fontsize=14)\n",
"plt.rc('xtick',labelsize=10)\n",
"plt.rc('ytick',labelsize=10)"
"plt.rc('xtick', labelsize=10)\n",
"plt.rc('ytick', labelsize=10)"
]
},
{
@ -147,7 +147,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 91\n",
"# extra code this cell generates and saves Figure 91\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.datasets import load_iris\n",
@ -192,7 +192,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"\n",
"import numpy as np\n",
"from scipy import stats\n",
@ -263,7 +263,7 @@
"from sklearn.cluster import KMeans\n",
"from sklearn.datasets import make_blobs\n",
"\n",
"# not in the book the exact arguments of make_blobs() are not important\n",
"# extra code the exact arguments of make_blobs() are not important\n",
"blob_centers = np.array([[ 0.2, 2.3], [-1.5 , 2.3], [-2.8, 1.8],\n",
" [-2.8, 2.8], [-2.8, 1.3]])\n",
"blob_std = np.array([0.4, 0.3, 0.1, 0.1, 0.1])\n",
@ -288,7 +288,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 92\n",
"# extra code this cell generates and saves Figure 92\n",
"\n",
"def plot_clusters(X, y=None):\n",
" plt.scatter(X[:, 0], X[:, 1], c=y, s=1)\n",
@ -399,7 +399,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 93\n",
"# extra code this cell generates and saves Figure 93\n",
"\n",
"def plot_data(X):\n",
" plt.plot(X[:, 0], X[:, 1], 'k.', markersize=2)\n",
@ -489,7 +489,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"np.linalg.norm(np.tile(X_new, (1, k)).reshape(-1, k, 2)\n",
" - kmeans.cluster_centers_, axis=2).round(2)"
]
@ -532,7 +532,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 94\n",
"# extra code this cell generates and saves Figure 94\n",
"\n",
"kmeans_iter1 = KMeans(n_clusters=5, init=\"random\", n_init=1, max_iter=1,\n",
" random_state=5)\n",
@ -600,7 +600,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 95\n",
"# extra code this cell generates and saves Figure 95\n",
"\n",
"def plot_clusterer_comparison(clusterer1, clusterer2, X, title1=None,\n",
" title2=None):\n",
@ -647,7 +647,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"plt.figure(figsize=(8, 4))\n",
"plot_decision_boundaries(kmeans, X)"
]
@ -681,7 +681,7 @@
"metadata": {},
"outputs": [],
"source": [
"kmeans_rnd_init1.inertia_ # not in the book"
"kmeans_rnd_init1.inertia_ # extra code"
]
},
{
@ -690,7 +690,7 @@
"metadata": {},
"outputs": [],
"source": [
"kmeans_rnd_init2.inertia_ # not in the book"
"kmeans_rnd_init2.inertia_ # extra code"
]
},
{
@ -706,7 +706,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"X_dist = kmeans.transform(X)\n",
"(X_dist[np.arange(len(X_dist)), kmeans.labels_] ** 2).sum()"
]
@ -754,7 +754,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"kmeans_rnd_10_inits = KMeans(n_clusters=5, init=\"random\", n_init=10,\n",
" random_state=2)\n",
"kmeans_rnd_10_inits.fit(X)"
@ -773,7 +773,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"plt.figure(figsize=(8, 4))\n",
"plot_decision_boundaries(kmeans_rnd_10_inits, X)\n",
"plt.show()"
@ -964,7 +964,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 96\n",
"# extra code this cell generates and saves Figure 96\n",
"\n",
"from timeit import timeit\n",
"\n",
@ -981,7 +981,7 @@
" inertias[k - 1, 0] = kmeans_.inertia_\n",
" inertias[k - 1, 1] = minibatch_kmeans.inertia_\n",
"\n",
"plt.figure(figsize=(10,4))\n",
"plt.figure(figsize=(10, 4))\n",
"\n",
"plt.subplot(121)\n",
"plt.plot(range(1, max_k + 1), inertias[:, 0], \"r--\", label=\"K-Means\")\n",
@ -1024,7 +1024,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 97\n",
"# extra code this cell generates and saves Figure 97\n",
"\n",
"kmeans_k3 = KMeans(n_clusters=3, random_state=42)\n",
"kmeans_k8 = KMeans(n_clusters=8, random_state=42)\n",
@ -1072,7 +1072,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 98\n",
"# extra code this cell generates and saves Figure 98\n",
"\n",
"kmeans_per_k = [KMeans(n_clusters=k, random_state=42).fit(X)\n",
" for k in range(1, 10)]\n",
@ -1104,7 +1104,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"plot_decision_boundaries(kmeans_per_k[4 - 1], X)\n",
"plt.show()"
]
@ -1147,7 +1147,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 99\n",
"# extra code this cell generates and saves Figure 99\n",
"\n",
"silhouette_scores = [silhouette_score(X, model.labels_)\n",
" for model in kmeans_per_k[1:]]\n",
@ -1182,7 +1182,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 910\n",
"# extra code this cell generates and saves Figure 910\n",
"\n",
"from sklearn.metrics import silhouette_samples\n",
"from matplotlib.ticker import FixedLocator, FixedFormatter\n",
@ -1253,7 +1253,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 911\n",
"# extra code this cell generates and saves Figure 911\n",
"\n",
"X1, y1 = make_blobs(n_samples=1000, centers=((4, -4), (0, 0)), random_state=42)\n",
"X1 = X1.dot(np.array([[0.374, 0.95], [0.732, 0.598]]))\n",
@ -1303,7 +1303,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"\n",
"root = \"https://raw.githubusercontent.com/ageron/handson-ml3/main/\"\n",
"filename = \"ladybug.png\"\n",
@ -1344,7 +1344,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 912\n",
"# extra code this cell generates and saves Figure 912\n",
"\n",
"segmented_imgs = []\n",
"n_colors = (10, 8, 6, 4, 2)\n",
@ -1353,7 +1353,7 @@
" segmented_img = kmeans.cluster_centers_[kmeans.labels_]\n",
" segmented_imgs.append(segmented_img.reshape(image.shape))\n",
"\n",
"plt.figure(figsize=(10,5))\n",
"plt.figure(figsize=(10, 5))\n",
"plt.subplots_adjust(wspace=0.05, hspace=0.1)\n",
"\n",
"plt.subplot(2, 3, 1)\n",
@ -1367,7 +1367,7 @@
" plt.title(f\"{n_clusters} colors\")\n",
" plt.axis('off')\n",
"\n",
"save_fig('image_segmentation_diagram', tight_layout=False)\n",
"save_fig('image_segmentation_plot', tight_layout=False)\n",
"plt.show()"
]
},
@ -1440,7 +1440,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book measure the accuracy when we use the whole training set\n",
"# extra code measure the accuracy when we use the whole training set\n",
"log_reg_full = LogisticRegression(max_iter=10_000)\n",
"log_reg_full.fit(X_train, y_train)\n",
"log_reg_full.score(X_test, y_test)"
@ -1479,7 +1479,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 913\n",
"# extra code this cell generates and saves Figure 913\n",
"\n",
"plt.figure(figsize=(8, 2))\n",
"for index, X_representative_digit in enumerate(X_representative_digits):\n",
@ -1488,7 +1488,7 @@
" interpolation=\"bilinear\")\n",
" plt.axis('off')\n",
"\n",
"save_fig(\"representative_images_diagram\", tight_layout=False)\n",
"save_fig(\"representative_images_plot\", tight_layout=False)\n",
"plt.show()"
]
},
@ -1694,7 +1694,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 914\n",
"# extra code this cell generates and saves Figure 914\n",
"\n",
"def plot_dbscan(dbscan, X, size, show_xlabels=True, show_ylabels=True):\n",
" core_mask = np.zeros_like(dbscan.labels_, dtype=bool)\n",
@ -1747,7 +1747,7 @@
"metadata": {},
"outputs": [],
"source": [
"dbscan = dbscan2 # not in the book the text says we now use eps=0.2"
"dbscan = dbscan2 # extra code the text says we now use eps=0.2"
]
},
{
@ -1787,7 +1787,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 915\n",
"# extra code this cell generates and saves Figure 915\n",
"\n",
"plt.figure(figsize=(6, 3))\n",
"plot_decision_boundaries(knn, X, show_centroids=False)\n",
@ -2172,7 +2172,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book bonus material\n",
"# extra code bonus material\n",
"\n",
"resolution = 100\n",
"grid = np.arange(-10, 10, 1 / resolution)\n",
@ -2197,7 +2197,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cells generates and saves Figure 916\n",
"# extra code this cells generates and saves Figure 916\n",
"\n",
"from matplotlib.colors import LogNorm\n",
"\n",
@ -2256,7 +2256,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 917\n",
"# extra code this cell generates and saves Figure 917\n",
"\n",
"gm_full = GaussianMixture(n_components=3, n_init=10,\n",
" covariance_type=\"full\", random_state=42)\n",
@ -2294,7 +2294,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book comparing covariance_type=\"full\" and covariance_type=\"diag\"\n",
"# extra code comparing covariance_type=\"full\" and covariance_type=\"diag\"\n",
"compare_gaussian_mixtures(gm_full, gm_diag, X)\n",
"plt.tight_layout()\n",
"plt.show()"
@ -2331,7 +2331,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 918\n",
"# extra code this cell generates and saves Figure 918\n",
"\n",
"plt.figure(figsize=(8, 4))\n",
"\n",
@ -2373,7 +2373,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 919\n",
"# extra code this cell generates and saves Figure 919\n",
"\n",
"from scipy.stats import norm\n",
"\n",
@ -2387,7 +2387,7 @@
"stds = np.linspace(stds_range[0], stds_range[1], 501)\n",
"Xs, Stds = np.meshgrid(xs, stds)\n",
"Z = 2 * norm.pdf(Xs - 1.0, 0, Stds) + norm.pdf(Xs + 4.0, 0, Stds)\n",
"Z = Z / Z.sum(axis=1)[:,np.newaxis] / (xs[1] - xs[0])\n",
"Z = Z / Z.sum(axis=1)[:, np.newaxis] / (xs[1] - xs[0])\n",
"\n",
"x_example_idx = (xs >= x_val).argmax() # index of the first value >= x_val\n",
"max_idx = Z[:, x_example_idx].argmax()\n",
@ -2479,7 +2479,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book bonus material\n",
"# extra code bonus material\n",
"n_clusters = 3\n",
"n_dims = 2\n",
"n_params_for_weights = n_clusters - 1\n",
@ -2514,7 +2514,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 920\n",
"# extra code this cell generates and saves Figure 920\n",
"\n",
"gms_per_k = [GaussianMixture(n_components=k, n_init=10, random_state=42).fit(X)\n",
" for k in range(1, 11)]\n",
@ -2576,7 +2576,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this figure is almost identical to Figure 916\n",
"# extra code this figure is almost identical to Figure 916\n",
"plt.figure(figsize=(8, 5))\n",
"plot_gaussian_mixture(bgm, X)\n",
"plt.show()"
@ -2588,7 +2588,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 921\n",
"# extra code this cell generates and saves Figure 921\n",
"\n",
"X_moons, y_moons = make_moons(n_samples=1000, noise=0.05, random_state=42)\n",
"\n",

View File

@ -77,7 +77,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"And TensorFlow ≥ 2.6:"
"And TensorFlow ≥ 2.7:"
]
},
{
@ -88,7 +88,7 @@
"source": [
"import tensorflow as tf\n",
"\n",
"assert tf.__version__ >= \"2.6.0\""
"assert tf.__version__ >= \"2.7.0\""
]
},
{
@ -189,7 +189,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book extra material\n",
"# extra code shows how to build and train a Perceptron\n",
"\n",
"from sklearn.linear_model import SGDClassifier\n",
"\n",
@ -213,7 +213,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book extra material\n",
"# extra code plots the decision boundary of a Perceptron on the iris dataset\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib.colors import ListedColormap\n",
@ -256,7 +256,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code generates and saves Figure 108\n",
"# extra code this cell generates and saves Figure 108\n",
"\n",
"from scipy.special import expit as sigmoid\n",
"\n",
@ -358,7 +358,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this was left as an exercise for the reader\n",
"# extra code this was left as an exercise for the reader\n",
"\n",
"from sklearn.datasets import load_iris\n",
"from sklearn.model_selection import train_test_split\n",
@ -470,7 +470,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"\n",
"plt.imshow(X_train[0], cmap=\"binary\")\n",
"plt.axis('off')\n",
@ -539,7 +539,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 1010\n",
"# extra code this cell generates and saves Figure 1010\n",
"\n",
"n_rows = 4\n",
"n_cols = 10\n",
@ -585,7 +585,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book clear the session to reset the name counters\n",
"# extra code clear the session to reset the name counters\n",
"tf.keras.backend.clear_session()\n",
"tf.random.set_seed(42)\n",
"\n",
@ -612,7 +612,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book another way to display the model's architecture\n",
"# extra code another way to display the model's architecture\n",
"tf.keras.utils.plot_model(model, \"my_fashion_mnist_model.png\", show_shapes=True)"
]
},
@ -712,7 +712,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell is equivalent to the previous cell\n",
"# extra code this cell is equivalent to the previous cell\n",
"model.compile(loss=tf.keras.losses.sparse_categorical_crossentropy,\n",
" optimizer=tf.keras.optimizers.SGD(),\n",
" metrics=[tf.keras.metrics.sparse_categorical_accuracy])"
@ -724,7 +724,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code shows how to convert class ids to one-hot vectors\n",
"# extra code shows how to convert class ids to one-hot vectors\n",
"tf.keras.utils.to_categorical([0, 5, 1, 0], num_classes=10)"
]
},
@ -741,7 +741,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this code shows how to convert one-hot vectors to class ids\n",
"# extra code shows how to convert one-hot vectors to class ids\n",
"np.argmax(\n",
" [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],\n",
@ -798,8 +798,8 @@
"pd.DataFrame(history.history).plot(\n",
" figsize=(8, 5), xlim=[0, 29], ylim=[0, 1], grid=True, xlabel=\"Epoch\",\n",
" style=[\"r--\", \"r--.\", \"b-\", \"b-*\"])\n",
"plt.legend(loc=\"lower left\") # not in the book\n",
"save_fig(\"keras_learning_curves_plot\") # not in the book\n",
"plt.legend(loc=\"lower left\") # extra code\n",
"save_fig(\"keras_learning_curves_plot\") # extra code\n",
"plt.show()"
]
},
@ -809,7 +809,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book shows how to shift the training curve by -1/2 epoch\n",
"# extra code shows how to shift the training curve by -1/2 epoch\n",
"plt.figure(figsize=(8, 5))\n",
"for key, style in zip(history.history, [\"r--\", \"r--.\", \"b-\", \"b-*\"]):\n",
" epochs = np.array(history.epoch) + (0 if key.startswith(\"val_\") else -0.5)\n",
@ -883,7 +883,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book this cell generates and saves Figure 1012\n",
"# extra code this cell generates and saves Figure 1012\n",
"plt.figure(figsize=(7.2, 2.4))\n",
"for index, image in enumerate(X_new):\n",
" plt.subplot(1, 3, index + 1)\n",
@ -915,7 +915,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book load and split the California housing dataset, like earlier\n",
"# extra code load and split the California housing dataset, like earlier\n",
"housing = fetch_california_housing()\n",
"X_train_full, X_test, y_train_full, y_test = train_test_split(\n",
" housing.data, housing.target, random_state=42)\n",
@ -986,7 +986,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book reset the name counters and make the code reproducible\n",
"# extra code reset the name counters and make the code reproducible\n",
"tf.keras.backend.clear_session()\n",
"tf.random.set_seed(42)"
]
@ -1050,7 +1050,7 @@
"metadata": {},
"outputs": [],
"source": [
"tf.random.set_seed(42) # not in the book"
"tf.random.set_seed(42) # extra code"
]
},
{
@ -1220,7 +1220,7 @@
" aux_output = self.aux_output(hidden2)\n",
" return output, aux_output\n",
"\n",
"tf.random.set_seed(42) # not in the book just for reproducibility\n",
"tf.random.set_seed(42) # extra code just for reproducibility\n",
"model = WideAndDeepModel(30, activation=\"relu\", name=\"my_cool_model\")"
]
},
@ -1256,7 +1256,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book delete the directory, in case it already exists\n",
"# extra code delete the directory, in case it already exists\n",
"\n",
"import shutil\n",
"\n",
@ -1278,7 +1278,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book show the contents of the my_keras_model/ directory\n",
"# extra code show the contents of the my_keras_model/ directory\n",
"for path in sorted(Path(\"my_keras_model\").glob(\"**/*\")):\n",
" print(path)"
]
@ -1317,7 +1317,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book show the list of my_weights.* files\n",
"# extra code show the list of my_weights.* files\n",
"for path in sorted(Path().glob(\"my_weights.*\")):\n",
" print(path)"
]
@ -1335,7 +1335,7 @@
"metadata": {},
"outputs": [],
"source": [
"shutil.rmtree(\"my_checkpoints\", ignore_errors=True) # not in the book"
"shutil.rmtree(\"my_checkpoints\", ignore_errors=True) # extra code"
]
},
{
@ -1411,7 +1411,7 @@
"metadata": {},
"outputs": [],
"source": [
"if \"google.colab\" in sys.modules: # not in the book\n",
"if \"google.colab\" in sys.modules: # extra code\n",
" %pip install -q -U tensorboard-plugin-profile"
]
},
@ -1447,7 +1447,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book builds the first regression model we used earlier\n",
"# extra code builds the first regression model we used earlier\n",
"tf.keras.backend.clear_session()\n",
"tf.random.set_seed(42)\n",
"norm_layer = tf.keras.layers.Normalization(input_shape=X_train.shape[1:])\n",
@ -1516,7 +1516,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"\n",
"if \"google.colab\" in sys.modules:\n",
" from google.colab import output\n",
@ -1574,7 +1574,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book\n",
"# extra code\n",
"\n",
"if \"google.colab\" in sys.modules:\n",
" !tensorboard dev upload --logdir ./my_logs --one_shot \\\n",
@ -1632,7 +1632,7 @@
"metadata": {},
"outputs": [],
"source": [
"# not in the book extra material\n",
"# extra code lists all running TensorBoard server instances\n",
"\n",
"from tensorboard import notebook\n",
"\n",