From ec67c6962c23aa5b54c3034fafd7c2077c0c049a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= <ageron@users.noreply.github.com>
Date: Sat, 18 Feb 2023 21:46:54 +1300
Subject: [PATCH] Rename scipy.stats.reciprocal with loguniform, fixes #44

---
 02_end_to_end_machine_learning_project.ipynb | 30 ++++++++++----------
 05_support_vector_machines.ipynb             |  8 +++---
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb
index fbc5746..94cda7a 100644
--- a/02_end_to_end_machine_learning_project.ipynb
+++ b/02_end_to_end_machine_learning_project.ipynb
@@ -4841,7 +4841,7 @@
     "* `scipy.stats.uniform(a, b)`: this is very similar, but for _continuous_ hyperparameters.\n",
     "* `scipy.stats.geom(1 / scale)`: for discrete values, when you want to sample roughly in a given scale. E.g., with scale=1000 most samples will be in this ballpark, but ~10% of all samples will be <100 and ~10% will be >2300.\n",
     "* `scipy.stats.expon(scale)`: this is the continuous equivalent of `geom`. Just set `scale` to the most likely value.\n",
-    "* `scipy.stats.reciprocal(a, b)`: when you have almost no idea what the optimal hyperparameter value's scale is. If you set a=0.01 and b=100, then you're just as likely to sample a value between 0.01 and 0.1 as a value between 10 and 100.\n"
+    "* `scipy.stats.loguniform(a, b)`: when you have almost no idea what the optimal hyperparameter value's scale is. If you set a=0.01 and b=100, then you're just as likely to sample a value between 0.01 and 0.1 as a value between 10 and 100.\n"
    ]
   },
   {
@@ -4923,7 +4923,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Here are the PDF for `expon()` and `reciprocal()` (left column), as well as the PDF of log(X) (right column). The right column shows the distribution of hyperparameter _scales_. You can see that `expon()` favors hyperparameters with roughly the desired scale, with a longer tail towards the smaller scales. But `reciprocal()` does not favor any scale, they are all equally likely:"
+    "Here are the PDFs for `expon()` and `loguniform()` (left column), as well as the PDFs of log(X) (right column). The right column shows the distribution of hyperparameter _scales_. You can see that `expon()` favors hyperparameters with roughly the desired scale, with a longer tail towards the smaller scales. But `loguniform()` does not favor any scale; they are all equally likely:"
    ]
   },
   {
@@ -4947,9 +4947,9 @@
     }
    ],
    "source": [
-    "# extra code – shows the difference between expon and reciprocal\n",
+    "# extra code – shows the difference between expon and loguniform\n",
     "\n",
-    "from scipy.stats import reciprocal\n",
+    "from scipy.stats import loguniform\n",
     "\n",
     "xs1 = np.linspace(0, 7, 500)\n",
     "expon_distrib = expon(scale=1).pdf(xs1)\n",
@@ -4958,10 +4958,10 @@
     "log_expon_distrib = np.exp(log_xs2 - np.exp(log_xs2))\n",
     "\n",
     "xs3 = np.linspace(0.001, 1000, 500)\n",
-    "reciprocal_distrib = reciprocal(0.001, 1000).pdf(xs3)\n",
+    "loguniform_distrib = loguniform(0.001, 1000).pdf(xs3)\n",
     "\n",
     "log_xs4 = np.linspace(np.log(0.001), np.log(1000), 500)\n",
-    "log_reciprocal_distrib = uniform(np.log(0.001), np.log(1000)).pdf(log_xs4)\n",
+    "log_loguniform_distrib = uniform(np.log(0.001), np.log(1000)).pdf(log_xs4)\n",
     "\n",
     "plt.figure(figsize=(12, 7))\n",
     "\n",
@@ -4979,16 +4979,16 @@
     "plt.axis([-5, 3, 0, 1])\n",
     "\n",
     "plt.subplot(2, 2, 3)\n",
-    "plt.fill_between(xs3, reciprocal_distrib,\n",
-    "                 label=\"scipy.reciprocal(0.001, 1000)\")\n",
+    "plt.fill_between(xs3, loguniform_distrib,\n",
+    "                 label=\"scipy.loguniform(0.001, 1000)\")\n",
     "plt.xlabel(\"Hyperparameter value\")\n",
     "plt.ylabel(\"PDF\")\n",
     "plt.legend()\n",
     "plt.axis([0.001, 1000, 0, 0.005])\n",
     "\n",
     "plt.subplot(2, 2, 4)\n",
-    "plt.fill_between(log_xs4, log_reciprocal_distrib,\n",
-    "                 label=\"log(X) with X ~ reciprocal\")\n",
+    "plt.fill_between(log_xs4, log_loguniform_distrib,\n",
+    "                 label=\"log(X) with X ~ loguniform\")\n",
     "plt.xlabel(\"Log of hyperparameter value\")\n",
     "plt.legend()\n",
     "plt.axis([-8, 1, 0, 0.2])\n",
@@ -5523,15 +5523,15 @@
    ],
    "source": [
     "from sklearn.model_selection import RandomizedSearchCV\n",
-    "from scipy.stats import expon, reciprocal\n",
+    "from scipy.stats import expon, loguniform\n",
     "\n",
     "# see https://docs.scipy.org/doc/scipy/reference/stats.html\n",
-    "# for `expon()` and `reciprocal()` documentation and more probability distribution functions.\n",
+    "# for `expon()` and `loguniform()` documentation and more probability distribution functions.\n",
     "\n",
     "# Note: gamma is ignored when kernel is \"linear\"\n",
     "param_distribs = {\n",
     "        'svr__kernel': ['linear', 'rbf'],\n",
-    "        'svr__C': reciprocal(20, 200_000),\n",
+    "        'svr__C': loguniform(20, 200_000),\n",
     "        'svr__gamma': expon(scale=1.0),\n",
     "    }\n",
     "\n",
@@ -5641,7 +5641,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We used the `reciprocal()` distribution for `C`, meaning we did not have a clue what the optimal scale of `C` was before running the random search. It explored the range from 20 to 200 just as much as the range from 2,000 to 20,000 or from 20,000 to 200,000."
+    "We used the `loguniform()` distribution for `C`, meaning we did not have a clue what the optimal scale of `C` was before running the random search. It explored the range from 20 to 200 just as much as the range from 2,000 to 20,000 or from 20,000 to 200,000."
    ]
   },
   {
@@ -5989,7 +5989,7 @@
     "param_distribs = {\n",
     "    \"preprocessing__geo__estimator__n_neighbors\": range(1, 30),\n",
     "    \"preprocessing__geo__estimator__weights\": [\"distance\", \"uniform\"],\n",
-    "    \"svr__C\": reciprocal(20, 200_000),\n",
+    "    \"svr__C\": loguniform(20, 200_000),\n",
     "    \"svr__gamma\": expon(scale=1.0),\n",
     "}\n",
     "\n",
diff --git a/05_support_vector_machines.ipynb b/05_support_vector_machines.ipynb
index 42dd42c..d3da40b 100644
--- a/05_support_vector_machines.ipynb
+++ b/05_support_vector_machines.ipynb
@@ -2186,10 +2186,10 @@
    ],
    "source": [
     "from sklearn.model_selection import RandomizedSearchCV\n",
-    "from scipy.stats import reciprocal, uniform\n",
+    "from scipy.stats import loguniform, uniform\n",
     "\n",
     "param_distrib = {\n",
-    "    \"svc__gamma\": reciprocal(0.001, 0.1),\n",
+    "    \"svc__gamma\": loguniform(0.001, 0.1),\n",
     "    \"svc__C\": uniform(1, 10)\n",
     "}\n",
     "rnd_search_cv = RandomizedSearchCV(svm_clf, param_distrib, n_iter=100, cv=5,\n",
@@ -2472,12 +2472,12 @@
    "source": [
     "from sklearn.svm import SVR\n",
     "from sklearn.model_selection import RandomizedSearchCV\n",
-    "from scipy.stats import reciprocal, uniform\n",
+    "from scipy.stats import loguniform, uniform\n",
     "\n",
     "svm_clf = make_pipeline(StandardScaler(), SVR())\n",
     "\n",
     "param_distrib = {\n",
-    "    \"svr__gamma\": reciprocal(0.001, 0.1),\n",
+    "    \"svr__gamma\": loguniform(0.001, 0.1),\n",
     "    \"svr__C\": uniform(1, 10)\n",
     "}\n",
     "rnd_search_cv = RandomizedSearchCV(svm_clf, param_distrib,\n",