Replace scipy.stats.reciprocal with loguniform, fixes #44

2023-02-18 21:46:54 +13:00 · 2023-02-18 21:46:54 +13:00 · ec67c6962c
parent 2f8f998165
commit ec67c6962c
2 changed files with 19 additions and 19 deletions
--- a/02_end_to_end_machine_learning_project.ipynb
+++ b/02_end_to_end_machine_learning_project.ipynb
@ -4841,7 +4841,7 @@
    "* `scipy.stats.uniform(a, b)`: this is very similar, but for _continuous_ hyperparameters.\n",
    "* `scipy.stats.geom(1 / scale)`: for discrete values, when you want to sample roughly in a given scale. E.g., with scale=1000 most samples will be in this ballpark, but ~10% of all samples will be <100 and ~10% will be >2300.\n",
    "* `scipy.stats.expon(scale)`: this is the continuous equivalent of `geom`. Just set `scale` to the most likely value.\n",
-    "* `scipy.stats.reciprocal(a, b)`: when you have almost no idea what the optimal hyperparameter value's scale is. If you set a=0.01 and b=100, then you're just as likely to sample a value between 0.01 and 0.1 as a value between 10 and 100.\n"
+    "* `scipy.stats.loguniform(a, b)`: when you have almost no idea what the optimal hyperparameter value's scale is. If you set a=0.01 and b=100, then you're just as likely to sample a value between 0.01 and 0.1 as a value between 10 and 100.\n"
   ]
  },
  {
@ -4923,7 +4923,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Here are the PDF for `expon()` and `reciprocal()` (left column), as well as the PDF of log(X) (right column). The right column shows the distribution of hyperparameter _scales_. You can see that `expon()` favors hyperparameters with roughly the desired scale, with a longer tail towards the smaller scales. But `reciprocal()` does not favor any scale, they are all equally likely:"
+    "Here are the PDFs for `expon()` and `loguniform()` (left column), as well as the PDFs of log(X) (right column). The right column shows the distribution of hyperparameter _scales_. You can see that `expon()` favors hyperparameters with roughly the desired scale, with a longer tail towards the smaller scales. But `loguniform()` does not favor any scale; they are all equally likely:"
   ]
  },
  {
@ -4947,9 +4947,9 @@
    }
   ],
   "source": [
-    "# extra code – shows the difference between expon and reciprocal\n",
+    "# extra code – shows the difference between expon and loguniform\n",
    "\n",
-    "from scipy.stats import reciprocal\n",
+    "from scipy.stats import loguniform\n",
    "\n",
    "xs1 = np.linspace(0, 7, 500)\n",
    "expon_distrib = expon(scale=1).pdf(xs1)\n",
@ -4958,10 +4958,10 @@
    "log_expon_distrib = np.exp(log_xs2 - np.exp(log_xs2))\n",
    "\n",
    "xs3 = np.linspace(0.001, 1000, 500)\n",
-    "reciprocal_distrib = reciprocal(0.001, 1000).pdf(xs3)\n",
+    "loguniform_distrib = loguniform(0.001, 1000).pdf(xs3)\n",
    "\n",
    "log_xs4 = np.linspace(np.log(0.001), np.log(1000), 500)\n",
-    "log_reciprocal_distrib = uniform(np.log(0.001), np.log(1000)).pdf(log_xs4)\n",
+    "log_loguniform_distrib = uniform(np.log(0.001), np.log(1000)).pdf(log_xs4)\n",
    "\n",
    "plt.figure(figsize=(12, 7))\n",
    "\n",
@ -4979,16 +4979,16 @@
    "plt.axis([-5, 3, 0, 1])\n",
    "\n",
    "plt.subplot(2, 2, 3)\n",
-    "plt.fill_between(xs3, reciprocal_distrib,\n",
-    "                 label=\"scipy.reciprocal(0.001, 1000)\")\n",
+    "plt.fill_between(xs3, loguniform_distrib,\n",
+    "                 label=\"scipy.loguniform(0.001, 1000)\")\n",
    "plt.xlabel(\"Hyperparameter value\")\n",
    "plt.ylabel(\"PDF\")\n",
    "plt.legend()\n",
    "plt.axis([0.001, 1000, 0, 0.005])\n",
    "\n",
    "plt.subplot(2, 2, 4)\n",
-    "plt.fill_between(log_xs4, log_reciprocal_distrib,\n",
-    "                 label=\"log(X) with X ~ reciprocal\")\n",
+    "plt.fill_between(log_xs4, log_loguniform_distrib,\n",
+    "                 label=\"log(X) with X ~ loguniform\")\n",
    "plt.xlabel(\"Log of hyperparameter value\")\n",
    "plt.legend()\n",
    "plt.axis([-8, 1, 0, 0.2])\n",
@ -5523,15 +5523,15 @@
   ],
   "source": [
    "from sklearn.model_selection import RandomizedSearchCV\n",
-    "from scipy.stats import expon, reciprocal\n",
+    "from scipy.stats import expon, loguniform\n",
    "\n",
    "# see https://docs.scipy.org/doc/scipy/reference/stats.html\n",
-    "# for `expon()` and `reciprocal()` documentation and more probability distribution functions.\n",
+    "# for `expon()` and `loguniform()` documentation and more probability distribution functions.\n",
    "\n",
    "# Note: gamma is ignored when kernel is \"linear\"\n",
    "param_distribs = {\n",
    "        'svr__kernel': ['linear', 'rbf'],\n",
-    "        'svr__C': reciprocal(20, 200_000),\n",
+    "        'svr__C': loguniform(20, 200_000),\n",
    "        'svr__gamma': expon(scale=1.0),\n",
    "    }\n",
    "\n",
@ -5641,7 +5641,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "We used the `reciprocal()` distribution for `C`, meaning we did not have a clue what the optimal scale of `C` was before running the random search. It explored the range from 20 to 200 just as much as the range from 2,000 to 20,000 or from 20,000 to 200,000."
+    "We used the `loguniform()` distribution for `C`, meaning we did not have a clue what the optimal scale of `C` was before running the random search. It explored the range from 20 to 200 just as much as the range from 2,000 to 20,000 or from 20,000 to 200,000."
   ]
  },
  {
@ -5989,7 +5989,7 @@
    "param_distribs = {\n",
    "    \"preprocessing__geo__estimator__n_neighbors\": range(1, 30),\n",
    "    \"preprocessing__geo__estimator__weights\": [\"distance\", \"uniform\"],\n",
-    "    \"svr__C\": reciprocal(20, 200_000),\n",
+    "    \"svr__C\": loguniform(20, 200_000),\n",
    "    \"svr__gamma\": expon(scale=1.0),\n",
    "}\n",
    "\n",
--- a/05_support_vector_machines.ipynb
+++ b/05_support_vector_machines.ipynb
@ -2186,10 +2186,10 @@
   ],
   "source": [
    "from sklearn.model_selection import RandomizedSearchCV\n",
-    "from scipy.stats import reciprocal, uniform\n",
+    "from scipy.stats import loguniform, uniform\n",
    "\n",
    "param_distrib = {\n",
-    "    \"svc__gamma\": reciprocal(0.001, 0.1),\n",
+    "    \"svc__gamma\": loguniform(0.001, 0.1),\n",
    "    \"svc__C\": uniform(1, 10)\n",
    "}\n",
    "rnd_search_cv = RandomizedSearchCV(svm_clf, param_distrib, n_iter=100, cv=5,\n",
@ -2472,12 +2472,12 @@
   "source": [
    "from sklearn.svm import SVR\n",
    "from sklearn.model_selection import RandomizedSearchCV\n",
-    "from scipy.stats import reciprocal, uniform\n",
+    "from scipy.stats import loguniform, uniform\n",
    "\n",
    "svm_clf = make_pipeline(StandardScaler(), SVR())\n",
    "\n",
    "param_distrib = {\n",
-    "    \"svr__gamma\": reciprocal(0.001, 0.1),\n",
+    "    \"svr__gamma\": loguniform(0.001, 0.1),\n",
    "    \"svr__C\": uniform(1, 10)\n",
    "}\n",
    "rnd_search_cv = RandomizedSearchCV(svm_clf, param_distrib,\n",