Replace scipy.stats.reciprocal with loguniform, fixes #44
parent
2f8f998165
commit
ec67c6962c
|
@ -4841,7 +4841,7 @@
|
|||
"* `scipy.stats.uniform(a, b)`: this is very similar, but for _continuous_ hyperparameters.\n",
|
||||
"* `scipy.stats.geom(1 / scale)`: for discrete values, when you want to sample roughly in a given scale. E.g., with scale=1000 most samples will be in this ballpark, but ~10% of all samples will be <100 and ~10% will be >2300.\n",
|
||||
"* `scipy.stats.expon(scale)`: this is the continuous equivalent of `geom`. Just set `scale` to the most likely value.\n",
|
||||
"* `scipy.stats.reciprocal(a, b)`: when you have almost no idea what the optimal hyperparameter value's scale is. If you set a=0.01 and b=100, then you're just as likely to sample a value between 0.01 and 0.1 as a value between 10 and 100.\n"
|
||||
"* `scipy.stats.loguniform(a, b)`: when you have almost no idea what the optimal hyperparameter value's scale is. If you set a=0.01 and b=100, then you're just as likely to sample a value between 0.01 and 0.1 as a value between 10 and 100.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -4923,7 +4923,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here are the PDF for `expon()` and `reciprocal()` (left column), as well as the PDF of log(X) (right column). The right column shows the distribution of hyperparameter _scales_. You can see that `expon()` favors hyperparameters with roughly the desired scale, with a longer tail towards the smaller scales. But `reciprocal()` does not favor any scale, they are all equally likely:"
|
||||
"Here are the PDFs for `expon()` and `loguniform()` (left column), as well as the PDFs of log(X) (right column). The right column shows the distribution of hyperparameter _scales_. You can see that `expon()` favors hyperparameters with roughly the desired scale, with a longer tail towards the smaller scales. But `loguniform()` does not favor any scale; they are all equally likely:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -4947,9 +4947,9 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"# extra code – shows the difference between expon and reciprocal\n",
|
||||
"# extra code – shows the difference between expon and loguniform\n",
|
||||
"\n",
|
||||
"from scipy.stats import reciprocal\n",
|
||||
"from scipy.stats import loguniform\n",
|
||||
"\n",
|
||||
"xs1 = np.linspace(0, 7, 500)\n",
|
||||
"expon_distrib = expon(scale=1).pdf(xs1)\n",
|
||||
|
@ -4958,10 +4958,10 @@
|
|||
"log_expon_distrib = np.exp(log_xs2 - np.exp(log_xs2))\n",
|
||||
"\n",
|
||||
"xs3 = np.linspace(0.001, 1000, 500)\n",
|
||||
"reciprocal_distrib = reciprocal(0.001, 1000).pdf(xs3)\n",
|
||||
"loguniform_distrib = loguniform(0.001, 1000).pdf(xs3)\n",
|
||||
"\n",
|
||||
"log_xs4 = np.linspace(np.log(0.001), np.log(1000), 500)\n",
|
||||
"log_reciprocal_distrib = uniform(np.log(0.001), np.log(1000)).pdf(log_xs4)\n",
|
||||
"log_loguniform_distrib = uniform(np.log(0.001), np.log(1000)).pdf(log_xs4)\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(12, 7))\n",
|
||||
"\n",
|
||||
|
@ -4979,16 +4979,16 @@
|
|||
"plt.axis([-5, 3, 0, 1])\n",
|
||||
"\n",
|
||||
"plt.subplot(2, 2, 3)\n",
|
||||
"plt.fill_between(xs3, reciprocal_distrib,\n",
|
||||
" label=\"scipy.reciprocal(0.001, 1000)\")\n",
|
||||
"plt.fill_between(xs3, loguniform_distrib,\n",
|
||||
" label=\"scipy.loguniform(0.001, 1000)\")\n",
|
||||
"plt.xlabel(\"Hyperparameter value\")\n",
|
||||
"plt.ylabel(\"PDF\")\n",
|
||||
"plt.legend()\n",
|
||||
"plt.axis([0.001, 1000, 0, 0.005])\n",
|
||||
"\n",
|
||||
"plt.subplot(2, 2, 4)\n",
|
||||
"plt.fill_between(log_xs4, log_reciprocal_distrib,\n",
|
||||
" label=\"log(X) with X ~ reciprocal\")\n",
|
||||
"plt.fill_between(log_xs4, log_loguniform_distrib,\n",
|
||||
" label=\"log(X) with X ~ loguniform\")\n",
|
||||
"plt.xlabel(\"Log of hyperparameter value\")\n",
|
||||
"plt.legend()\n",
|
||||
"plt.axis([-8, 1, 0, 0.2])\n",
|
||||
|
@ -5523,15 +5523,15 @@
|
|||
],
|
||||
"source": [
|
||||
"from sklearn.model_selection import RandomizedSearchCV\n",
|
||||
"from scipy.stats import expon, reciprocal\n",
|
||||
"from scipy.stats import expon, loguniform\n",
|
||||
"\n",
|
||||
"# see https://docs.scipy.org/doc/scipy/reference/stats.html\n",
|
||||
"# for `expon()` and `reciprocal()` documentation and more probability distribution functions.\n",
|
||||
"# for `expon()` and `loguniform()` documentation and more probability distribution functions.\n",
|
||||
"\n",
|
||||
"# Note: gamma is ignored when kernel is \"linear\"\n",
|
||||
"param_distribs = {\n",
|
||||
" 'svr__kernel': ['linear', 'rbf'],\n",
|
||||
" 'svr__C': reciprocal(20, 200_000),\n",
|
||||
" 'svr__C': loguniform(20, 200_000),\n",
|
||||
" 'svr__gamma': expon(scale=1.0),\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
|
@ -5641,7 +5641,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We used the `reciprocal()` distribution for `C`, meaning we did not have a clue what the optimal scale of `C` was before running the random search. It explored the range from 20 to 200 just as much as the range from 2,000 to 20,000 or from 20,000 to 200,000."
|
||||
"We used the `loguniform()` distribution for `C`, meaning we did not have a clue what the optimal scale of `C` was before running the random search. It explored the range from 20 to 200 just as much as the range from 2,000 to 20,000 or from 20,000 to 200,000."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -5989,7 +5989,7 @@
|
|||
"param_distribs = {\n",
|
||||
" \"preprocessing__geo__estimator__n_neighbors\": range(1, 30),\n",
|
||||
" \"preprocessing__geo__estimator__weights\": [\"distance\", \"uniform\"],\n",
|
||||
" \"svr__C\": reciprocal(20, 200_000),\n",
|
||||
" \"svr__C\": loguniform(20, 200_000),\n",
|
||||
" \"svr__gamma\": expon(scale=1.0),\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
|
|
|
@ -2186,10 +2186,10 @@
|
|||
],
|
||||
"source": [
|
||||
"from sklearn.model_selection import RandomizedSearchCV\n",
|
||||
"from scipy.stats import reciprocal, uniform\n",
|
||||
"from scipy.stats import loguniform, uniform\n",
|
||||
"\n",
|
||||
"param_distrib = {\n",
|
||||
" \"svc__gamma\": reciprocal(0.001, 0.1),\n",
|
||||
" \"svc__gamma\": loguniform(0.001, 0.1),\n",
|
||||
" \"svc__C\": uniform(1, 10)\n",
|
||||
"}\n",
|
||||
"rnd_search_cv = RandomizedSearchCV(svm_clf, param_distrib, n_iter=100, cv=5,\n",
|
||||
|
@ -2472,12 +2472,12 @@
|
|||
"source": [
|
||||
"from sklearn.svm import SVR\n",
|
||||
"from sklearn.model_selection import RandomizedSearchCV\n",
|
||||
"from scipy.stats import reciprocal, uniform\n",
|
||||
"from scipy.stats import loguniform, uniform\n",
|
||||
"\n",
|
||||
"svm_clf = make_pipeline(StandardScaler(), SVR())\n",
|
||||
"\n",
|
||||
"param_distrib = {\n",
|
||||
" \"svr__gamma\": reciprocal(0.001, 0.1),\n",
|
||||
" \"svr__gamma\": loguniform(0.001, 0.1),\n",
|
||||
" \"svr__C\": uniform(1, 10)\n",
|
||||
"}\n",
|
||||
"rnd_search_cv = RandomizedSearchCV(svm_clf, param_distrib,\n",
|
||||
|
|
Loading…
Reference in New Issue