diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb index d08c7fc..c50889a 100644 --- a/02_end_to_end_machine_learning_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -1649,15 +1649,15 @@ "outputs": [], "source": [ "from sklearn.model_selection import RandomizedSearchCV\n", - "from scipy.stats import expon\n", + "from scipy.stats import expon, reciprocal\n", "\n", "# see https://docs.scipy.org/doc/scipy-0.19.0/reference/stats.html\n", - "# for `expon()` documentation and more probability distribution functions.\n", + "# for `expon()` and `reciprocal()` documentation and more probability distribution functions.\n", "\n", "# Note: gamma is ignored when kernel is \"linear\"\n", "param_distribs = {\n", " 'kernel': ['linear', 'rbf'],\n", - " 'C': np.exp(9 * rnd.rand(1000) + 3), # from exp(3) to exp(12) (i.e, ~20.0 to ~162,755)\n", + " 'C': reciprocal(20, 200000),\n", " 'gamma': expon(scale=1.0),\n", " }\n", "\n", @@ -1777,10 +1777,11 @@ }, "outputs": [], "source": [ - "samples = np.exp(9 * rnd.rand(10000) + 2)\n", + "reciprocal_distrib = reciprocal(20, 200000)\n", + "samples = reciprocal_distrib.rvs(10000)\n", "plt.figure(figsize=(10, 4))\n", "plt.subplot(121)\n", - "plt.title(\"Home made distribution\\nexp(x) with x uniformly sampled in [2,11]\")\n", + "plt.title(\"Reciprocal distribution (scale=1.0)\")\n", "plt.hist(samples, bins=50)\n", "plt.subplot(122)\n", "plt.title(\"Log of this distribution\")\n", @@ -1788,6 +1789,13 @@ "plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The reciprocal distribution is useful when you have no idea what the scale of the hyperparameter should be (indeed, as you can see in the figure on the right, all scales are equally likely, within the given range), whereas the exponential distribution is best when you know (more or less) what the scale of the hyperparameter should be." + ] + }, { "cell_type": "markdown", "metadata": {