Add code to compute a confidence interval

main
Aurélien Geron 2018-05-08 19:41:47 +02:00
parent e05d4b36ac
commit 55adea1ff4
1 changed files with 96 additions and 28 deletions

View File

@ -1437,6 +1437,74 @@
"final_rmse"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can compute a 95% confidence interval for the test RMSE:"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"from scipy import stats"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [],
"source": [
"confidence = 0.95\n",
"squared_errors = (final_predictions - y_test) ** 2\n",
"mean = squared_errors.mean()\n",
"m = len(squared_errors)\n",
"\n",
"np.sqrt(stats.t.interval(confidence, m - 1,\n",
" loc=np.mean(squared_errors),\n",
" scale=stats.sem(squared_errors)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We could compute the interval manually like this:"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"tscore = stats.t.ppf((1 + confidence) / 2, df=m - 1)\n",
"tmargin = tscore * squared_errors.std(ddof=1) / np.sqrt(m)\n",
"np.sqrt(mean - tmargin), np.sqrt(mean + tmargin)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Alternatively, we could use z-scores rather than t-scores:"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [],
"source": [
"zscore = stats.norm.ppf((1 + confidence) / 2)\n",
"zmargin = zscore * squared_errors.std(ddof=1) / np.sqrt(m)\n",
"np.sqrt(mean - zmargin), np.sqrt(mean + zmargin)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -1453,7 +1521,7 @@
},
{
"cell_type": "code",
"execution_count": 103,
"execution_count": 107,
"metadata": {},
"outputs": [],
"source": [
@ -1475,7 +1543,7 @@
},
{
"cell_type": "code",
"execution_count": 104,
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
@ -1484,7 +1552,7 @@
},
{
"cell_type": "code",
"execution_count": 105,
"execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
@ -1503,7 +1571,7 @@
},
{
"cell_type": "code",
"execution_count": 106,
"execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
@ -1541,7 +1609,7 @@
},
{
"cell_type": "code",
"execution_count": 107,
"execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
@ -1567,7 +1635,7 @@
},
{
"cell_type": "code",
"execution_count": 108,
"execution_count": 112,
"metadata": {},
"outputs": [],
"source": [
@ -1585,7 +1653,7 @@
},
{
"cell_type": "code",
"execution_count": 109,
"execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
@ -1615,14 +1683,14 @@
},
{
"cell_type": "code",
"execution_count": 110,
"execution_count": 114,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import RandomizedSearchCV\n",
"from scipy.stats import expon, reciprocal\n",
"\n",
"# see https://docs.scipy.org/doc/scipy-0.19.0/reference/stats.html\n",
"# see https://docs.scipy.org/doc/scipy/reference/stats.html\n",
"# for `expon()` and `reciprocal()` documentation and more probability distribution functions.\n",
"\n",
"# Note: gamma is ignored when kernel is \"linear\"\n",
@ -1648,7 +1716,7 @@
},
{
"cell_type": "code",
"execution_count": 111,
"execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
@ -1666,7 +1734,7 @@
},
{
"cell_type": "code",
"execution_count": 112,
"execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
@ -1689,7 +1757,7 @@
},
{
"cell_type": "code",
"execution_count": 113,
"execution_count": 117,
"metadata": {},
"outputs": [],
"source": [
@ -1714,7 +1782,7 @@
},
{
"cell_type": "code",
"execution_count": 114,
"execution_count": 118,
"metadata": {},
"outputs": [],
"source": [
@ -1753,7 +1821,7 @@
},
{
"cell_type": "code",
"execution_count": 115,
"execution_count": 119,
"metadata": {},
"outputs": [],
"source": [
@ -1789,7 +1857,7 @@
},
{
"cell_type": "code",
"execution_count": 116,
"execution_count": 120,
"metadata": {},
"outputs": [],
"source": [
@ -1805,7 +1873,7 @@
},
{
"cell_type": "code",
"execution_count": 117,
"execution_count": 121,
"metadata": {},
"outputs": [],
"source": [
@ -1815,7 +1883,7 @@
},
{
"cell_type": "code",
"execution_count": 118,
"execution_count": 122,
"metadata": {},
"outputs": [],
"source": [
@ -1831,7 +1899,7 @@
},
{
"cell_type": "code",
"execution_count": 119,
"execution_count": 123,
"metadata": {},
"outputs": [],
"source": [
@ -1847,7 +1915,7 @@
},
{
"cell_type": "code",
"execution_count": 120,
"execution_count": 124,
"metadata": {},
"outputs": [],
"source": [
@ -1859,7 +1927,7 @@
},
{
"cell_type": "code",
"execution_count": 121,
"execution_count": 125,
"metadata": {},
"outputs": [],
"source": [
@ -1875,7 +1943,7 @@
},
{
"cell_type": "code",
"execution_count": 122,
"execution_count": 126,
"metadata": {},
"outputs": [],
"source": [
@ -1891,7 +1959,7 @@
},
{
"cell_type": "code",
"execution_count": 123,
"execution_count": 127,
"metadata": {},
"outputs": [],
"source": [
@ -1921,7 +1989,7 @@
},
{
"cell_type": "code",
"execution_count": 124,
"execution_count": 128,
"metadata": {},
"outputs": [],
"source": [
@ -1934,7 +2002,7 @@
},
{
"cell_type": "code",
"execution_count": 125,
"execution_count": 129,
"metadata": {},
"outputs": [],
"source": [
@ -1950,7 +2018,7 @@
},
{
"cell_type": "code",
"execution_count": 126,
"execution_count": 130,
"metadata": {},
"outputs": [],
"source": [
@ -1984,7 +2052,7 @@
},
{
"cell_type": "code",
"execution_count": 127,
"execution_count": 131,
"metadata": {},
"outputs": [],
"source": [
@ -2000,7 +2068,7 @@
},
{
"cell_type": "code",
"execution_count": 128,
"execution_count": 132,
"metadata": {},
"outputs": [],
"source": [
@ -2038,7 +2106,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.5.2"
},
"nav_menu": {
"height": "279px",