Add code to compute a confidence interval

2018-05-08 19:41:47 +02:00 · 2018-05-08 19:41:47 +02:00 · 55adea1ff4
parent e05d4b36ac
commit 55adea1ff4
1 changed files with 96 additions and 28 deletions
--- a/02_end_to_end_machine_learning_project.ipynb
+++ b/02_end_to_end_machine_learning_project.ipynb
@ -1437,6 +1437,74 @@
    "final_rmse"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can compute a 95% confidence interval for the test RMSE:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 103,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scipy import stats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 104,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "confidence = 0.95\n",
+    "squared_errors = (final_predictions - y_test) ** 2\n",
+    "mean = squared_errors.mean()\n",
+    "m = len(squared_errors)\n",
+    "\n",
+    "np.sqrt(stats.t.interval(confidence, m - 1,\n",
+    "                         loc=np.mean(squared_errors),\n",
+    "                         scale=stats.sem(squared_errors)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We could compute the interval manually like this:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 105,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tscore = stats.t.ppf((1 + confidence) / 2, df=m - 1)\n",
+    "tmargin = tscore * squared_errors.std(ddof=1) / np.sqrt(m)\n",
+    "np.sqrt(mean - tmargin), np.sqrt(mean + tmargin)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Alternatively, we could use z-scores rather than t-scores:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 106,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "zscore = stats.norm.ppf((1 + confidence) / 2)\n",
+    "zmargin = zscore * squared_errors.std(ddof=1) / np.sqrt(m)\n",
+    "np.sqrt(mean - zmargin), np.sqrt(mean + zmargin)"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@ -1453,7 +1521,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 103,
+   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1475,7 +1543,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 104,
+   "execution_count": 108,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1484,7 +1552,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 105,
+   "execution_count": 109,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1503,7 +1571,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 106,
+   "execution_count": 110,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1541,7 +1609,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 107,
+   "execution_count": 111,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1567,7 +1635,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 108,
+   "execution_count": 112,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1585,7 +1653,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 109,
+   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1615,14 +1683,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 110,
+   "execution_count": 114,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import RandomizedSearchCV\n",
    "from scipy.stats import expon, reciprocal\n",
    "\n",
-    "# see https://docs.scipy.org/doc/scipy-0.19.0/reference/stats.html\n",
+    "# see https://docs.scipy.org/doc/scipy/reference/stats.html\n",
    "# for `expon()` and `reciprocal()` documentation and more probability distribution functions.\n",
    "\n",
    "# Note: gamma is ignored when kernel is \"linear\"\n",
@ -1648,7 +1716,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 111,
+   "execution_count": 115,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1666,7 +1734,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 112,
+   "execution_count": 116,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1689,7 +1757,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 113,
+   "execution_count": 117,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1714,7 +1782,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 114,
+   "execution_count": 118,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1753,7 +1821,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 115,
+   "execution_count": 119,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1789,7 +1857,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 116,
+   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1805,7 +1873,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 117,
+   "execution_count": 121,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1815,7 +1883,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 118,
+   "execution_count": 122,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1831,7 +1899,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 119,
+   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1847,7 +1915,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 120,
+   "execution_count": 124,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1859,7 +1927,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 121,
+   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1875,7 +1943,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 122,
+   "execution_count": 126,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1891,7 +1959,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 123,
+   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1921,7 +1989,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 124,
+   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1934,7 +2002,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 125,
+   "execution_count": 129,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1950,7 +2018,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 126,
+   "execution_count": 130,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1984,7 +2052,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 127,
+   "execution_count": 131,
   "metadata": {},
   "outputs": [],
   "source": [
@ -2000,7 +2068,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 128,
+   "execution_count": 132,
   "metadata": {},
   "outputs": [],
   "source": [
@ -2038,7 +2106,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.5.2"
  },
  "nav_menu": {
   "height": "279px",