Add boxplot in notebook 03, exercise 3

main
Aurélien Geron 2018-05-26 15:01:33 +02:00
parent 703815bca9
commit a7c50e12c4
1 changed files with 59 additions and 38 deletions

View File

@ -1744,8 +1744,8 @@
"source": [ "source": [
"from sklearn.model_selection import cross_val_score\n", "from sklearn.model_selection import cross_val_score\n",
"\n", "\n",
"scores = cross_val_score(svm_clf, X_train, y_train, cv=10)\n", "svm_scores = cross_val_score(svm_clf, X_train, y_train, cv=10)\n",
"scores.mean()" "svm_scores.mean()"
] ]
}, },
{ {
@ -1771,8 +1771,8 @@
"from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.ensemble import RandomForestClassifier\n",
"\n", "\n",
"forest_clf = RandomForestClassifier(random_state=42)\n", "forest_clf = RandomForestClassifier(random_state=42)\n",
"scores = cross_val_score(forest_clf, X_train, y_train, cv=10)\n", "forest_scores = cross_val_score(forest_clf, X_train, y_train, cv=10)\n",
"scores.mean()" "forest_scores.mean()"
] ]
}, },
{ {
@ -1782,6 +1782,27 @@
"That's much better!" "That's much better!"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Instead of just looking at the mean accuracy across the 10 cross-validation folds, let's plot all 10 scores for each model, along with a box plot highlighting the lower and upper quartiles, and \"whiskers\" showing the extent of the scores (thanks to Nevin Yilmaz for suggesting this visualization). Note that the `boxplot()` function detects outliers (called \"fliers\") and does not include them within the whiskers. Specifically, if the lower quartile is $Q_1$ and the upper quartile is $Q_3$, then the interquartile range $IQR = Q_3 - Q_1$ (this is the box's height), and any score lower than $Q_1 - 1.5 \\times IQR$ is a flier, and so is any score greater than $Q3 + 1.5 \\times IQR$."
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(8, 4))\n",
"plt.plot([1]*10, svm_scores, \".\")\n",
"plt.plot([2]*10, forest_scores, \".\")\n",
"plt.boxplot([svm_scores, forest_scores], labels=(\"SVM\",\"Random Forest\"))\n",
"plt.ylabel(\"Accuracy\", fontsize=14)\n",
"plt.show()"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -1796,7 +1817,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 124, "execution_count": 125,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1806,7 +1827,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 125, "execution_count": 126,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1830,7 +1851,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 126, "execution_count": 127,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1857,7 +1878,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 127, "execution_count": 128,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1873,7 +1894,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 128, "execution_count": 129,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1885,7 +1906,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 129, "execution_count": 130,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1894,7 +1915,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 130, "execution_count": 131,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1910,7 +1931,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 131, "execution_count": 132,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1925,7 +1946,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 132, "execution_count": 133,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1942,7 +1963,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 133, "execution_count": 134,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1951,7 +1972,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 134, "execution_count": 135,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1967,7 +1988,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 135, "execution_count": 136,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1986,7 +2007,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 136, "execution_count": 137,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2002,7 +2023,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 137, "execution_count": 138,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2011,7 +2032,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 138, "execution_count": 139,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2034,7 +2055,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 139, "execution_count": 140,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2051,7 +2072,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 140, "execution_count": 141,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2067,7 +2088,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 141, "execution_count": 142,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2089,7 +2110,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 142, "execution_count": 143,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2113,7 +2134,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 143, "execution_count": 144,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2132,7 +2153,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 144, "execution_count": 145,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2148,7 +2169,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 145, "execution_count": 146,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2172,7 +2193,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 146, "execution_count": 147,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2190,7 +2211,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 147, "execution_count": 148,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2216,7 +2237,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 148, "execution_count": 149,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2239,7 +2260,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 149, "execution_count": 150,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2291,7 +2312,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 150, "execution_count": 151,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2316,7 +2337,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 151, "execution_count": 152,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2348,7 +2369,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 152, "execution_count": 153,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2359,7 +2380,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 153, "execution_count": 154,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2375,7 +2396,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 154, "execution_count": 155,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2391,7 +2412,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 155, "execution_count": 156,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2407,7 +2428,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 156, "execution_count": 157,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2430,7 +2451,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 157, "execution_count": 158,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [