diff --git a/03_classification.ipynb b/03_classification.ipynb index 235b680..9812293 100644 --- a/03_classification.ipynb +++ b/03_classification.ipynb @@ -1744,8 +1744,8 @@ "source": [ "from sklearn.model_selection import cross_val_score\n", "\n", - "scores = cross_val_score(svm_clf, X_train, y_train, cv=10)\n", - "scores.mean()" + "svm_scores = cross_val_score(svm_clf, X_train, y_train, cv=10)\n", + "svm_scores.mean()" ] }, { @@ -1771,8 +1771,8 @@ "from sklearn.ensemble import RandomForestClassifier\n", "\n", "forest_clf = RandomForestClassifier(random_state=42)\n", - "scores = cross_val_score(forest_clf, X_train, y_train, cv=10)\n", - "scores.mean()" + "forest_scores = cross_val_score(forest_clf, X_train, y_train, cv=10)\n", + "forest_scores.mean()" ] }, { @@ -1782,6 +1782,27 @@ "That's much better!" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Instead of just looking at the mean accuracy across the 10 cross-validation folds, let's plot all 10 scores for each model, along with a box plot highlighting the lower and upper quartiles, and \"whiskers\" showing the extent of the scores (thanks to Nevin Yilmaz for suggesting this visualization). Note that the `boxplot()` function detects outliers (called \"fliers\") and does not include them within the whiskers. Specifically, if the lower quartile is $Q_1$ and the upper quartile is $Q_3$, then the interquartile range $IQR = Q_3 - Q_1$ (this is the box's height), and any score lower than $Q_1 - 1.5 \\times IQR$ is a flier, and so is any score greater than $Q_3 + 1.5 \\times IQR$."
+ ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(8, 4))\n", + "plt.plot([1]*10, svm_scores, \".\")\n", + "plt.plot([2]*10, forest_scores, \".\")\n", + "plt.boxplot([svm_scores, forest_scores], labels=(\"SVM\",\"Random Forest\"))\n", + "plt.ylabel(\"Accuracy\", fontsize=14)\n", + "plt.show()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1796,7 +1817,7 @@ }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 125, "metadata": {}, "outputs": [], "source": [ @@ -1806,7 +1827,7 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 126, "metadata": {}, "outputs": [], "source": [ @@ -1830,7 +1851,7 @@ }, { "cell_type": "code", - "execution_count": 126, + "execution_count": 127, "metadata": {}, "outputs": [], "source": [ @@ -1857,7 +1878,7 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 128, "metadata": {}, "outputs": [], "source": [ @@ -1873,7 +1894,7 @@ }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 129, "metadata": {}, "outputs": [], "source": [ @@ -1885,7 +1906,7 @@ }, { "cell_type": "code", - "execution_count": 129, + "execution_count": 130, "metadata": {}, "outputs": [], "source": [ @@ -1894,7 +1915,7 @@ }, { "cell_type": "code", - "execution_count": 130, + "execution_count": 131, "metadata": {}, "outputs": [], "source": [ @@ -1910,7 +1931,7 @@ }, { "cell_type": "code", - "execution_count": 131, + "execution_count": 132, "metadata": {}, "outputs": [], "source": [ @@ -1925,7 +1946,7 @@ }, { "cell_type": "code", - "execution_count": 132, + "execution_count": 133, "metadata": {}, "outputs": [], "source": [ @@ -1942,7 +1963,7 @@ }, { "cell_type": "code", - "execution_count": 133, + "execution_count": 134, "metadata": {}, "outputs": [], "source": [ @@ -1951,7 +1972,7 @@ }, { "cell_type": "code", - "execution_count": 134, + "execution_count": 135, "metadata": {}, "outputs": [], 
"source": [ @@ -1967,7 +1988,7 @@ }, { "cell_type": "code", - "execution_count": 135, + "execution_count": 136, "metadata": {}, "outputs": [], "source": [ @@ -1986,7 +2007,7 @@ }, { "cell_type": "code", - "execution_count": 136, + "execution_count": 137, "metadata": {}, "outputs": [], "source": [ @@ -2002,7 +2023,7 @@ }, { "cell_type": "code", - "execution_count": 137, + "execution_count": 138, "metadata": {}, "outputs": [], "source": [ @@ -2011,7 +2032,7 @@ }, { "cell_type": "code", - "execution_count": 138, + "execution_count": 139, "metadata": {}, "outputs": [], "source": [ @@ -2034,7 +2055,7 @@ }, { "cell_type": "code", - "execution_count": 139, + "execution_count": 140, "metadata": {}, "outputs": [], "source": [ @@ -2051,7 +2072,7 @@ }, { "cell_type": "code", - "execution_count": 140, + "execution_count": 141, "metadata": {}, "outputs": [], "source": [ @@ -2067,7 +2088,7 @@ }, { "cell_type": "code", - "execution_count": 141, + "execution_count": 142, "metadata": {}, "outputs": [], "source": [ @@ -2089,7 +2110,7 @@ }, { "cell_type": "code", - "execution_count": 142, + "execution_count": 143, "metadata": {}, "outputs": [], "source": [ @@ -2113,7 +2134,7 @@ }, { "cell_type": "code", - "execution_count": 143, + "execution_count": 144, "metadata": {}, "outputs": [], "source": [ @@ -2132,7 +2153,7 @@ }, { "cell_type": "code", - "execution_count": 144, + "execution_count": 145, "metadata": {}, "outputs": [], "source": [ @@ -2148,7 +2169,7 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": 146, "metadata": {}, "outputs": [], "source": [ @@ -2172,7 +2193,7 @@ }, { "cell_type": "code", - "execution_count": 146, + "execution_count": 147, "metadata": {}, "outputs": [], "source": [ @@ -2190,7 +2211,7 @@ }, { "cell_type": "code", - "execution_count": 147, + "execution_count": 148, "metadata": {}, "outputs": [], "source": [ @@ -2216,7 +2237,7 @@ }, { "cell_type": "code", - "execution_count": 148, + "execution_count": 149, "metadata": {}, "outputs": 
[], "source": [ @@ -2239,7 +2260,7 @@ }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 150, "metadata": {}, "outputs": [], "source": [ @@ -2291,7 +2312,7 @@ }, { "cell_type": "code", - "execution_count": 150, + "execution_count": 151, "metadata": {}, "outputs": [], "source": [ @@ -2316,7 +2337,7 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 152, "metadata": {}, "outputs": [], "source": [ @@ -2348,7 +2369,7 @@ }, { "cell_type": "code", - "execution_count": 152, + "execution_count": 153, "metadata": {}, "outputs": [], "source": [ @@ -2359,7 +2380,7 @@ }, { "cell_type": "code", - "execution_count": 153, + "execution_count": 154, "metadata": {}, "outputs": [], "source": [ @@ -2375,7 +2396,7 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 155, "metadata": {}, "outputs": [], "source": [ @@ -2391,7 +2412,7 @@ }, { "cell_type": "code", - "execution_count": 155, + "execution_count": 156, "metadata": {}, "outputs": [], "source": [ @@ -2407,7 +2428,7 @@ }, { "cell_type": "code", - "execution_count": 156, + "execution_count": 157, "metadata": {}, "outputs": [], "source": [ @@ -2430,7 +2451,7 @@ }, { "cell_type": "code", - "execution_count": 157, + "execution_count": 158, "metadata": {}, "outputs": [], "source": [