Synchronize chapter 7's code and the corresponding notebook's code
parent
0e8579943c
commit
b7779802f0
|
@ -134,9 +134,7 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -144,23 +142,76 @@
|
|||
"from sklearn.datasets import make_moons\n",
|
||||
"\n",
|
||||
"X, y = make_moons(n_samples=500, noise=0.30, random_state=42)\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)\n",
|
||||
"\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||||
"from sklearn.ensemble import VotingClassifier\n",
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"from sklearn.svm import SVC\n",
|
||||
"\n",
|
||||
"log_clf = LogisticRegression(random_state=42)\n",
|
||||
"rnd_clf = RandomForestClassifier(random_state=42)\n",
|
||||
"svm_clf = SVC(random_state=42)\n",
|
||||
"\n",
|
||||
"voting_clf = VotingClassifier(\n",
|
||||
" estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],\n",
|
||||
" voting='hard')\n",
|
||||
"voting_clf.fit(X_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"\n",
|
||||
"for clf in (log_clf, rnd_clf, svm_clf, voting_clf):\n",
|
||||
" clf.fit(X_train, y_train)\n",
|
||||
" y_pred = clf.predict(X_test)\n",
|
||||
" print(clf.__class__.__name__, accuracy_score(y_test, y_pred))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"log_clf = LogisticRegression(random_state=42)\n",
|
||||
"rnd_clf = RandomForestClassifier(random_state=42)\n",
|
||||
"svm_clf = SVC(probability=True, random_state=42)\n",
|
||||
"\n",
|
||||
"voting_clf = VotingClassifier(\n",
|
||||
" estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],\n",
|
||||
" voting='soft'\n",
|
||||
" )\n",
|
||||
"voting_clf.fit(X_train, y_train)\n",
|
||||
"\n",
|
||||
" estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],\n",
|
||||
" voting='soft')\n",
|
||||
"voting_clf.fit(X_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"\n",
|
||||
"for clf in (log_clf, rnd_clf, svm_clf, voting_clf):\n",
|
||||
|
@ -181,7 +232,25 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.ensemble import BaggingClassifier\n",
|
||||
"from sklearn.tree import DecisionTreeClassifier\n",
|
||||
"\n",
|
||||
"bag_clf = BaggingClassifier(\n",
|
||||
" DecisionTreeClassifier(random_state=42), n_estimators=500,\n",
|
||||
" max_samples=100, bootstrap=True, n_jobs=-1, random_state=42)\n",
|
||||
"bag_clf.fit(X_train, y_train)\n",
|
||||
"y_pred = bag_clf.predict(X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -189,23 +258,13 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import make_moons\n",
|
||||
"from sklearn.ensemble import BaggingClassifier\n",
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"from sklearn.tree import DecisionTreeClassifier\n",
|
||||
"\n",
|
||||
"bag_clf = BaggingClassifier(\n",
|
||||
" DecisionTreeClassifier(random_state=42), n_estimators=500,\n",
|
||||
" max_samples=100, bootstrap=True, n_jobs=-1, random_state=42\n",
|
||||
" )\n",
|
||||
"bag_clf.fit(X_train, y_train)\n",
|
||||
"y_pred = bag_clf.predict(X_test)\n",
|
||||
"print(accuracy_score(y_test, y_pred))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -221,7 +280,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -251,7 +310,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -282,7 +341,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 14,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -291,17 +350,25 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"bag_clf = BaggingClassifier(\n",
|
||||
" DecisionTreeClassifier(splitter=\"random\", max_leaf_nodes=16, random_state=42),\n",
|
||||
" n_estimators=500, max_samples=1.0, bootstrap=True,\n",
|
||||
" n_jobs=-1, random_state=42\n",
|
||||
" )\n",
|
||||
" DecisionTreeClassifier(splitter=\"random\", max_leaf_nodes=16, random_state=42),\n",
|
||||
" n_estimators=500, max_samples=1.0, bootstrap=True, n_jobs=-1, random_state=42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"bag_clf.fit(X_train, y_train)\n",
|
||||
"y_pred = bag_clf.predict(X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 16,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -319,7 +386,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 17,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -332,7 +399,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 18,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -344,13 +411,13 @@
|
|||
"iris = load_iris()\n",
|
||||
"rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1, random_state=42)\n",
|
||||
"rnd_clf.fit(iris[\"data\"], iris[\"target\"])\n",
|
||||
"for name, importance in zip(iris[\"feature_names\"], rnd_clf.feature_importances_):\n",
|
||||
" print(name, \"=\", importance)"
|
||||
"for name, score in zip(iris[\"feature_names\"], rnd_clf.feature_importances_):\n",
|
||||
" print(name, score)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -363,7 +430,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 20,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -374,7 +441,7 @@
|
|||
"plt.figure(figsize=(6, 4))\n",
|
||||
"\n",
|
||||
"for i in range(15):\n",
|
||||
" tree_clf = DecisionTreeClassifier(max_leaf_nodes=16, random_state=42+i)\n",
|
||||
" tree_clf = DecisionTreeClassifier(max_leaf_nodes=16, random_state=42 + i)\n",
|
||||
" indices_with_replacement = rnd.randint(0, len(X_train), len(X_train))\n",
|
||||
" tree_clf.fit(X[indices_with_replacement], y[indices_with_replacement])\n",
|
||||
" plot_decision_boundary(tree_clf, X, y, axes=[-1.5, 2.5, -1, 1.5], alpha=0.02, contour=False)\n",
|
||||
|
@ -394,7 +461,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 21,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -404,15 +471,14 @@
|
|||
"source": [
|
||||
"bag_clf = BaggingClassifier(\n",
|
||||
" DecisionTreeClassifier(random_state=42), n_estimators=500,\n",
|
||||
" bootstrap=True, n_jobs=-1, oob_score=True, random_state=40\n",
|
||||
")\n",
|
||||
" bootstrap=True, n_jobs=-1, oob_score=True, random_state=40)\n",
|
||||
"bag_clf.fit(X_train, y_train)\n",
|
||||
"bag_clf.oob_score_"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 22,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -420,12 +486,12 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"bag_clf.oob_decision_function_[:10]"
|
||||
"bag_clf.oob_decision_function_"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 23,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -450,7 +516,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 24,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -458,34 +524,13 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from six.moves import urllib\n",
|
||||
"from sklearn.datasets import fetch_mldata\n",
|
||||
"try:\n",
|
||||
" mnist = fetch_mldata('MNIST original')\n",
|
||||
"except urllib.error.HTTPError as ex:\n",
|
||||
" print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n",
|
||||
"\n",
|
||||
" # Alternative method to load MNIST, if mldata.org is down\n",
|
||||
" from scipy.io import loadmat\n",
|
||||
" mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n",
|
||||
" mnist_path = \"./mnist-original.mat\"\n",
|
||||
" response = urllib.request.urlopen(mnist_alternative_url)\n",
|
||||
" with open(mnist_path, \"wb\") as f:\n",
|
||||
" content = response.read()\n",
|
||||
" f.write(content)\n",
|
||||
" mnist_raw = loadmat(mnist_path)\n",
|
||||
" mnist = {\n",
|
||||
" \"data\": mnist_raw[\"data\"].T,\n",
|
||||
" \"target\": mnist_raw[\"label\"][0],\n",
|
||||
" \"COL_NAMES\": [\"label\", \"data\"],\n",
|
||||
" \"DESCR\": \"mldata.org dataset: mnist-original\",\n",
|
||||
" }\n",
|
||||
" print(\"Success!\")"
|
||||
"mnist = fetch_mldata('MNIST original')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 25,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -499,7 +544,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 26,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -516,7 +561,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 27,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -545,27 +590,34 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 28,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.ensemble import AdaBoostClassifier\n",
|
||||
"\n",
|
||||
"ada_clf = AdaBoostClassifier(\n",
|
||||
" DecisionTreeClassifier(max_depth=2), n_estimators=200,\n",
|
||||
" algorithm=\"SAMME.R\", learning_rate=0.5, random_state=42\n",
|
||||
" )\n",
|
||||
"ada_clf.fit(X_train, y_train)\n",
|
||||
" DecisionTreeClassifier(max_depth=1), n_estimators=200,\n",
|
||||
" algorithm=\"SAMME.R\", learning_rate=0.5, random_state=42)\n",
|
||||
"ada_clf.fit(X_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"plot_decision_boundary(ada_clf, X, y)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 30,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -599,7 +651,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 31,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -622,7 +674,20 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 32,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rnd.seed(42)\n",
|
||||
"X = rnd.rand(100, 1) - 0.5\n",
|
||||
"y = 3*X[:, 0]**2 + 0.05 * rnd.randn(100)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -632,29 +697,72 @@
|
|||
"source": [
|
||||
"from sklearn.tree import DecisionTreeRegressor\n",
|
||||
"\n",
|
||||
"rnd.seed(42)\n",
|
||||
"X = rnd.rand(100, 1) - 0.5\n",
|
||||
"y = 3*X[:, 0]**2 + 0.05 * rnd.randn(100)\n",
|
||||
"\n",
|
||||
"tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=42)\n",
|
||||
"tree_reg1.fit(X, y)\n",
|
||||
"\n",
|
||||
"y2 = y - tree_reg1.predict(X)\n",
|
||||
"tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=42)\n",
|
||||
"tree_reg2.fit(X, y2)\n",
|
||||
"\n",
|
||||
"y3 = y2 - tree_reg2.predict(X)\n",
|
||||
"tree_reg3 = DecisionTreeRegressor(max_depth=2, random_state=42)\n",
|
||||
"tree_reg3.fit(X, y3)\n",
|
||||
"\n",
|
||||
"X_new = np.array([[0.8]])\n",
|
||||
"y_pred = sum(tree.predict(X_new) for tree in (tree_reg1, tree_reg2, tree_reg3))\n",
|
||||
"print(y_pred)"
|
||||
"tree_reg1.fit(X, y)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 34,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y2 = y - tree_reg1.predict(X)\n",
|
||||
"tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=42)\n",
|
||||
"tree_reg2.fit(X, y2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y3 = y2 - tree_reg2.predict(X)\n",
|
||||
"tree_reg3 = DecisionTreeRegressor(max_depth=2, random_state=42)\n",
|
||||
"tree_reg3.fit(X, y3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_new = np.array([[0.8]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_pred = sum(tree.predict(X_new) for tree in (tree_reg1, tree_reg2, tree_reg3))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_pred"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -707,7 +815,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 40,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -717,12 +825,30 @@
|
|||
"source": [
|
||||
"from sklearn.ensemble import GradientBoostingRegressor\n",
|
||||
"\n",
|
||||
"gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=3, learning_rate=0.1, random_state=42)\n",
|
||||
"gbrt.fit(X, y)\n",
|
||||
"\n",
|
||||
"gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=3, learning_rate=1.0, random_state=42)\n",
|
||||
"gbrt.fit(X, y)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gbrt_slow = GradientBoostingRegressor(max_depth=2, n_estimators=200, learning_rate=0.1, random_state=42)\n",
|
||||
"gbrt_slow.fit(X, y)\n",
|
||||
"\n",
|
||||
"gbrt_slow.fit(X, y)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"plt.figure(figsize=(11,4))\n",
|
||||
"\n",
|
||||
"plt.subplot(121)\n",
|
||||
|
@ -749,7 +875,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 43,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -757,37 +883,37 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn.metrics import mean_squared_error\n",
|
||||
"\n",
|
||||
"X_train, X_val, y_train, y_val = train_test_split(X, y)\n",
|
||||
"X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=49)\n",
|
||||
"\n",
|
||||
"gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120, learning_rate=0.1, random_state=42)\n",
|
||||
"gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120, random_state=42)\n",
|
||||
"gbrt.fit(X_train, y_train)\n",
|
||||
"\n",
|
||||
"errors = [mean_squared_error(y_val, y_pred) for y_pred in gbrt.staged_predict(X_val)]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_n_estimators = np.argmin(errors)\n",
|
||||
"min_error = errors[best_n_estimators]\n",
|
||||
"errors = [mean_squared_error(y_val, y_pred)\n",
|
||||
" for y_pred in gbrt.staged_predict(X_val)]\n",
|
||||
"bst_n_estimators = np.argmin(errors)\n",
|
||||
"\n",
|
||||
"gbrt_best = GradientBoostingRegressor(max_depth=2, n_estimators=best_n_estimators, learning_rate=0.1, random_state=42)\n",
|
||||
"gbrt_best = GradientBoostingRegressor(max_depth=2,n_estimators=bst_n_estimators, random_state=42)\n",
|
||||
"gbrt_best.fit(X_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 44,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"min_error = np.min(errors)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -799,17 +925,17 @@
|
|||
"\n",
|
||||
"plt.subplot(121)\n",
|
||||
"plt.plot(errors, \"b.-\")\n",
|
||||
"plt.plot([best_n_estimators, best_n_estimators], [0, min_error], \"k--\")\n",
|
||||
"plt.plot([bst_n_estimators, bst_n_estimators], [0, min_error], \"k--\")\n",
|
||||
"plt.plot([0, 120], [min_error, min_error], \"k--\")\n",
|
||||
"plt.plot(best_n_estimators, min_error, \"ko\")\n",
|
||||
"plt.text(best_n_estimators, min_error*1.2, \"Minimum\", ha=\"center\", fontsize=14)\n",
|
||||
"plt.plot(bst_n_estimators, min_error, \"ko\")\n",
|
||||
"plt.text(bst_n_estimators, min_error*1.2, \"Minimum\", ha=\"center\", fontsize=14)\n",
|
||||
"plt.axis([0, 120, 0, 0.01])\n",
|
||||
"plt.xlabel(\"Number of trees\")\n",
|
||||
"plt.title(\"Validation error\", fontsize=14)\n",
|
||||
"\n",
|
||||
"plt.subplot(122)\n",
|
||||
"plot_predictions([gbrt_best], X, y, axes=[-0.5, 0.5, -0.1, 0.8])\n",
|
||||
"plt.title(\"Best model (55 trees)\", fontsize=14)\n",
|
||||
"plt.title(\"Best model (%d trees)\" % bst_n_estimators, fontsize=14)\n",
|
||||
"\n",
|
||||
"save_fig(\"early_stopping_gbrt_plot\")\n",
|
||||
"plt.show()"
|
||||
|
@ -817,7 +943,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 46,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -825,7 +951,7 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=1, learning_rate=0.1, random_state=42, warm_start=True)\n",
|
||||
"gbrt = GradientBoostingRegressor(max_depth=2, warm_start=True, random_state=42)\n",
|
||||
"\n",
|
||||
"min_val_error = float(\"inf\")\n",
|
||||
"error_going_up = 0\n",
|
||||
|
@ -845,7 +971,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"execution_count": 47,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -905,7 +1031,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.2+"
|
||||
"version": "3.5.3"
|
||||
},
|
||||
"nav_menu": {
|
||||
"height": "252px",
|
||||
|
|
Loading…
Reference in New Issue