Use default splitter="best" instead of splitter="random", fixes #340

main
Aurélien Geron 2021-03-04 15:17:19 +13:00
parent e2edfb274b
commit 33f9ff10b4
1 changed file with 7 additions and 5 deletions

View File

@@ -242,7 +242,7 @@
"from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.tree import DecisionTreeClassifier\n",
"\n", "\n",
"bag_clf = BaggingClassifier(\n", "bag_clf = BaggingClassifier(\n",
" DecisionTreeClassifier(random_state=42), n_estimators=500,\n", " DecisionTreeClassifier(), n_estimators=500,\n",
" max_samples=100, bootstrap=True, random_state=42)\n", " max_samples=100, bootstrap=True, random_state=42)\n",
"bag_clf.fit(X_train, y_train)\n", "bag_clf.fit(X_train, y_train)\n",
"y_pred = bag_clf.predict(X_test)" "y_pred = bag_clf.predict(X_test)"
@@ -327,9 +327,11 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from math import ceil, sqrt\n",
"\n",
"bag_clf = BaggingClassifier(\n", "bag_clf = BaggingClassifier(\n",
" DecisionTreeClassifier(splitter=\"random\", max_leaf_nodes=16, random_state=42),\n", " DecisionTreeClassifier(max_leaf_nodes=16),\n",
" n_estimators=500, max_samples=1.0, bootstrap=True, random_state=42)" " n_estimators=500, max_features=ceil(sqrt(X_train.shape[1])), random_state=42)"
] ]
}, },
{ {
@@ -362,7 +364,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"np.sum(y_pred == y_pred_rf) / len(y_pred) # almost identical predictions" "np.sum(y_pred == y_pred_rf) / len(y_pred) # very similar predictions"
] ]
}, },
{ {
@@ -419,7 +421,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"bag_clf = BaggingClassifier(\n", "bag_clf = BaggingClassifier(\n",
" DecisionTreeClassifier(random_state=42), n_estimators=500,\n", " DecisionTreeClassifier(), n_estimators=500,\n",
" bootstrap=True, oob_score=True, random_state=40)\n", " bootstrap=True, oob_score=True, random_state=40)\n",
"bag_clf.fit(X_train, y_train)\n", "bag_clf.fit(X_train, y_train)\n",
"bag_clf.oob_score_" "bag_clf.oob_score_"