Use default splitter="best" instead of splitter="random", fixes #340

main
Aurélien Geron 2021-03-04 15:17:19 +13:00
parent e2edfb274b
commit 33f9ff10b4
1 changed files with 7 additions and 5 deletions

View File

@@ -242,7 +242,7 @@
"from sklearn.tree import DecisionTreeClassifier\n",
"\n",
"bag_clf = BaggingClassifier(\n",
" DecisionTreeClassifier(random_state=42), n_estimators=500,\n",
" DecisionTreeClassifier(), n_estimators=500,\n",
" max_samples=100, bootstrap=True, random_state=42)\n",
"bag_clf.fit(X_train, y_train)\n",
"y_pred = bag_clf.predict(X_test)"
@@ -327,9 +327,11 @@
"metadata": {},
"outputs": [],
"source": [
"from math import ceil, sqrt\n",
"\n",
"bag_clf = BaggingClassifier(\n",
" DecisionTreeClassifier(splitter=\"random\", max_leaf_nodes=16, random_state=42),\n",
" n_estimators=500, max_samples=1.0, bootstrap=True, random_state=42)"
" DecisionTreeClassifier(max_leaf_nodes=16),\n",
" n_estimators=500, max_features=ceil(sqrt(X_train.shape[1])), random_state=42)"
]
},
{
@@ -362,7 +364,7 @@
"metadata": {},
"outputs": [],
"source": [
"np.sum(y_pred == y_pred_rf) / len(y_pred) # almost identical predictions"
"np.sum(y_pred == y_pred_rf) / len(y_pred) # very similar predictions"
]
},
{
@@ -419,7 +421,7 @@
"outputs": [],
"source": [
"bag_clf = BaggingClassifier(\n",
" DecisionTreeClassifier(random_state=42), n_estimators=500,\n",
" DecisionTreeClassifier(), n_estimators=500,\n",
" bootstrap=True, oob_score=True, random_state=40)\n",
"bag_clf.fit(X_train, y_train)\n",
"bag_clf.oob_score_"