Update all notebooks assuming we are all in the future now: sklearn 0.20+, python 3.5+, TF 2.0 preview

main
Aurélien Geron 2019-01-18 23:08:37 +08:00
parent 10c432a997
commit 6b8dff91d0
12 changed files with 1186 additions and 2625 deletions

View File

@ -102,7 +102,6 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Code example\n", "# Code example\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"import numpy as np\n", "import numpy as np\n",
"import pandas as pd\n", "import pandas as pd\n",
@ -495,7 +494,6 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Code example\n", "# Code example\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"import numpy as np\n", "import numpy as np\n",
"import pandas as pd\n", "import pandas as pd\n",
@ -684,6 +682,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Replace this linear model:\n", "# Replace this linear model:\n",
"import sklearn.linear_model\n",
"model = sklearn.linear_model.LinearRegression()" "model = sklearn.linear_model.LinearRegression()"
] ]
}, },
@ -694,6 +693,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# with this k-neighbors regression model:\n", "# with this k-neighbors regression model:\n",
"import sklearn.neighbors\n",
"model = sklearn.neighbors.KNeighborsRegressor(n_neighbors=3)" "model = sklearn.neighbors.KNeighborsRegressor(n_neighbors=3)"
] ]
}, },
@ -717,7 +717,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3 - tf2", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -73,7 +73,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Linear regression using the Normal Equation" "This notebook assumes you have installed Scikit-Learn ≥0.20."
] ]
}, },
{ {
@ -81,6 +81,23 @@
"execution_count": 2, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [
"import sklearn\n",
"assert sklearn.__version__ >= \"0.20\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Linear regression using the Normal Equation"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [ "source": [
"import numpy as np\n", "import numpy as np\n",
"\n", "\n",
@ -90,7 +107,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -104,7 +121,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -114,7 +131,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -123,7 +140,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -135,7 +152,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -154,7 +171,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -170,11 +187,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.linear_model import LinearRegression\n", "from sklearn.linear_model import LinearRegression\n",
"\n",
"lin_reg = LinearRegression()\n", "lin_reg = LinearRegression()\n",
"lin_reg.fit(X, y)\n", "lin_reg.fit(X, y)\n",
"lin_reg.intercept_, lin_reg.coef_" "lin_reg.intercept_, lin_reg.coef_"
@ -182,7 +200,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -198,7 +216,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -215,7 +233,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -238,14 +256,15 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"eta = 0.1\n", "eta = 0.1 # learning rate\n",
"n_iterations = 1000\n", "n_iterations = 1000\n",
"m = 100\n", "m = 100\n",
"theta = np.random.randn(2,1)\n", "\n",
"theta = np.random.randn(2,1) # random initialization\n",
"\n", "\n",
"for iteration in range(n_iterations):\n", "for iteration in range(n_iterations):\n",
" gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y)\n", " gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y)\n",
@ -254,7 +273,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -263,7 +282,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 16,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -272,7 +291,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 17,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -298,7 +317,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 18,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -324,7 +343,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 19,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -335,7 +354,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 20,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -371,8 +390,10 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 21,
"metadata": {}, "metadata": {
"scrolled": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"theta" "theta"
@ -380,18 +401,19 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 21, "execution_count": 22,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.linear_model import SGDRegressor\n", "from sklearn.linear_model import SGDRegressor\n",
"sgd_reg = SGDRegressor(max_iter=50, tol=-np.infty, penalty=None, eta0=0.1, random_state=42)\n", "\n",
"sgd_reg = SGDRegressor(max_iter=1000, tol=1e-3, penalty=None, eta0=0.1, random_state=42)\n",
"sgd_reg.fit(X, y.ravel())" "sgd_reg.fit(X, y.ravel())"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 22, "execution_count": 23,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -407,7 +429,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23, "execution_count": 24,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -440,7 +462,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 24, "execution_count": 25,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -449,7 +471,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 25, "execution_count": 26,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -460,7 +482,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 26, "execution_count": 27,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -485,7 +507,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 27, "execution_count": 28,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -497,7 +519,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 28, "execution_count": 29,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -508,7 +530,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 29, "execution_count": 30,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -522,7 +544,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 30, "execution_count": 31,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -534,7 +556,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 31, "execution_count": 32,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -543,7 +565,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 32, "execution_count": 33,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -554,7 +576,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "execution_count": 34,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -573,7 +595,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 34, "execution_count": 35,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -604,7 +626,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 35, "execution_count": 36,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -630,7 +652,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 36, "execution_count": 37,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -643,7 +665,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 37, "execution_count": 38,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -669,17 +691,47 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 38, "execution_count": 39,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.linear_model import Ridge\n",
"\n",
"np.random.seed(42)\n", "np.random.seed(42)\n",
"m = 20\n", "m = 20\n",
"X = 3 * np.random.rand(m, 1)\n", "X = 3 * np.random.rand(m, 1)\n",
"y = 1 + 0.5 * X + np.random.randn(m, 1) / 1.5\n", "y = 1 + 0.5 * X + np.random.randn(m, 1) / 1.5\n",
"X_new = np.linspace(0, 3, 100).reshape(100, 1)\n", "X_new = np.linspace(0, 3, 100).reshape(100, 1)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import Ridge\n",
"ridge_reg = Ridge(alpha=1, solver=\"cholesky\", random_state=42)\n",
"ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"ridge_reg = Ridge(alpha=1, solver=\"sag\", random_state=42)\n",
"ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import Ridge\n",
"\n", "\n",
"def plot_model(model_class, polynomial, alphas, **model_kargs):\n", "def plot_model(model_class, polynomial, alphas, **model_kargs):\n",
" for alpha, style in zip(alphas, (\"b-\", \"g--\", \"r:\")):\n", " for alpha, style in zip(alphas, (\"b-\", \"g--\", \"r:\")):\n",
@ -711,42 +763,26 @@
] ]
}, },
{ {
"cell_type": "code", "cell_type": "markdown",
"execution_count": 39,
"metadata": {}, "metadata": {},
"outputs": [],
"source": [ "source": [
"from sklearn.linear_model import Ridge\n", "**Note**: to be future-proof, we set `max_iter=1000` and `tol=1e-3` because these will be the default values in Scikit-Learn 0.21."
"ridge_reg = Ridge(alpha=1, solver=\"cholesky\", random_state=42)\n",
"ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 40, "execution_count": 43,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"sgd_reg = SGDRegressor(max_iter=50, tol=-np.infty, penalty=\"l2\", random_state=42)\n", "sgd_reg = SGDRegressor(penalty=\"l2\", max_iter=1000, tol=1e-3, random_state=42)\n",
"sgd_reg.fit(X, y.ravel())\n", "sgd_reg.fit(X, y.ravel())\n",
"sgd_reg.predict([[1.5]])" "sgd_reg.predict([[1.5]])"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 41, "execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"ridge_reg = Ridge(alpha=1, solver=\"sag\", random_state=42)\n",
"ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -757,7 +793,7 @@
"plot_model(Lasso, polynomial=False, alphas=(0, 0.1, 1), random_state=42)\n", "plot_model(Lasso, polynomial=False, alphas=(0, 0.1, 1), random_state=42)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n", "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.subplot(122)\n", "plt.subplot(122)\n",
"plot_model(Lasso, polynomial=True, alphas=(0, 10**-7, 1), tol=1, random_state=42)\n", "plot_model(Lasso, polynomial=True, alphas=(0, 10**-7, 1), random_state=42)\n",
"\n", "\n",
"save_fig(\"lasso_regression_plot\")\n", "save_fig(\"lasso_regression_plot\")\n",
"plt.show()" "plt.show()"
@ -765,7 +801,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 43, "execution_count": 45,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -777,7 +813,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 44, "execution_count": 46,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -789,10 +825,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 45, "execution_count": 47,
"metadata": { "metadata": {},
"scrolled": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"np.random.seed(42)\n", "np.random.seed(42)\n",
@ -800,23 +834,65 @@
"X = 6 * np.random.rand(m, 1) - 3\n", "X = 6 * np.random.rand(m, 1) - 3\n",
"y = 2 + X + 0.5 * X**2 + np.random.randn(m, 1)\n", "y = 2 + X + 0.5 * X**2 + np.random.randn(m, 1)\n",
"\n", "\n",
"X_train, X_val, y_train, y_val = train_test_split(X[:50], y[:50].ravel(), test_size=0.5, random_state=10)\n", "X_train, X_val, y_train, y_val = train_test_split(X[:50], y[:50].ravel(), test_size=0.5, random_state=10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Early stopping example:"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"from sklearn.base import clone\n",
"\n", "\n",
"poly_scaler = Pipeline([\n", "poly_scaler = Pipeline([\n",
" (\"poly_features\", PolynomialFeatures(degree=90, include_bias=False)),\n", " (\"poly_features\", PolynomialFeatures(degree=90, include_bias=False)),\n",
" (\"std_scaler\", StandardScaler()),\n", " (\"std_scaler\", StandardScaler())\n",
" ])\n", " ])\n",
"\n", "\n",
"X_train_poly_scaled = poly_scaler.fit_transform(X_train)\n", "X_train_poly_scaled = poly_scaler.fit_transform(X_train)\n",
"X_val_poly_scaled = poly_scaler.transform(X_val)\n", "X_val_poly_scaled = poly_scaler.transform(X_val)\n",
"\n", "\n",
"sgd_reg = SGDRegressor(max_iter=1,\n", "sgd_reg = SGDRegressor(max_iter=1, tol=-np.infty, warm_start=True,\n",
" tol=-np.infty,\n", " penalty=None, learning_rate=\"constant\", eta0=0.0005, random_state=42)\n",
" penalty=None,\n", "\n",
" eta0=0.0005,\n", "minimum_val_error = float(\"inf\")\n",
" warm_start=True,\n", "best_epoch = None\n",
" learning_rate=\"constant\",\n", "best_model = None\n",
" random_state=42)\n", "for epoch in range(1000):\n",
" sgd_reg.fit(X_train_poly_scaled, y_train) # continues where it left off\n",
" y_val_predict = sgd_reg.predict(X_val_poly_scaled)\n",
" val_error = mean_squared_error(y_val, y_val_predict)\n",
" if val_error < minimum_val_error:\n",
" minimum_val_error = val_error\n",
" best_epoch = epoch\n",
" best_model = clone(sgd_reg)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the graph:"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"sgd_reg = SGDRegressor(max_iter=1, tol=-np.infty, warm_start=True,\n",
" penalty=None, learning_rate=\"constant\", eta0=0.0005, random_state=42)\n",
"\n", "\n",
"n_epochs = 500\n", "n_epochs = 500\n",
"train_errors, val_errors = [], []\n", "train_errors, val_errors = [], []\n",
@ -851,30 +927,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 46, "execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.base import clone\n",
"sgd_reg = SGDRegressor(max_iter=1, tol=-np.infty, warm_start=True, penalty=None,\n",
" learning_rate=\"constant\", eta0=0.0005, random_state=42)\n",
"\n",
"minimum_val_error = float(\"inf\")\n",
"best_epoch = None\n",
"best_model = None\n",
"for epoch in range(1000):\n",
" sgd_reg.fit(X_train_poly_scaled, y_train) # continues where it left off\n",
" y_val_predict = sgd_reg.predict(X_val_poly_scaled)\n",
" val_error = mean_squared_error(y_val, y_val_predict)\n",
" if val_error < minimum_val_error:\n",
" minimum_val_error = val_error\n",
" best_epoch = epoch\n",
" best_model = clone(sgd_reg)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -883,7 +936,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 48, "execution_count": 51,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -894,7 +947,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 49, "execution_count": 52,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -921,7 +974,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 50, "execution_count": 53,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -989,7 +1042,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 51, "execution_count": 54,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1010,7 +1063,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 52, "execution_count": 55,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1021,7 +1074,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 53, "execution_count": 56,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1030,7 +1083,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 54, "execution_count": 57,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1038,20 +1091,27 @@
"y = (iris[\"target\"] == 2).astype(np.int) # 1 if Iris-Virginica, else 0" "y = (iris[\"target\"] == 2).astype(np.int) # 1 if Iris-Virginica, else 0"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Note**: To be future-proof we set `solver=\"lbfgs\"` since this will be the default value in Scikit-Learn 0.22."
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 55, "execution_count": 58,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.linear_model import LogisticRegression\n", "from sklearn.linear_model import LogisticRegression\n",
"log_reg = LogisticRegression(solver=\"liblinear\", random_state=42)\n", "log_reg = LogisticRegression(solver=\"lbfgs\", random_state=42)\n",
"log_reg.fit(X, y)" "log_reg.fit(X, y)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 56, "execution_count": 59,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1071,7 +1131,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 57, "execution_count": 60,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1098,7 +1158,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 58, "execution_count": 61,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1107,7 +1167,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 59, "execution_count": 62,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1116,7 +1176,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 60, "execution_count": 63,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1125,7 +1185,7 @@
"X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n", "X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n",
"y = (iris[\"target\"] == 2).astype(np.int)\n", "y = (iris[\"target\"] == 2).astype(np.int)\n",
"\n", "\n",
"log_reg = LogisticRegression(solver=\"liblinear\", C=10**10, random_state=42)\n", "log_reg = LogisticRegression(solver=\"lbfgs\", C=10**10, random_state=42)\n",
"log_reg.fit(X, y)\n", "log_reg.fit(X, y)\n",
"\n", "\n",
"x0, x1 = np.meshgrid(\n", "x0, x1 = np.meshgrid(\n",
@ -1160,7 +1220,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 61, "execution_count": 64,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1173,7 +1233,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 62, "execution_count": 65,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1211,7 +1271,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 63, "execution_count": 66,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1220,7 +1280,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 64, "execution_count": 67,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1265,7 +1325,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 65, "execution_count": 68,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1282,7 +1342,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 66, "execution_count": 69,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1298,7 +1358,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 67, "execution_count": 70,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1314,7 +1374,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 68, "execution_count": 71,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1345,7 +1405,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 69, "execution_count": 72,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1366,7 +1426,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 70, "execution_count": 73,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1375,7 +1435,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 71, "execution_count": 74,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1391,7 +1451,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 72, "execution_count": 75,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1411,7 +1471,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 73, "execution_count": 76,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1430,7 +1490,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 74, "execution_count": 77,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1458,7 +1518,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 75, "execution_count": 78,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1489,7 +1549,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 76, "execution_count": 79,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1505,7 +1565,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 77, "execution_count": 80,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1526,7 +1586,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 78, "execution_count": 81,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1560,7 +1620,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 79, "execution_count": 82,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1588,7 +1648,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 80, "execution_count": 83,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1628,7 +1688,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 81, "execution_count": 84,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1656,7 +1716,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 82, "execution_count": 85,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1701,7 +1761,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 83, "execution_count": 86,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1730,7 +1790,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3 - tf2", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@ -1,5 +1,12 @@
{ {
"cells": [ "cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -60,6 +67,23 @@
" plt.savefig(path, format='png', dpi=300)" " plt.savefig(path, format='png', dpi=300)"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook assumes you have installed Scikit-Learn ≥0.20."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import sklearn\n",
"assert sklearn.__version__ >= \"0.20\""
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -76,7 +100,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -98,7 +122,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -160,7 +184,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -204,7 +228,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -269,7 +293,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -293,7 +317,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -309,7 +333,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -332,7 +356,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -356,7 +380,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -393,7 +417,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -432,7 +456,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -453,7 +477,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -472,7 +496,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -495,7 +519,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 16,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -510,7 +534,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 17,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -523,7 +547,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 18,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -545,7 +569,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 19,
"metadata": { "metadata": {
"scrolled": true "scrolled": true
}, },
@ -613,7 +637,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 20,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -625,7 +649,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 21,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -638,7 +662,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 21, "execution_count": 22,
"metadata": { "metadata": {
"scrolled": true "scrolled": true
}, },
@ -681,7 +705,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 22, "execution_count": 23,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -693,7 +717,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23, "execution_count": 24,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -705,7 +729,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 24, "execution_count": 25,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -728,7 +752,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 25, "execution_count": 26,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -765,7 +789,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 26, "execution_count": 27,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -779,19 +803,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"**Warning**: the default value of `gamma` will change from `'auto'` to `'scale'` in version 0.22 to better account for unscaled features. To preserve the same results as in the book, we explicitly set it to `'auto'`, but you should probably just use the default in your own code." "**Note**: to be future-proof, we set `gamma=\"scale\"`, as this will be the default value in Scikit-Learn 0.22."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.svm import SVR\n",
"\n",
"svm_poly_reg = SVR(kernel=\"poly\", degree=2, C=100, epsilon=0.1, gamma=\"auto\")\n",
"svm_poly_reg.fit(X, y)"
] ]
}, },
{ {
@ -802,15 +814,27 @@
"source": [ "source": [
"from sklearn.svm import SVR\n", "from sklearn.svm import SVR\n",
"\n", "\n",
"svm_poly_reg1 = SVR(kernel=\"poly\", degree=2, C=100, epsilon=0.1, gamma=\"auto\")\n", "svm_poly_reg = SVR(kernel=\"poly\", degree=2, C=100, epsilon=0.1, gamma=\"scale\")\n",
"svm_poly_reg2 = SVR(kernel=\"poly\", degree=2, C=0.01, epsilon=0.1, gamma=\"auto\")\n", "svm_poly_reg.fit(X, y)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.svm import SVR\n",
"\n",
"svm_poly_reg1 = SVR(kernel=\"poly\", degree=2, C=100, epsilon=0.1, gamma=\"scale\")\n",
"svm_poly_reg2 = SVR(kernel=\"poly\", degree=2, C=0.01, epsilon=0.1, gamma=\"scale\")\n",
"svm_poly_reg1.fit(X, y)\n", "svm_poly_reg1.fit(X, y)\n",
"svm_poly_reg2.fit(X, y)" "svm_poly_reg2.fit(X, y)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 29, "execution_count": 30,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -835,7 +859,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 30, "execution_count": 31,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -846,7 +870,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 31, "execution_count": 32,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -874,17 +898,17 @@
" ax.plot_wireframe(x1, x2, df, alpha=0.3, color=\"k\")\n", " ax.plot_wireframe(x1, x2, df, alpha=0.3, color=\"k\")\n",
" ax.plot(X_crop[:, 0][y_crop==0], X_crop[:, 1][y_crop==0], 0, \"bs\")\n", " ax.plot(X_crop[:, 0][y_crop==0], X_crop[:, 1][y_crop==0], 0, \"bs\")\n",
" ax.axis(x1_lim + x2_lim)\n", " ax.axis(x1_lim + x2_lim)\n",
" ax.text(4.5, 2.5, 3.8, \"Decision function $h$\", fontsize=15)\n", " ax.text(4.5, 2.5, 3.8, \"Decision function $h$\", fontsize=16)\n",
" ax.set_xlabel(r\"Petal length\", fontsize=15)\n", " ax.set_xlabel(r\"Petal length\", fontsize=16, labelpad=10)\n",
" ax.set_ylabel(r\"Petal width\", fontsize=15)\n", " ax.set_ylabel(r\"Petal width\", fontsize=16, labelpad=10)\n",
" ax.set_zlabel(r\"$h = \\mathbf{w}^T \\mathbf{x} + b$\", fontsize=18)\n", " ax.set_zlabel(r\"$h = \\mathbf{w}^T \\mathbf{x} + b$\", fontsize=18, labelpad=5)\n",
" ax.legend(loc=\"upper left\", fontsize=16)\n", " ax.legend(loc=\"upper left\", fontsize=16)\n",
"\n", "\n",
"fig = plt.figure(figsize=(11, 6))\n", "fig = plt.figure(figsize=(11, 6))\n",
"ax1 = fig.add_subplot(111, projection='3d')\n", "ax1 = fig.add_subplot(111, projection='3d')\n",
"plot_3D_decision_function(ax1, w=svm_clf2.coef_[0], b=svm_clf2.intercept_[0])\n", "plot_3D_decision_function(ax1, w=svm_clf2.coef_[0], b=svm_clf2.intercept_[0])\n",
"\n", "\n",
"#save_fig(\"iris_3D_plot\")\n", "save_fig(\"iris_3D_plot\")\n",
"plt.show()" "plt.show()"
] ]
}, },
@ -897,7 +921,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 32, "execution_count": 33,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -931,7 +955,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "execution_count": 34,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -956,7 +980,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 34, "execution_count": 35,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -992,7 +1016,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 35, "execution_count": 36,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1003,7 +1027,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 36, "execution_count": 37,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1021,7 +1045,11 @@
" tols.append(tol)\n", " tols.append(tol)\n",
" print(i, tol, t2-t1)\n", " print(i, tol, t2-t1)\n",
" tol /= 10\n", " tol /= 10\n",
"plt.semilogx(tols, times)" "plt.semilogx(tols, times, \"bo-\")\n",
"plt.xlabel(\"Tolerance\", fontsize=16)\n",
"plt.ylabel(\"Time (seconds)\", fontsize=16)\n",
"plt.grid(True)\n",
"plt.show()"
] ]
}, },
{ {
@ -1033,7 +1061,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 37, "execution_count": 38,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1044,7 +1072,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 38, "execution_count": 39,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1109,7 +1137,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 39, "execution_count": 40,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1119,7 +1147,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 40, "execution_count": 41,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1128,7 +1156,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 41, "execution_count": 42,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1139,7 +1167,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 42, "execution_count": 43,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1165,7 +1193,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 43, "execution_count": 44,
"metadata": { "metadata": {
"scrolled": true "scrolled": true
}, },
@ -1173,7 +1201,7 @@
"source": [ "source": [
"from sklearn.linear_model import SGDClassifier\n", "from sklearn.linear_model import SGDClassifier\n",
"\n", "\n",
"sgd_clf = SGDClassifier(loss=\"hinge\", alpha = 0.017, max_iter = 50, tol=-np.infty, random_state=42)\n", "sgd_clf = SGDClassifier(loss=\"hinge\", alpha=0.017, max_iter=1000, tol=1e-3, random_state=42)\n",
"sgd_clf.fit(X, y.ravel())\n", "sgd_clf.fit(X, y.ravel())\n",
"\n", "\n",
"m = len(X)\n", "m = len(X)\n",
@ -1242,7 +1270,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 44, "execution_count": 45,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1259,7 +1287,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 45, "execution_count": 46,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1273,7 +1301,7 @@
"lin_clf = LinearSVC(loss=\"hinge\", C=C, random_state=42)\n", "lin_clf = LinearSVC(loss=\"hinge\", C=C, random_state=42)\n",
"svm_clf = SVC(kernel=\"linear\", C=C)\n", "svm_clf = SVC(kernel=\"linear\", C=C)\n",
"sgd_clf = SGDClassifier(loss=\"hinge\", learning_rate=\"constant\", eta0=0.001, alpha=alpha,\n", "sgd_clf = SGDClassifier(loss=\"hinge\", learning_rate=\"constant\", eta0=0.001, alpha=alpha,\n",
" max_iter=100000, tol=-np.infty, random_state=42)\n", " max_iter=1000, tol=1e-3, random_state=42)\n",
"\n", "\n",
"scaler = StandardScaler()\n", "scaler = StandardScaler()\n",
"X_scaled = scaler.fit_transform(X)\n", "X_scaled = scaler.fit_transform(X)\n",
@ -1296,7 +1324,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 46, "execution_count": 47,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1358,19 +1386,15 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 47, "execution_count": 59,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"try:\n",
"from sklearn.datasets import fetch_openml\n", "from sklearn.datasets import fetch_openml\n",
"mnist = fetch_openml('mnist_784', version=1, cache=True)\n", "mnist = fetch_openml('mnist_784', version=1, cache=True)\n",
"except ImportError:\n",
" from sklearn.datasets import fetch_mldata\n",
" mnist = fetch_mldata('MNIST original')\n",
"\n", "\n",
"X = mnist[\"data\"]\n", "X = mnist[\"data\"]\n",
"y = mnist[\"target\"]\n", "y = mnist[\"target\"].astype(np.uint8)\n",
"\n", "\n",
"X_train = X[:60000]\n", "X_train = X[:60000]\n",
"y_train = y[:60000]\n", "y_train = y[:60000]\n",
@ -1382,31 +1406,21 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Many training algorithms are sensitive to the order of the training instances, so it's generally good practice to shuffle them first:" "Many training algorithms are sensitive to the order of the training instances, so it's generally good practice to shuffle them first. However, the dataset is already shuffled, so we do not need to do it."
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)\n",
"rnd_idx = np.random.permutation(60000)\n",
"X_train = X_train[rnd_idx]\n",
"y_train = y_train[rnd_idx]"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Let's start simple, with a linear SVM classifier. It will automatically use the One-vs-All (also called One-vs-the-Rest, OvR) strategy, so there's nothing special we need to do. Easy!" "Let's start simple, with a linear SVM classifier. It will automatically use the One-vs-All (also called One-vs-the-Rest, OvR) strategy, so there's nothing special we need to do. Easy!\n",
"\n",
"**Warning**: this may take a few minutes depending on your hardware."
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 49, "execution_count": 60,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1423,7 +1437,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 50, "execution_count": 61,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1437,12 +1451,12 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Wow, 86% accuracy on MNIST is a really bad performance. This linear model is certainly too simple for MNIST, but perhaps we just needed to scale the data first:" "Okay, 89.5% accuracy on MNIST is pretty bad. This linear model is certainly too simple for MNIST, but perhaps we just needed to scale the data first:"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 51, "execution_count": 62,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1451,9 +1465,16 @@
"X_test_scaled = scaler.transform(X_test.astype(np.float32))" "X_test_scaled = scaler.transform(X_test.astype(np.float32))"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Warning**: this may take a few minutes depending on your hardware."
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 52, "execution_count": 63,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1463,7 +1484,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 53, "execution_count": 64,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1475,24 +1496,29 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"That's much better (we cut the error rate in two), but still not great at all for MNIST. If we want to use an SVM, we will have to use a kernel. Let's try an `SVC` with an RBF kernel (the default).\n", "That's much better (we cut the error rate by about 25%), but still not great at all for MNIST. If we want to use an SVM, we will have to use a kernel. Let's try an `SVC` with an RBF kernel (the default)."
"\n", ]
"**Warning**: if you are using Scikit-Learn ≤ 0.19, the `SVC` class will use the One-vs-One (OvO) strategy by default, so you must explicitly set `decision_function_shape=\"ovr\"` if you want to use the OvR strategy instead (OvR is the default since 0.19)." },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Note**: to be future-proof we set `gamma=\"scale\"` since it will be the default value in Scikit-Learn 0.22."
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 54, "execution_count": 77,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"svm_clf = SVC(decision_function_shape=\"ovr\", gamma=\"auto\")\n", "svm_clf = SVC(gamma=\"scale\")\n",
"svm_clf.fit(X_train_scaled[:10000], y_train[:10000])" "svm_clf.fit(X_train_scaled[:10000], y_train[:10000])"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 55, "execution_count": 78,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1793,7 +1819,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3 - tf2", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@ -71,7 +71,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Training and visualizing" "This notebook assumes you have installed Scikit-Learn ≥0.20."
] ]
}, },
{ {
@ -79,6 +79,23 @@
"execution_count": 2, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [
"import sklearn\n",
"assert sklearn.__version__ >= \"0.20\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Training and visualizing"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [ "source": [
"from sklearn.datasets import load_iris\n", "from sklearn.datasets import load_iris\n",
"from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.tree import DecisionTreeClassifier\n",
@ -93,7 +110,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -111,7 +128,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -165,7 +182,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -174,7 +191,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -190,7 +207,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -199,7 +216,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -213,7 +230,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -230,7 +247,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -256,7 +273,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -275,7 +292,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -311,7 +328,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -325,7 +342,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -337,7 +354,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 16,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -384,7 +401,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 17,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -399,7 +416,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 18,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -489,7 +506,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 19,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -507,7 +524,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 20,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -525,21 +542,21 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 21,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.model_selection import GridSearchCV\n", "from sklearn.model_selection import GridSearchCV\n",
"\n", "\n",
"params = {'max_leaf_nodes': list(range(2, 100)), 'min_samples_split': [2, 3, 4]}\n", "params = {'max_leaf_nodes': list(range(2, 100)), 'min_samples_split': [2, 3, 4]}\n",
"grid_search_cv = GridSearchCV(DecisionTreeClassifier(random_state=42), params, n_jobs=-1, verbose=1, cv=3)\n", "grid_search_cv = GridSearchCV(DecisionTreeClassifier(random_state=42), params, verbose=1, cv=3)\n",
"\n", "\n",
"grid_search_cv.fit(X_train, y_train)" "grid_search_cv.fit(X_train, y_train)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 21, "execution_count": 22,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -562,7 +579,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 22, "execution_count": 23,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -595,7 +612,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23, "execution_count": 24,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -622,7 +639,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 24, "execution_count": 25,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -650,7 +667,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 25, "execution_count": 26,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -662,7 +679,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 26, "execution_count": 27,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -680,24 +697,17 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 27, "execution_count": 28,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"accuracy_score(y_test, y_pred_majority_votes.reshape([-1]))" "accuracy_score(y_test, y_pred_majority_votes.reshape([-1]))"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3 - tf2", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@ -71,7 +71,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Voting classifiers" "This notebook assumes you have installed Scikit-Learn ≥0.20."
] ]
}, },
{ {
@ -79,6 +79,23 @@
"execution_count": 2, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [
"import sklearn\n",
"assert sklearn.__version__ >= \"0.20\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Voting classifiers"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [ "source": [
"heads_proba = 0.51\n", "heads_proba = 0.51\n",
"coin_tosses = (np.random.rand(10000, 10) < heads_proba).astype(np.int32)\n", "coin_tosses = (np.random.rand(10000, 10) < heads_proba).astype(np.int32)\n",
@ -87,7 +104,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -105,7 +122,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -120,27 +137,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"**Warning**: In Scikit-Learn 0.20, some hyperparameters (`solver`, `n_estimators`, `gamma`, etc.) start issuing warnings about the fact that their default value will change in Scikit-Learn 0.22. To avoid these warnings and ensure that this notebooks keeps producing the same outputs as in the book, I set the hyperparameters to their old default value. In your own code, you can simply rely on the latest default values instead." "**Note**: to be future-proof, we set `solver=\"lbfgs\"`, `n_estimators=100`, and `gamma=\"scale\"` since these will be the default values in upcoming Scikit-Learn versions."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.ensemble import VotingClassifier\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.svm import SVC\n",
"\n",
"log_clf = LogisticRegression(solver=\"liblinear\", random_state=42)\n",
"rnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)\n",
"svm_clf = SVC(gamma=\"auto\", random_state=42)\n",
"\n",
"voting_clf = VotingClassifier(\n",
" estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],\n",
" voting='hard')"
] ]
}, },
{ {
@ -149,7 +146,18 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"voting_clf.fit(X_train, y_train)" "from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.ensemble import VotingClassifier\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.svm import SVC\n",
"\n",
"log_clf = LogisticRegression(solver=\"lbfgs\", random_state=42)\n",
"rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)\n",
"svm_clf = SVC(gamma=\"scale\", random_state=42)\n",
"\n",
"voting_clf = VotingClassifier(\n",
" estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],\n",
" voting='hard')"
] ]
}, },
{ {
@ -157,6 +165,15 @@
"execution_count": 7, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [
"voting_clf.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [ "source": [
"from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import accuracy_score\n",
"\n", "\n",
@ -166,15 +183,22 @@
" print(clf.__class__.__name__, accuracy_score(y_test, y_pred))" " print(clf.__class__.__name__, accuracy_score(y_test, y_pred))"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Soft voting:"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"log_clf = LogisticRegression(solver=\"liblinear\", random_state=42)\n", "log_clf = LogisticRegression(solver=\"lbfgs\", random_state=42)\n",
"rnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)\n", "rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)\n",
"svm_clf = SVC(gamma=\"auto\", probability=True, random_state=42)\n", "svm_clf = SVC(gamma=\"scale\", probability=True, random_state=42)\n",
"\n", "\n",
"voting_clf = VotingClassifier(\n", "voting_clf = VotingClassifier(\n",
" estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],\n", " estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],\n",
@ -184,7 +208,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -205,7 +229,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -214,14 +238,14 @@
"\n", "\n",
"bag_clf = BaggingClassifier(\n", "bag_clf = BaggingClassifier(\n",
" DecisionTreeClassifier(random_state=42), n_estimators=500,\n", " DecisionTreeClassifier(random_state=42), n_estimators=500,\n",
" max_samples=100, bootstrap=True, n_jobs=-1, random_state=42)\n", " max_samples=100, bootstrap=True, random_state=42)\n",
"bag_clf.fit(X_train, y_train)\n", "bag_clf.fit(X_train, y_train)\n",
"y_pred = bag_clf.predict(X_test)" "y_pred = bag_clf.predict(X_test)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -231,7 +255,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -243,7 +267,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -269,7 +293,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -293,18 +317,18 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 16,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"bag_clf = BaggingClassifier(\n", "bag_clf = BaggingClassifier(\n",
" DecisionTreeClassifier(splitter=\"random\", max_leaf_nodes=16, random_state=42),\n", " DecisionTreeClassifier(splitter=\"random\", max_leaf_nodes=16, random_state=42),\n",
" n_estimators=500, max_samples=1.0, bootstrap=True, n_jobs=-1, random_state=42)" " n_estimators=500, max_samples=1.0, bootstrap=True, random_state=42)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 17,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -314,39 +338,25 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 18,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.ensemble import RandomForestClassifier\n",
"\n", "\n",
"rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1, random_state=42)\n", "rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, random_state=42)\n",
"rnd_clf.fit(X_train, y_train)\n", "rnd_clf.fit(X_train, y_train)\n",
"\n", "\n",
"y_pred_rf = rnd_clf.predict(X_test)" "y_pred_rf = rnd_clf.predict(X_test)"
] ]
}, },
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"np.sum(y_pred == y_pred_rf) / len(y_pred) # almost identical predictions"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 19,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.datasets import load_iris\n", "np.sum(y_pred == y_pred_rf) / len(y_pred) # almost identical predictions"
"iris = load_iris()\n",
"rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1, random_state=42)\n",
"rnd_clf.fit(iris[\"data\"], iris[\"target\"])\n",
"for name, score in zip(iris[\"feature_names\"], rnd_clf.feature_importances_):\n",
" print(name, score)"
] ]
}, },
{ {
@ -355,7 +365,12 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"rnd_clf.feature_importances_" "from sklearn.datasets import load_iris\n",
"iris = load_iris()\n",
"rnd_clf = RandomForestClassifier(n_estimators=500, random_state=42)\n",
"rnd_clf.fit(iris[\"data\"], iris[\"target\"])\n",
"for name, score in zip(iris[\"feature_names\"], rnd_clf.feature_importances_):\n",
" print(name, score)"
] ]
}, },
{ {
@ -363,6 +378,15 @@
"execution_count": 21, "execution_count": 21,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [
"rnd_clf.feature_importances_"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [ "source": [
"plt.figure(figsize=(6, 4))\n", "plt.figure(figsize=(6, 4))\n",
"\n", "\n",
@ -384,20 +408,20 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 22, "execution_count": 23,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"bag_clf = BaggingClassifier(\n", "bag_clf = BaggingClassifier(\n",
" DecisionTreeClassifier(random_state=42), n_estimators=500,\n", " DecisionTreeClassifier(random_state=42), n_estimators=500,\n",
" bootstrap=True, n_jobs=-1, oob_score=True, random_state=40)\n", " bootstrap=True, oob_score=True, random_state=40)\n",
"bag_clf.fit(X_train, y_train)\n", "bag_clf.fit(X_train, y_train)\n",
"bag_clf.oob_score_" "bag_clf.oob_score_"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23, "execution_count": 24,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -406,7 +430,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 24, "execution_count": 25,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -422,29 +446,16 @@
"## Feature importance" "## Feature importance"
] ]
}, },
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" from sklearn.datasets import fetch_openml\n",
" mnist = fetch_openml('mnist_784', version=1)\n",
" mnist.target = mnist.target.astype(np.int64)\n",
"except ImportError:\n",
" from sklearn.datasets import fetch_mldata\n",
" mnist = fetch_mldata('MNIST original')"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 26, "execution_count": 26,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"rnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)\n", "from sklearn.datasets import fetch_openml\n",
"rnd_clf.fit(mnist[\"data\"], mnist[\"target\"])" "\n",
"mnist = fetch_openml('mnist_784', version=1)\n",
"mnist.target = mnist.target.astype(np.uint8)"
] ]
}, },
{ {
@ -452,6 +463,16 @@
"execution_count": 27, "execution_count": 27,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [
"rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)\n",
"rnd_clf.fit(mnist[\"data\"], mnist[\"target\"])"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [ "source": [
"def plot_digit(data):\n", "def plot_digit(data):\n",
" image = data.reshape(28, 28)\n", " image = data.reshape(28, 28)\n",
@ -462,7 +483,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 28, "execution_count": 29,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -484,7 +505,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 29, "execution_count": 30,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -498,7 +519,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 30, "execution_count": 31,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -507,7 +528,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 31, "execution_count": 32,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -518,7 +539,7 @@
" sample_weights = np.ones(m)\n", " sample_weights = np.ones(m)\n",
" plt.subplot(subplot)\n", " plt.subplot(subplot)\n",
" for i in range(5):\n", " for i in range(5):\n",
" svm_clf = SVC(kernel=\"rbf\", C=0.05, gamma=\"auto\", random_state=42)\n", " svm_clf = SVC(kernel=\"rbf\", C=0.05, gamma=\"scale\", random_state=42)\n",
" svm_clf.fit(X_train, y_train, sample_weight=sample_weights)\n", " svm_clf.fit(X_train, y_train, sample_weight=sample_weights)\n",
" y_pred = svm_clf.predict(X_train)\n", " y_pred = svm_clf.predict(X_train)\n",
" sample_weights[y_pred != y_train] *= (1 + learning_rate)\n", " sample_weights[y_pred != y_train] *= (1 + learning_rate)\n",
@ -537,7 +558,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 32, "execution_count": 33,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -553,7 +574,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "execution_count": 34,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -564,7 +585,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 34, "execution_count": 35,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -576,7 +597,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 35, "execution_count": 36,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -587,7 +608,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 36, "execution_count": 37,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -598,7 +619,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 37, "execution_count": 38,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -607,7 +628,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 38, "execution_count": 39,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -616,7 +637,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 39, "execution_count": 40,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -625,7 +646,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 40, "execution_count": 41,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -674,7 +695,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 41, "execution_count": 42,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -686,7 +707,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 42, "execution_count": 43,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -696,7 +717,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 43, "execution_count": 44,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -723,7 +744,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 44, "execution_count": 45,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -746,7 +767,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 45, "execution_count": 46,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -755,7 +776,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 46, "execution_count": 47,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -781,7 +802,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 47, "execution_count": 48,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -805,7 +826,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 48, "execution_count": 49,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -814,7 +835,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 49, "execution_count": 50,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -830,7 +851,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 50, "execution_count": 51,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -843,7 +864,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 51, "execution_count": 52,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -851,22 +872,8 @@
" xgb_reg = xgboost.XGBRegressor(random_state=42)\n", " xgb_reg = xgboost.XGBRegressor(random_state=42)\n",
" xgb_reg.fit(X_train, y_train)\n", " xgb_reg.fit(X_train, y_train)\n",
" y_pred = xgb_reg.predict(X_val)\n", " y_pred = xgb_reg.predict(X_val)\n",
" val_error = mean_squared_error(y_val, y_pred)\n", " val_error = mean_squared_error(y_val, y_pred) # Not shown\n",
" print(\"Validation MSE:\", val_error)" " print(\"Validation MSE:\", val_error) # Not shown"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"if xgboost is not None: # not shown in the book\n",
" xgb_reg.fit(X_train, y_train,\n",
" eval_set=[(X_val, y_val)], early_stopping_rounds=2)\n",
" y_pred = xgb_reg.predict(X_val)\n",
" val_error = mean_squared_error(y_val, y_pred)\n",
" print(\"Validation MSE:\", val_error)"
] ]
}, },
{ {
@ -875,7 +882,12 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"%timeit xgboost.XGBRegressor().fit(X_train, y_train) if xgboost is not None else None" "if xgboost is not None: # not shown in the book\n",
" xgb_reg.fit(X_train, y_train,\n",
" eval_set=[(X_val, y_val)], early_stopping_rounds=2)\n",
" y_pred = xgb_reg.predict(X_val)\n",
" val_error = mean_squared_error(y_val, y_pred) # Not shown\n",
" print(\"Validation MSE:\", val_error) # Not shown"
] ]
}, },
{ {
@ -883,6 +895,15 @@
"execution_count": 54, "execution_count": 54,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [
"%timeit xgboost.XGBRegressor().fit(X_train, y_train) if xgboost is not None else None"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [ "source": [
"%timeit GradientBoostingRegressor().fit(X_train, y_train)" "%timeit GradientBoostingRegressor().fit(X_train, y_train)"
] ]
@ -933,7 +954,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 55, "execution_count": 56,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -942,7 +963,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 56, "execution_count": 57,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -961,7 +982,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 57, "execution_count": 58,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -972,19 +993,19 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 58, "execution_count": 59,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"random_forest_clf = RandomForestClassifier(n_estimators=10, random_state=42)\n", "random_forest_clf = RandomForestClassifier(n_estimators=100, random_state=42)\n",
"extra_trees_clf = ExtraTreesClassifier(n_estimators=10, random_state=42)\n", "extra_trees_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)\n",
"svm_clf = LinearSVC(random_state=42)\n", "svm_clf = LinearSVC(random_state=42)\n",
"mlp_clf = MLPClassifier(random_state=42)" "mlp_clf = MLPClassifier(random_state=42)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 59, "execution_count": 60,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -996,7 +1017,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 60, "execution_count": 61,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1019,7 +1040,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 61, "execution_count": 62,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1028,7 +1049,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 62, "execution_count": 63,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1042,7 +1063,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 63, "execution_count": 64,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1051,7 +1072,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 64, "execution_count": 65,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1060,7 +1081,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 65, "execution_count": 66,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1069,7 +1090,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 66, "execution_count": 67,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1085,7 +1106,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 67, "execution_count": 68,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1101,7 +1122,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 68, "execution_count": 69,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1117,7 +1138,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 69, "execution_count": 70,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1133,7 +1154,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 70, "execution_count": 71,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1149,7 +1170,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 71, "execution_count": 72,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1165,7 +1186,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 72, "execution_count": 73,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1174,7 +1195,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 73, "execution_count": 74,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1197,7 +1218,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 74, "execution_count": 75,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1206,7 +1227,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 75, "execution_count": 76,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1236,7 +1257,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 76, "execution_count": 77,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1248,7 +1269,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 77, "execution_count": 78,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1257,7 +1278,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 78, "execution_count": 79,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1267,7 +1288,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 79, "execution_count": 80,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1290,7 +1311,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 80, "execution_count": 81,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1302,7 +1323,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 81, "execution_count": 82,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1311,7 +1332,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 82, "execution_count": 83,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1320,7 +1341,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 83, "execution_count": 84,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1337,7 +1358,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3 - tf2", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@ -64,6 +64,23 @@
"warnings.filterwarnings(action=\"ignore\", message=\"^internal gelsd\")" "warnings.filterwarnings(action=\"ignore\", message=\"^internal gelsd\")"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook assumes you have installed Scikit-Learn ≥0.20."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import sklearn\n",
"assert sklearn.__version__ >= \"0.20\""
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -74,7 +91,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -115,7 +132,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -127,7 +144,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -139,7 +156,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -148,7 +165,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -158,7 +175,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -181,7 +198,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -193,7 +210,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -202,7 +219,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -220,7 +237,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -236,7 +253,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -252,7 +269,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -268,7 +285,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -284,7 +301,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 16,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -300,7 +317,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 17,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -316,7 +333,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 18,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -332,7 +349,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 19,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -355,7 +372,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 20,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -378,7 +395,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 21,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -394,7 +411,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 21, "execution_count": 22,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -417,7 +434,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 22, "execution_count": 23,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -445,7 +462,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23, "execution_count": 24,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -469,7 +486,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 24, "execution_count": 25,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -498,9 +515,9 @@
"ax.plot(X3D_inv[:, 0], X3D_inv[:, 1], X3D_inv[:, 2], \"k+\")\n", "ax.plot(X3D_inv[:, 0], X3D_inv[:, 1], X3D_inv[:, 2], \"k+\")\n",
"ax.plot(X3D_inv[:, 0], X3D_inv[:, 1], X3D_inv[:, 2], \"k.\")\n", "ax.plot(X3D_inv[:, 0], X3D_inv[:, 1], X3D_inv[:, 2], \"k.\")\n",
"ax.plot(X3D_above[:, 0], X3D_above[:, 1], X3D_above[:, 2], \"bo\")\n", "ax.plot(X3D_above[:, 0], X3D_above[:, 1], X3D_above[:, 2], \"bo\")\n",
"ax.set_xlabel(\"$x_1$\", fontsize=18)\n", "ax.set_xlabel(\"$x_1$\", fontsize=18, labelpad=10)\n",
"ax.set_ylabel(\"$x_2$\", fontsize=18)\n", "ax.set_ylabel(\"$x_2$\", fontsize=18, labelpad=10)\n",
"ax.set_zlabel(\"$x_3$\", fontsize=18)\n", "ax.set_zlabel(\"$x_3$\", fontsize=18, labelpad=10)\n",
"ax.set_xlim(axes[0:2])\n", "ax.set_xlim(axes[0:2])\n",
"ax.set_ylim(axes[2:4])\n", "ax.set_ylim(axes[2:4])\n",
"ax.set_zlim(axes[4:6])\n", "ax.set_zlim(axes[4:6])\n",
@ -519,7 +536,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 25, "execution_count": 26,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -548,7 +565,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 26, "execution_count": 27,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -558,7 +575,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 27, "execution_count": 28,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -582,7 +599,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 28, "execution_count": 29,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -607,7 +624,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 29, "execution_count": 30,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -695,7 +712,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 30, "execution_count": 31,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -768,23 +785,19 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 31, "execution_count": 32,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from six.moves import urllib\n",
"try:\n",
"from sklearn.datasets import fetch_openml\n", "from sklearn.datasets import fetch_openml\n",
"\n",
"mnist = fetch_openml('mnist_784', version=1)\n", "mnist = fetch_openml('mnist_784', version=1)\n",
" mnist.target = mnist.target.astype(np.int64)\n", "mnist.target = mnist.target.astype(np.uint8)"
"except ImportError:\n",
" from sklearn.datasets import fetch_mldata\n",
" mnist = fetch_mldata('MNIST original')"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 32, "execution_count": 33,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -798,7 +811,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "execution_count": 34,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -810,7 +823,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 34, "execution_count": 35,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -819,7 +832,28 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 35, "execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(6,4))\n",
"plt.plot(cumsum, linewidth=3)\n",
"plt.axis([0, 400, 0, 1])\n",
"plt.xlabel(\"Dimensions\")\n",
"plt.ylabel(\"Explained Variance\")\n",
"plt.plot([d, d], [0, 0.95], \"k:\")\n",
"plt.plot([0, d], [0.95, 0.95], \"k:\")\n",
"plt.plot(d, 0.95, \"ko\")\n",
"plt.annotate(\"Elbow\", xy=(65, 0.85), xytext=(70, 0.7),\n",
" arrowprops=dict(arrowstyle=\"->\"), fontsize=16)\n",
"plt.grid(True)\n",
"save_fig(\"explained_variance_plot\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -829,7 +863,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 36, "execution_count": 37,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -838,7 +872,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 37, "execution_count": 38,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -847,7 +881,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 38, "execution_count": 39,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -858,7 +892,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 39, "execution_count": 40,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -880,7 +914,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 40, "execution_count": 41,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -897,7 +931,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 41, "execution_count": 42,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -913,7 +947,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 42, "execution_count": 43,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -930,7 +964,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 43, "execution_count": 44,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -939,7 +973,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 44, "execution_count": 45,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -953,7 +987,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 45, "execution_count": 46,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -969,7 +1003,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 46, "execution_count": 47,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -985,7 +1019,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 47, "execution_count": 48,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1008,7 +1042,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 48, "execution_count": 49,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1028,7 +1062,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 49, "execution_count": 50,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1044,7 +1078,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 50, "execution_count": 51,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1057,7 +1091,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 51, "execution_count": 52,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1081,7 +1115,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 52, "execution_count": 53,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1109,7 +1143,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 53, "execution_count": 54,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1146,7 +1180,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 54, "execution_count": 55,
"metadata": { "metadata": {
"scrolled": true "scrolled": true
}, },
@ -1185,7 +1219,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 55, "execution_count": 56,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1194,7 +1228,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 56, "execution_count": 57,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1206,7 +1240,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 57, "execution_count": 58,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1240,7 +1274,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 58, "execution_count": 59,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1264,7 +1298,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 59, "execution_count": 60,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1280,7 +1314,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 60, "execution_count": 61,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1290,7 +1324,7 @@
"\n", "\n",
"clf = Pipeline([\n", "clf = Pipeline([\n",
" (\"kpca\", KernelPCA(n_components=2)),\n", " (\"kpca\", KernelPCA(n_components=2)),\n",
" (\"log_reg\", LogisticRegression(solver=\"liblinear\"))\n", " (\"log_reg\", LogisticRegression(solver=\"lbfgs\"))\n",
" ])\n", " ])\n",
"\n", "\n",
"param_grid = [{\n", "param_grid = [{\n",
@ -1304,7 +1338,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 61, "execution_count": 62,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1313,7 +1347,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 62, "execution_count": 63,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1325,7 +1359,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 63, "execution_count": 64,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1343,7 +1377,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 64, "execution_count": 65,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1352,7 +1386,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 65, "execution_count": 66,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1364,7 +1398,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 66, "execution_count": 67,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1388,7 +1422,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 67, "execution_count": 68,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1400,7 +1434,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 68, "execution_count": 69,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1412,7 +1446,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 69, "execution_count": 70,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1424,7 +1458,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 70, "execution_count": 71,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1439,7 +1473,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 71, "execution_count": 72,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1535,7 +1569,7 @@
"source": [ "source": [
"from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.ensemble import RandomForestClassifier\n",
"\n", "\n",
"rnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)" "rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)"
] ]
}, },
{ {
@ -1604,7 +1638,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"rnd_clf2 = RandomForestClassifier(n_estimators=10, random_state=42)\n", "rnd_clf2 = RandomForestClassifier(n_estimators=100, random_state=42)\n",
"t0 = time.time()\n", "t0 = time.time()\n",
"rnd_clf2.fit(X_train_reduced, y_train)\n", "rnd_clf2.fit(X_train_reduced, y_train)\n",
"t1 = time.time()" "t1 = time.time()"
@ -2232,7 +2266,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3 - tf2", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@ -1573,7 +1573,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"log_reg = LogisticRegression(multi_class=\"ovr\", solver=\"liblinear\", random_state=42)\n", "log_reg = LogisticRegression(multi_class=\"ovr\", solver=\"lbfgs\", random_state=42)\n",
"log_reg.fit(X_train, y_train)" "log_reg.fit(X_train, y_train)"
] ]
}, },
@ -1610,7 +1610,7 @@
"source": [ "source": [
"pipeline = Pipeline([\n", "pipeline = Pipeline([\n",
" (\"kmeans\", KMeans(n_clusters=50, random_state=42)),\n", " (\"kmeans\", KMeans(n_clusters=50, random_state=42)),\n",
" (\"log_reg\", LogisticRegression(multi_class=\"ovr\", solver=\"liblinear\", random_state=42)),\n", " (\"log_reg\", LogisticRegression(multi_class=\"ovr\", solver=\"lbfgs\", random_state=42)),\n",
"])\n", "])\n",
"pipeline.fit(X_train, y_train)" "pipeline.fit(X_train, y_train)"
] ]
@ -1721,7 +1721,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"log_reg = LogisticRegression(multi_class=\"ovr\", solver=\"liblinear\", random_state=42)\n", "log_reg = LogisticRegression(multi_class=\"ovr\", solver=\"lbfgs\", random_state=42)\n",
"log_reg.fit(X_train[:n_labeled], y_train[:n_labeled])\n", "log_reg.fit(X_train[:n_labeled], y_train[:n_labeled])\n",
"log_reg.score(X_test, y_test)" "log_reg.score(X_test, y_test)"
] ]
@ -1804,7 +1804,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"log_reg = LogisticRegression(multi_class=\"ovr\", solver=\"liblinear\", random_state=42)\n", "log_reg = LogisticRegression(multi_class=\"ovr\", solver=\"lbfgs\", random_state=42)\n",
"log_reg.fit(X_representative_digits, y_representative_digits)\n", "log_reg.fit(X_representative_digits, y_representative_digits)\n",
"log_reg.score(X_test, y_test)" "log_reg.score(X_test, y_test)"
] ]
@ -1840,7 +1840,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"log_reg = LogisticRegression(multi_class=\"ovr\", solver=\"liblinear\", random_state=42)\n", "log_reg = LogisticRegression(multi_class=\"ovr\", solver=\"lbfgs\", random_state=42)\n",
"log_reg.fit(X_train, y_train_propagated)" "log_reg.fit(X_train, y_train_propagated)"
] ]
}, },
@ -1894,7 +1894,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"log_reg = LogisticRegression(multi_class=\"ovr\", solver=\"liblinear\", random_state=42)\n", "log_reg = LogisticRegression(multi_class=\"ovr\", solver=\"lbfgs\", random_state=42)\n",
"log_reg.fit(X_train_partially_propagated, y_train_partially_propagated)" "log_reg.fit(X_train_partially_propagated, y_train_partially_propagated)"
] ]
}, },
@ -2280,7 +2280,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 134, "execution_count": 133,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2291,7 +2291,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 135, "execution_count": 134,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2300,7 +2300,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 136, "execution_count": 135,
"metadata": { "metadata": {
"scrolled": true "scrolled": true
}, },
@ -2318,7 +2318,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 137, "execution_count": 136,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2339,7 +2339,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 138, "execution_count": 137,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2348,7 +2348,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 139, "execution_count": 138,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2365,7 +2365,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 140, "execution_count": 139,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2374,7 +2374,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 141, "execution_count": 140,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2383,7 +2383,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 142, "execution_count": 141,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2399,7 +2399,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 143, "execution_count": 142,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2415,7 +2415,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 144, "execution_count": 143,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2431,7 +2431,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 145, "execution_count": 144,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2440,7 +2440,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 146, "execution_count": 145,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2456,7 +2456,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 147, "execution_count": 146,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2466,7 +2466,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 148, "execution_count": 147,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2489,7 +2489,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 149, "execution_count": 148,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2505,7 +2505,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 150, "execution_count": 149,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2528,7 +2528,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 151, "execution_count": 150,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2567,7 +2567,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 152, "execution_count": 151,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2592,7 +2592,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 153, "execution_count": 152,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2608,7 +2608,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 154, "execution_count": 153,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2626,7 +2626,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 155, "execution_count": 154,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2638,7 +2638,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 156, "execution_count": 155,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2663,7 +2663,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 157, "execution_count": 156,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2674,7 +2674,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 158, "execution_count": 157,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2714,7 +2714,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 159, "execution_count": 158,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2723,7 +2723,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 160, "execution_count": 159,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2739,7 +2739,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 161, "execution_count": 160,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2756,7 +2756,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 162, "execution_count": 161,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2765,7 +2765,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 163, "execution_count": 162,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2788,7 +2788,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 164, "execution_count": 163,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2798,7 +2798,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 165, "execution_count": 164,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2808,7 +2808,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 166, "execution_count": 165,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2839,7 +2839,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 167, "execution_count": 166,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2858,7 +2858,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 168, "execution_count": 167,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2867,7 +2867,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 169, "execution_count": 168,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2890,7 +2890,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 170, "execution_count": 169,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2899,7 +2899,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 171, "execution_count": 170,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2916,7 +2916,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 172, "execution_count": 171,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2925,7 +2925,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 173, "execution_count": 172,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2936,7 +2936,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 174, "execution_count": 173,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2951,7 +2951,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 175, "execution_count": 174,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2960,7 +2960,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 176, "execution_count": 175,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2969,7 +2969,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 177, "execution_count": 176,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -2996,7 +2996,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 178, "execution_count": 177,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -3005,7 +3005,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 179, "execution_count": 178,
"metadata": { "metadata": {
"scrolled": true "scrolled": true
}, },
@ -3017,7 +3017,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 180, "execution_count": 179,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -3051,7 +3051,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 181, "execution_count": 180,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -3060,7 +3060,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 182, "execution_count": 181,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -3073,7 +3073,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 183, "execution_count": 182,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -3163,7 +3163,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3 - tf2", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@ -92,7 +92,7 @@
"X = iris.data[:, (2, 3)] # petal length, petal width\n", "X = iris.data[:, (2, 3)] # petal length, petal width\n",
"y = (iris.target == 0).astype(np.int)\n", "y = (iris.target == 0).astype(np.int)\n",
"\n", "\n",
"per_clf = Perceptron(max_iter=100, tol=-np.infty, random_state=42)\n", "per_clf = Perceptron(max_iter=1000, tol=1e-3, random_state=42)\n",
"per_clf.fit(X, y)\n", "per_clf.fit(X, y)\n",
"\n", "\n",
"y_pred = per_clf.predict([[2, 0.5]])" "y_pred = per_clf.predict([[2, 0.5]])"
@ -474,7 +474,7 @@
" plt.axis('off')\n", " plt.axis('off')\n",
" plt.title(class_names[y_train[index]], fontsize=12)\n", " plt.title(class_names[y_train[index]], fontsize=12)\n",
"plt.subplots_adjust(wspace=0.2, hspace=0.5)\n", "plt.subplots_adjust(wspace=0.2, hspace=0.5)\n",
"save_fig('fashion_mnist', tight_layout=False)\n", "save_fig('fashion_mnist_diagram', tight_layout=False)\n",
"plt.show()" "plt.show()"
] ]
}, },
@ -668,7 +668,7 @@
"source": [ "source": [
"model.compile(loss=keras.losses.sparse_categorical_crossentropy,\n", "model.compile(loss=keras.losses.sparse_categorical_crossentropy,\n",
" optimizer=keras.optimizers.SGD(),\n", " optimizer=keras.optimizers.SGD(),\n",
" metrics=[keras.metrics.Accuracy()])" " metrics=[keras.metrics.sparse_categorical_accuracy])"
] ]
}, },
{ {
@ -719,7 +719,7 @@
"pd.DataFrame(history.history).plot(figsize=(8, 5))\n", "pd.DataFrame(history.history).plot(figsize=(8, 5))\n",
"plt.grid(True)\n", "plt.grid(True)\n",
"plt.gca().set_ylim(0, 1)\n", "plt.gca().set_ylim(0, 1)\n",
"save_fig(\"keras_learning_curve_graph\")\n", "save_fig(\"keras_learning_curves_graph\")\n",
"plt.show()" "plt.show()"
] ]
}, },
@ -1656,7 +1656,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3 - tf2", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

File diff suppressed because it is too large Load Diff

View File

@ -45,10 +45,12 @@ xgboost==0.81
# you need a GPU card with CUDA Compute Capability 3.0 or higher support, and # you need a GPU card with CUDA Compute Capability 3.0 or higher support, and
# you must install CUDA, cuDNN and more: see tensorflow.org for the detailed # you must install CUDA, cuDNN and more: see tensorflow.org for the detailed
# installation instructions. # installation instructions.
tensorflow==1.12.0 tf-nightly-2.0-preview
tf-nightly-gpu-2.0-preview
#tensorflow==1.12.0
#tensorflow-gpu==1.12.0 #tensorflow-gpu==1.12.0
tensorflow-hub==0.2.0 #tensorflow-hub==0.2.0
tensorflow-probability==0.5.0 #tensorflow-probability==0.5.0
# Optional: OpenAI gym is only needed for the Reinforcement Learning chapter. # Optional: OpenAI gym is only needed for the Reinforcement Learning chapter.