Use 'np.random' rather than 'import numpy.random as rnd', and add random_state to make notebook's output constant

main
Aurélien Geron 2017-06-06 15:16:46 +02:00
parent 3e87055ed9
commit fb33333924
1 changed files with 217 additions and 84 deletions

View File

@ -55,11 +55,10 @@
"\n", "\n",
"# Common imports\n", "# Common imports\n",
"import numpy as np\n", "import numpy as np\n",
"import numpy.random as rnd\n",
"import os\n", "import os\n",
"\n", "\n",
"# to make this notebook's output stable across runs\n", "# to make this notebook's output stable across runs\n",
"rnd.seed(42)\n", "np.random.seed(42)\n",
"\n", "\n",
"# To plot pretty figures\n", "# To plot pretty figures\n",
"%matplotlib inline\n", "%matplotlib inline\n",
@ -257,7 +256,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 11,
"metadata": { "metadata": {
"collapsed": true "collapsed": true,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -275,7 +276,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 12,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -286,7 +289,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 13,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -322,8 +327,8 @@
" plt.axis([0, 2, 0, 15])\n", " plt.axis([0, 2, 0, 15])\n",
" plt.title(r\"$\\eta = {}$\".format(eta), fontsize=16)\n", " plt.title(r\"$\\eta = {}$\".format(eta), fontsize=16)\n",
"\n", "\n",
"rnd.seed(42)\n", "np.random.seed(42)\n",
"theta = rnd.randn(2,1) # random initialization\n", "theta = np.random.randn(2,1) # random initialization\n",
"\n", "\n",
"plt.figure(figsize=(10,4))\n", "plt.figure(figsize=(10,4))\n",
"plt.subplot(131); plot_gradient_descent(theta, eta=0.02)\n", "plt.subplot(131); plot_gradient_descent(theta, eta=0.02)\n",
@ -339,12 +344,14 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 15,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"rnd.seed(42)\n", "np.random.seed(42)\n",
"theta = rnd.randn(2,1) # random initialization\n", "theta = np.random.randn(2,1) # random initialization\n",
"\n", "\n",
"plt.figure(figsize=(10,4))\n", "plt.figure(figsize=(10,4))\n",
"plt.subplot(131); plot_gradient_descent(theta, eta=0.02)\n", "plt.subplot(131); plot_gradient_descent(theta, eta=0.02)\n",
@ -370,13 +377,15 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 16,
"metadata": { "metadata": {
"collapsed": true "collapsed": true,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"theta_path_sgd = []\n", "theta_path_sgd = []\n",
"m = len(X_b)\n", "m = len(X_b)\n",
"rnd.seed(42)" "np.random.seed(42)"
] ]
}, },
{ {
@ -443,7 +452,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.linear_model import SGDRegressor\n", "from sklearn.linear_model import SGDRegressor\n",
"sgd_reg = SGDRegressor(n_iter=50, penalty=None, eta0=0.1)\n", "sgd_reg = SGDRegressor(n_iter=50, penalty=None, eta0=0.1, random_state=42)\n",
"sgd_reg.fit(X, y.ravel())" "sgd_reg.fit(X, y.ravel())"
] ]
}, },
@ -485,8 +494,8 @@
"n_iterations = 50\n", "n_iterations = 50\n",
"minibatch_size = 20\n", "minibatch_size = 20\n",
"\n", "\n",
"rnd.seed(42)\n", "np.random.seed(42)\n",
"theta = rnd.randn(2,1) # random initialization\n", "theta = np.random.randn(2,1) # random initialization\n",
"\n", "\n",
"t0, t1 = 10, 1000\n", "t0, t1 = 10, 1000\n",
"def learning_schedule(t):\n", "def learning_schedule(t):\n",
@ -494,7 +503,7 @@
"\n", "\n",
"t = 0\n", "t = 0\n",
"for epoch in range(n_iterations):\n", "for epoch in range(n_iterations):\n",
" shuffled_indices = rnd.permutation(m)\n", " shuffled_indices = np.random.permutation(m)\n",
" X_b_shuffled = X_b[shuffled_indices]\n", " X_b_shuffled = X_b[shuffled_indices]\n",
" y_shuffled = y[shuffled_indices]\n", " y_shuffled = y[shuffled_indices]\n",
" for i in range(0, m, minibatch_size):\n", " for i in range(0, m, minibatch_size):\n",
@ -571,14 +580,16 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 25, "execution_count": 25,
"metadata": { "metadata": {
"collapsed": true "collapsed": true,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"import numpy as np\n", "import numpy as np\n",
"import numpy.random as rnd\n", "import numpy.random as rnd\n",
"\n", "\n",
"rnd.seed(42)" "np.random.seed(42)"
] ]
}, },
{ {
@ -750,7 +761,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 34, "execution_count": 34,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -775,7 +788,7 @@
"\n", "\n",
"polynomial_regression = Pipeline((\n", "polynomial_regression = Pipeline((\n",
" (\"poly_features\", PolynomialFeatures(degree=10, include_bias=False)),\n", " (\"poly_features\", PolynomialFeatures(degree=10, include_bias=False)),\n",
" (\"sgd_reg\", LinearRegression()),\n", " (\"lin_reg\", LinearRegression()),\n",
" ))\n", " ))\n",
"\n", "\n",
"plot_learning_curves(polynomial_regression, X, y)\n", "plot_learning_curves(polynomial_regression, X, y)\n",
@ -806,10 +819,10 @@
"source": [ "source": [
"from sklearn.linear_model import Ridge\n", "from sklearn.linear_model import Ridge\n",
"\n", "\n",
"rnd.seed(42)\n", "np.random.seed(42)\n",
"m = 20\n", "m = 20\n",
"X = 3 * rnd.rand(m, 1)\n", "X = 3 * np.random.rand(m, 1)\n",
"y = 1 + 0.5 * X + rnd.randn(m, 1) / 1.5\n", "y = 1 + 0.5 * X + np.random.randn(m, 1) / 1.5\n",
"X_new = np.linspace(0, 3, 100).reshape(100, 1)\n", "X_new = np.linspace(0, 3, 100).reshape(100, 1)\n",
"\n", "\n",
"def plot_model(model_class, polynomial, alphas, **model_kargs):\n", "def plot_model(model_class, polynomial, alphas, **model_kargs):\n",
@ -832,10 +845,10 @@
"\n", "\n",
"plt.figure(figsize=(8,4))\n", "plt.figure(figsize=(8,4))\n",
"plt.subplot(121)\n", "plt.subplot(121)\n",
"plot_model(Ridge, polynomial=False, alphas=(0, 10, 100))\n", "plot_model(Ridge, polynomial=False, alphas=(0, 10, 100), random_state=42)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n", "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.subplot(122)\n", "plt.subplot(122)\n",
"plot_model(Ridge, polynomial=True, alphas=(0, 10**-5, 1))\n", "plot_model(Ridge, polynomial=True, alphas=(0, 10**-5, 1), random_state=42)\n",
"\n", "\n",
"save_fig(\"ridge_regression_plot\")\n", "save_fig(\"ridge_regression_plot\")\n",
"plt.show()" "plt.show()"
@ -852,7 +865,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.linear_model import Ridge\n", "from sklearn.linear_model import Ridge\n",
"ridge_reg = Ridge(alpha=1, solver=\"cholesky\")\n", "ridge_reg = Ridge(alpha=1, solver=\"cholesky\", random_state=42)\n",
"ridge_reg.fit(X, y)\n", "ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])" "ridge_reg.predict([[1.5]])"
] ]
@ -882,7 +895,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"ridge_reg = Ridge(alpha=1, solver=\"sag\")\n", "ridge_reg = Ridge(alpha=1, solver=\"sag\", random_state=42)\n",
"ridge_reg.fit(X, y)\n", "ridge_reg.fit(X, y)\n",
"ridge_reg.predict([[1.5]])" "ridge_reg.predict([[1.5]])"
] ]
@ -901,10 +914,10 @@
"\n", "\n",
"plt.figure(figsize=(8,4))\n", "plt.figure(figsize=(8,4))\n",
"plt.subplot(121)\n", "plt.subplot(121)\n",
"plot_model(Lasso, polynomial=False, alphas=(0, 0.1, 1))\n", "plot_model(Lasso, polynomial=False, alphas=(0, 0.1, 1), random_state=42)\n",
"plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n", "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
"plt.subplot(122)\n", "plt.subplot(122)\n",
"plot_model(Lasso, polynomial=True, alphas=(0, 10**-7, 1), tol=1)\n", "plot_model(Lasso, polynomial=True, alphas=(0, 10**-7, 1), tol=1, random_state=42)\n",
"\n", "\n",
"save_fig(\"lasso_regression_plot\")\n", "save_fig(\"lasso_regression_plot\")\n",
"plt.show()" "plt.show()"
@ -937,7 +950,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.linear_model import ElasticNet\n", "from sklearn.linear_model import ElasticNet\n",
"elastic_net = ElasticNet(alpha=0.1, l1_ratio=0.5)\n", "elastic_net = ElasticNet(alpha=0.1, l1_ratio=0.5, random_state=42)\n",
"elastic_net.fit(X, y)\n", "elastic_net.fit(X, y)\n",
"elastic_net.predict([[1.5]])" "elastic_net.predict([[1.5]])"
] ]
@ -953,10 +966,10 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"rnd.seed(42)\n", "np.random.seed(42)\n",
"m = 100\n", "m = 100\n",
"X = 6 * rnd.rand(m, 1) - 3\n", "X = 6 * np.random.rand(m, 1) - 3\n",
"y = 2 + X + 0.5 * X**2 + rnd.randn(m, 1)\n", "y = 2 + X + 0.5 * X**2 + np.random.randn(m, 1)\n",
"\n", "\n",
"X_train, X_val, y_train, y_val = train_test_split(X[:50], y[:50].ravel(), test_size=0.5, random_state=10)\n", "X_train, X_val, y_train, y_val = train_test_split(X[:50], y[:50].ravel(), test_size=0.5, random_state=10)\n",
"\n", "\n",
@ -1037,7 +1050,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 45, "execution_count": 45,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1225,7 +1240,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 52, "execution_count": 52,
"metadata": { "metadata": {
"collapsed": true "collapsed": true,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1237,12 +1254,14 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 53, "execution_count": 53,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from sklearn.linear_model import LogisticRegression\n", "from sklearn.linear_model import LogisticRegression\n",
"log_reg = LogisticRegression()\n", "log_reg = LogisticRegression(random_state=42)\n",
"log_reg.fit(X, y)" "log_reg.fit(X, y)"
] ]
}, },
@ -1250,7 +1269,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 54, "execution_count": 54,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1263,7 +1284,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"The figure in the book is actually a bit fancier:" "The figure in the book is actually a bit fancier:"
] ]
@ -1340,7 +1364,7 @@
"X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n", "X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n",
"y = (iris[\"target\"] == 2).astype(np.int)\n", "y = (iris[\"target\"] == 2).astype(np.int)\n",
"\n", "\n",
"log_reg = LogisticRegression(C=10**10)\n", "log_reg = LogisticRegression(C=10**10, random_state=42)\n",
"log_reg.fit(X, y)\n", "log_reg.fit(X, y)\n",
"\n", "\n",
"x0, x1 = np.meshgrid(\n", "x0, x1 = np.meshgrid(\n",
@ -1386,7 +1410,7 @@
"X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n", "X = iris[\"data\"][:, (2, 3)] # petal length, petal width\n",
"y = iris[\"target\"]\n", "y = iris[\"target\"]\n",
"\n", "\n",
"softmax_reg = LogisticRegression(multi_class=\"multinomial\",solver=\"lbfgs\", C=10)\n", "softmax_reg = LogisticRegression(multi_class=\"multinomial\",solver=\"lbfgs\", C=10, random_state=42)\n",
"softmax_reg.fit(X, y)" "softmax_reg.fit(X, y)"
] ]
}, },
@ -1394,7 +1418,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 60, "execution_count": 60,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1468,21 +1494,30 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"## 1. to 11." "## 1. to 11."
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"See appendix A." "See appendix A."
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"## 12. Batch Gradient Descent with early stopping for Softmax Regression\n", "## 12. Batch Gradient Descent with early stopping for Softmax Regression\n",
"(without using Scikit-Learn)" "(without using Scikit-Learn)"
@ -1490,7 +1525,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"Let's start by loading the data. We will just reuse the Iris dataset we loaded earlier." "Let's start by loading the data. We will just reuse the Iris dataset we loaded earlier."
] ]
@ -1499,7 +1537,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 63, "execution_count": 63,
"metadata": { "metadata": {
"collapsed": true "collapsed": true,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1509,7 +1549,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"We need to add the bias term for every instance ($x_0 = 1$):" "We need to add the bias term for every instance ($x_0 = 1$):"
] ]
@ -1518,7 +1561,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 64, "execution_count": 64,
"metadata": { "metadata": {
"collapsed": true "collapsed": true,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1527,7 +1572,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"And let's set the random seed so the output of this exercise solution is reproducible:" "And let's set the random seed so the output of this exercise solution is reproducible:"
] ]
@ -1536,7 +1584,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 65, "execution_count": 65,
"metadata": { "metadata": {
"collapsed": true "collapsed": true,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1545,7 +1595,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"The easiest option to split the dataset into a training set, a validation set and a test set would be to use Scikit-Learn's `train_test_split()` function, but the point of this exercise is to try to understand the algorithms by implementing them manually. So here is one possible implementation:" "The easiest option to split the dataset into a training set, a validation set and a test set would be to use Scikit-Learn's `train_test_split()` function, but the point of this exercise is to try to understand the algorithms by implementing them manually. So here is one possible implementation:"
] ]
@ -1554,7 +1607,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 66, "execution_count": 66,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1578,7 +1633,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"The targets are currently class indices (0, 1 or 2), but we need target class probabilities to train the Softmax Regression model. Each instance will have target class probabilities equal to 0.0 for all classes except for the target class which will have a probability of 1.0 (in other words, the vector of class probabilities for any given instance is a one-hot vector). Let's write a small function to convert the vector of class indices into a matrix containing a one-hot vector for each instance:" "The targets are currently class indices (0, 1 or 2), but we need target class probabilities to train the Softmax Regression model. Each instance will have target class probabilities equal to 0.0 for all classes except for the target class which will have a probability of 1.0 (in other words, the vector of class probabilities for any given instance is a one-hot vector). Let's write a small function to convert the vector of class indices into a matrix containing a one-hot vector for each instance:"
] ]
@ -1587,7 +1645,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 67, "execution_count": 67,
"metadata": { "metadata": {
"collapsed": true "collapsed": true,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1601,7 +1661,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"Let's test this function on the first 10 instances:" "Let's test this function on the first 10 instances:"
] ]
@ -1610,7 +1673,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 68, "execution_count": 68,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1621,7 +1686,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 69, "execution_count": 69,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1630,7 +1697,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"Looks good, so let's create the target class probabilities matrix for the training set and the test set:" "Looks good, so let's create the target class probabilities matrix for the training set and the test set:"
] ]
@ -1639,7 +1709,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 70, "execution_count": 70,
"metadata": { "metadata": {
"collapsed": true "collapsed": true,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1650,7 +1722,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"Now let's implement the Softmax function. Recall that it is defined by the following equation:\n", "Now let's implement the Softmax function. Recall that it is defined by the following equation:\n",
"\n", "\n",
@ -1661,7 +1736,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 71, "execution_count": 71,
"metadata": { "metadata": {
"collapsed": true "collapsed": true,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1673,7 +1750,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"We are almost ready to start training. Let's define the number of inputs and outputs:" "We are almost ready to start training. Let's define the number of inputs and outputs:"
] ]
@ -1694,7 +1774,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"Now here comes the hardest part: training! Theoretically, it's simple: it's just a matter of translating the math equations into Python code. But in practice, it can be quite tricky: in particular, it's easy to mix up the order of the terms, or the indices. You can even end up with code that looks like it's working but is actually not computing exactly the right thing. When unsure, you should write down the shape of each term in the equation and make sure the corresponding terms in your code match closely. It can also help to evaluate each term independently and print them out. The good news is that you won't have to do this every day, since all this is well implemented by Scikit-Learn, but it will help you understand what's going on under the hood.\n", "Now here comes the hardest part: training! Theoretically, it's simple: it's just a matter of translating the math equations into Python code. But in practice, it can be quite tricky: in particular, it's easy to mix up the order of the terms, or the indices. You can even end up with code that looks like it's working but is actually not computing exactly the right thing. When unsure, you should write down the shape of each term in the equation and make sure the corresponding terms in your code match closely. It can also help to evaluate each term independently and print them out. The good news is that you won't have to do this every day, since all this is well implemented by Scikit-Learn, but it will help you understand what's going on under the hood.\n",
"\n", "\n",
@ -1714,7 +1797,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 73, "execution_count": 73,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1738,7 +1823,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"And that's it! The Softmax model is trained. Let's look at the model parameters:" "And that's it! The Softmax model is trained. Let's look at the model parameters:"
] ]
@ -1747,7 +1835,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 74, "execution_count": 74,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1756,7 +1846,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"Let's make predictions for the validation set and check the accuracy score:" "Let's make predictions for the validation set and check the accuracy score:"
] ]
@ -1765,7 +1858,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 75, "execution_count": 75,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1779,7 +1874,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"Well, this model looks pretty good. For the sake of the exercise, let's add a bit of $\\ell_2$ regularization. The following training code is similar to the one above, but the loss now has an additional $\\ell_2$ penalty, and the gradients have the proper additional term (note that we don't regularize the first element of `Theta` since this corresponds to the bias term). Also, let's try increasing the learning rate `eta`." "Well, this model looks pretty good. For the sake of the exercise, let's add a bit of $\\ell_2$ regularization. The following training code is similar to the one above, but the loss now has an additional $\\ell_2$ penalty, and the gradients have the proper additional term (note that we don't regularize the first element of `Theta` since this corresponds to the bias term). Also, let's try increasing the learning rate `eta`."
] ]
@ -1788,7 +1886,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 76, "execution_count": 76,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1815,7 +1915,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"Because of the additional $\\ell_2$ penalty, the loss seems greater than earlier, but perhaps this model will perform better? Let's find out:" "Because of the additional $\\ell_2$ penalty, the loss seems greater than earlier, but perhaps this model will perform better? Let's find out:"
] ]
@ -1824,7 +1927,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 77, "execution_count": 77,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1838,14 +1943,20 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"Cool, perfect accuracy! We probably just got lucky with this validation set, but still, it's pleasant." "Cool, perfect accuracy! We probably just got lucky with this validation set, but still, it's pleasant."
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"Now let's add early stopping. For this we just need to measure the loss on the validation set at every iteration and stop when the error starts growing." "Now let's add early stopping. For this we just need to measure the loss on the validation set at every iteration and stop when the error starts growing."
] ]
@ -1854,7 +1965,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 78, "execution_count": 78,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1896,7 +2009,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 79, "execution_count": 79,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1910,14 +2025,20 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"Still perfect, but faster." "Still perfect, but faster."
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"Now let's plot the model's predictions on the whole dataset:" "Now let's plot the model's predictions on the whole dataset:"
] ]
@ -1926,7 +2047,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 80, "execution_count": 80,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1964,7 +2087,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"And now let's measure the final model's accuracy on the test set:" "And now let's measure the final model's accuracy on the test set:"
] ]
@ -1973,7 +2099,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 81, "execution_count": 81,
"metadata": { "metadata": {
"collapsed": false "collapsed": false,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1987,7 +2115,10 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"deletable": true,
"editable": true
},
"source": [ "source": [
"Our perfect model turns out to have slight imperfections. This variability is likely due to the very small size of the dataset: depending on how you sample the training set, validation set and the test set, you can get quite different results. Try changing the random seed and running the code again a few times, you will see that the results will vary." "Our perfect model turns out to have slight imperfections. This variability is likely due to the very small size of the dataset: depending on how you sample the training set, validation set and the test set, you can get quite different results. Try changing the random seed and running the code again a few times, you will see that the results will vary."
] ]
@ -1996,7 +2127,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {
"collapsed": true "collapsed": true,
"deletable": true,
"editable": true
}, },
"outputs": [], "outputs": [],
"source": [] "source": []