diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb index d8a7110..dedd201 100644 --- a/02_end_to_end_machine_learning_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -49,11 +49,10 @@ "\n", "# Common imports\n", "import numpy as np\n", - "import numpy.random as rnd\n", "import os\n", "\n", "# to make this notebook's output stable across runs\n", - "rnd.seed(42)\n", + "np.random.seed(42)\n", "\n", "# To plot pretty figures\n", "%matplotlib inline\n", @@ -1154,9 +1153,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 69, "metadata": { - "collapsed": true + "collapsed": true, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -1178,7 +1179,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 70, "metadata": { "collapsed": false, "deletable": true, @@ -1196,7 +1197,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 71, "metadata": { "collapsed": false, "deletable": true, @@ -1210,7 +1211,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 72, "metadata": { "collapsed": false, "deletable": true, @@ -1233,7 +1234,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 73, "metadata": { "collapsed": false, "deletable": true, @@ -1249,7 +1250,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 74, "metadata": { "collapsed": false, "deletable": true, @@ -1262,20 +1263,7 @@ "some_labels = housing_labels.iloc[:5]\n", "some_data_prepared = full_pipeline.transform(some_data)\n", "\n", - "print(\"Predictions:\\t\", lin_reg.predict(some_data_prepared))" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "print(\"Labels:\\t\\t\", list(some_labels))" + "print(\"Predictions:\", lin_reg.predict(some_data_prepared))" ] }, { @@ -1288,7 +1276,7 @@ }, "outputs": [], "source": [ - "some_data_prepared" + "print(\"Labels:\", list(some_labels))" ] }, { @@ -1300,6 +1288,19 @@ "editable": true }, "outputs": [], + "source": [ + "some_data_prepared" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], "source": [ "from sklearn.metrics import mean_squared_error\n", "\n", @@ -1311,7 +1312,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 78, "metadata": { "collapsed": false, "deletable": true, @@ -1327,7 +1328,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 79, "metadata": { "collapsed": false, "deletable": true, @@ -1337,13 +1338,13 @@ "source": [ "from sklearn.tree import DecisionTreeRegressor\n", "\n", - "tree_reg = DecisionTreeRegressor()\n", + "tree_reg = DecisionTreeRegressor(random_state=42)\n", "tree_reg.fit(housing_prepared, housing_labels)" ] }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 80, "metadata": { "collapsed": false, "deletable": true, @@ -1369,7 +1370,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 81, "metadata": { "collapsed": false, "deletable": true, @@ -1386,7 +1387,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 82, "metadata": { "collapsed": false, "deletable": true, @@ -1404,7 +1405,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 83, "metadata": { "collapsed": false, "deletable": true, @@ -1420,7 +1421,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 84, "metadata": { "collapsed": false, "deletable": true, @@ -1430,13 +1431,13 @@ "source": [ "from sklearn.ensemble import RandomForestRegressor\n", "\n", - "forest_reg = RandomForestRegressor()\n", + "forest_reg = RandomForestRegressor(random_state=42)\n", "forest_reg.fit(housing_prepared, housing_labels)" ] }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 85, "metadata": { "collapsed": false, "deletable": true, @@ -1452,7 +1453,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 86, "metadata": { "collapsed": false, "deletable": true, @@ -1470,7 +1471,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 87, "metadata": { "collapsed": false, "deletable": true, @@ -1484,7 +1485,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 88, "metadata": { "collapsed": false, "deletable": true, @@ -1504,7 +1505,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 89, "metadata": { "collapsed": false, "deletable": true, @@ -1519,25 +1520,12 @@ " {'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},\n", " ]\n", "\n", - "forest_reg = RandomForestRegressor()\n", + "forest_reg = RandomForestRegressor(random_state=42)\n", "grid_search = GridSearchCV(forest_reg, param_grid, cv=5,\n", " scoring='neg_mean_squared_error')\n", "grid_search.fit(housing_prepared, housing_labels)" ] }, - { - "cell_type": "code", - "execution_count": 89, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, - "outputs": [], - "source": [ - "grid_search.best_params_" - ] - }, { "cell_type": "code", "execution_count": 90, @@ -1548,7 +1536,7 @@ }, "outputs": [], "source": [ - "grid_search.best_estimator_" + "grid_search.best_params_" ] }, { @@ -1560,6 +1548,19 @@ "editable": true }, "outputs": [], + "source": [ + "grid_search.best_estimator_" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], "source": [ "cvres = grid_search.cv_results_\n", "for mean_score, params in zip(cvres[\"mean_test_score\"], cvres[\"params\"]):\n", @@ -1568,7 +1569,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 93, "metadata": { "collapsed": false, "deletable": true, @@ -1581,7 +1582,7 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 94, "metadata": { "collapsed": false, "deletable": true, @@ -1597,15 +1598,15 @@ " 'max_features': randint(low=1, high=8),\n", " }\n", "\n", - "forest_reg = RandomForestRegressor()\n", + "forest_reg = RandomForestRegressor(random_state=42)\n", "rnd_search = RandomizedSearchCV(forest_reg, param_distributions=param_distribs,\n", - " n_iter=10, cv=5, scoring='neg_mean_squared_error')\n", + " n_iter=10, cv=5, scoring='neg_mean_squared_error', random_state=42)\n", "rnd_search.fit(housing_prepared, housing_labels)" ] }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 95, "metadata": { "collapsed": false, "deletable": true, @@ -1620,7 +1621,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 96, "metadata": { "collapsed": false, "deletable": true, @@ -1634,7 +1635,7 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 97, "metadata": { "collapsed": false, "deletable": true, @@ -1650,7 +1651,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 98, "metadata": { "collapsed": true, "deletable": true, @@ -1672,7 +1673,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 99, "metadata": { "collapsed": false, "deletable": true, @@ -1708,7 +1709,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 100, "metadata": { "collapsed": false, "deletable": true, @@ -1745,7 +1746,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 101, "metadata": { "collapsed": true, "deletable": true, @@ -1758,7 +1759,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 102, "metadata": { "collapsed": true, "deletable": true, @@ -1784,7 +1785,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 103, "metadata": { "collapsed": false, "deletable": true, @@ -1793,8 +1794,8 @@ "outputs": [], "source": [ "from scipy.stats import geom, expon\n", - "geom_distrib=geom(0.5).rvs(10000)\n", - "expon_distrib=expon(scale=1).rvs(10000)\n", + "geom_distrib=geom(0.5).rvs(10000, random_state=42)\n", + "expon_distrib=expon(scale=1).rvs(10000, random_state=42)\n", "plt.hist(geom_distrib, bins=50)\n", "plt.show()\n", "plt.hist(expon_distrib, bins=50)\n", @@ -1834,7 +1835,7 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 104, "metadata": { "collapsed": false, "deletable": true, @@ -1851,7 +1852,7 @@ " ]\n", "\n", "svm_reg = SVR()\n", - "grid_search = GridSearchCV(svm_reg,param_grid, cv=5, scoring='neg_mean_squared_error', verbose=2, n_jobs=4)\n", + "grid_search = GridSearchCV(svm_reg, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=2, n_jobs=4)\n", "grid_search.fit(housing_prepared, housing_labels)" ] }, @@ -1867,7 +1868,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 105, "metadata": { "collapsed": false, "deletable": true, @@ -1892,7 +1893,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 106, "metadata": { "collapsed": false, "deletable": true, @@ -1935,7 +1936,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 107, "metadata": { "collapsed": false, "deletable": true, @@ -1958,7 +1959,8 @@ "\n", "svm_reg = SVR()\n", "rnd_search = RandomizedSearchCV(svm_reg, param_distributions=param_distribs,\n", - " n_iter=50, cv=5, scoring='neg_mean_squared_error', verbose=2, n_jobs=4)\n", + " n_iter=50, cv=5, scoring='neg_mean_squared_error',\n", + " verbose=2, n_jobs=4, random_state=42)\n", "rnd_search.fit(housing_prepared, housing_labels)" ] }, @@ -1974,7 +1976,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 108, "metadata": { "collapsed": false, "deletable": true, @@ -1999,7 +2001,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 109, "metadata": { "collapsed": false, "deletable": true, @@ -2032,7 +2034,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 110, "metadata": { "collapsed": false, "deletable": true, @@ -2041,7 +2043,7 @@ "outputs": [], "source": [ "expon_distrib = expon(scale=1.)\n", - "samples = expon_distrib.rvs(10000)\n", + "samples = expon_distrib.rvs(10000, random_state=42)\n", "plt.figure(figsize=(10, 4))\n", "plt.subplot(121)\n", "plt.title(\"Exponential distribution (scale=1.0)\")\n", @@ -2064,7 +2066,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 111, "metadata": { "collapsed": false, "deletable": true, @@ -2073,7 +2075,7 @@ "outputs": [], "source": [ "reciprocal_distrib = reciprocal(20, 200000)\n", - "samples = reciprocal_distrib.rvs(10000)\n", + "samples = reciprocal_distrib.rvs(10000, random_state=42)\n", "plt.figure(figsize=(10, 4))\n", "plt.subplot(121)\n", "plt.title(\"Reciprocal distribution (scale=1.0)\")\n", @@ -2116,7 +2118,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 112, "metadata": { "collapsed": true, "deletable": true, @@ -2162,7 +2164,7 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 113, "metadata": { "collapsed": true, "deletable": true, @@ -2185,7 +2187,7 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 114, "metadata": { "collapsed": false, "deletable": true, @@ -2199,7 +2201,7 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 115, "metadata": { "collapsed": false, "deletable": true, @@ -2222,7 +2224,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 116, "metadata": { "collapsed": false, "deletable": true, @@ -2245,7 +2247,7 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 117, "metadata": { "collapsed": false, "deletable": true, @@ -2261,7 +2263,7 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 118, "metadata": { "collapsed": true, "deletable": true, @@ -2284,7 +2286,7 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": 119, "metadata": { "collapsed": false, "deletable": true, @@ -2307,7 +2309,7 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 120, "metadata": { "collapsed": false, "deletable": true,