Make notebook for ch2 output the same result every time

main
Aurélien Geron 2017-06-06 13:21:19 +02:00
parent 9910d31ec3
commit bd6c167e09
1 changed file with 91 additions and 89 deletions

View File

@ -49,11 +49,10 @@
"\n",
"# Common imports\n",
"import numpy as np\n",
"import numpy.random as rnd\n",
"import os\n",
"\n",
"# to make this notebook's output stable across runs\n",
"rnd.seed(42)\n",
"np.random.seed(42)\n",
"\n",
"# To plot pretty figures\n",
"%matplotlib inline\n",
@ -1154,9 +1153,11 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 69,
"metadata": {
"collapsed": true
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
@ -1178,7 +1179,7 @@
},
{
"cell_type": "code",
"execution_count": 69,
"execution_count": 70,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1196,7 +1197,7 @@
},
{
"cell_type": "code",
"execution_count": 70,
"execution_count": 71,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1210,7 +1211,7 @@
},
{
"cell_type": "code",
"execution_count": 71,
"execution_count": 72,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1233,7 +1234,7 @@
},
{
"cell_type": "code",
"execution_count": 72,
"execution_count": 73,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1249,7 +1250,7 @@
},
{
"cell_type": "code",
"execution_count": 73,
"execution_count": 74,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1262,20 +1263,7 @@
"some_labels = housing_labels.iloc[:5]\n",
"some_data_prepared = full_pipeline.transform(some_data)\n",
"\n",
"print(\"Predictions:\\t\", lin_reg.predict(some_data_prepared))"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"print(\"Labels:\\t\\t\", list(some_labels))"
"print(\"Predictions:\", lin_reg.predict(some_data_prepared))"
]
},
{
@ -1288,7 +1276,7 @@
},
"outputs": [],
"source": [
"some_data_prepared"
"print(\"Labels:\", list(some_labels))"
]
},
{
@ -1300,6 +1288,19 @@
"editable": true
},
"outputs": [],
"source": [
"some_data_prepared"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from sklearn.metrics import mean_squared_error\n",
"\n",
@ -1311,7 +1312,7 @@
},
{
"cell_type": "code",
"execution_count": 77,
"execution_count": 78,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1327,7 +1328,7 @@
},
{
"cell_type": "code",
"execution_count": 78,
"execution_count": 79,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1337,13 +1338,13 @@
"source": [
"from sklearn.tree import DecisionTreeRegressor\n",
"\n",
"tree_reg = DecisionTreeRegressor()\n",
"tree_reg = DecisionTreeRegressor(random_state=42)\n",
"tree_reg.fit(housing_prepared, housing_labels)"
]
},
{
"cell_type": "code",
"execution_count": 79,
"execution_count": 80,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1369,7 +1370,7 @@
},
{
"cell_type": "code",
"execution_count": 80,
"execution_count": 81,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1386,7 +1387,7 @@
},
{
"cell_type": "code",
"execution_count": 81,
"execution_count": 82,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1404,7 +1405,7 @@
},
{
"cell_type": "code",
"execution_count": 82,
"execution_count": 83,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1420,7 +1421,7 @@
},
{
"cell_type": "code",
"execution_count": 83,
"execution_count": 84,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1430,13 +1431,13 @@
"source": [
"from sklearn.ensemble import RandomForestRegressor\n",
"\n",
"forest_reg = RandomForestRegressor()\n",
"forest_reg = RandomForestRegressor(random_state=42)\n",
"forest_reg.fit(housing_prepared, housing_labels)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"execution_count": 85,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1452,7 +1453,7 @@
},
{
"cell_type": "code",
"execution_count": 85,
"execution_count": 86,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1470,7 +1471,7 @@
},
{
"cell_type": "code",
"execution_count": 86,
"execution_count": 87,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1484,7 +1485,7 @@
},
{
"cell_type": "code",
"execution_count": 87,
"execution_count": 88,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1504,7 +1505,7 @@
},
{
"cell_type": "code",
"execution_count": 88,
"execution_count": 89,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1519,25 +1520,12 @@
" {'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},\n",
" ]\n",
"\n",
"forest_reg = RandomForestRegressor()\n",
"forest_reg = RandomForestRegressor(random_state=42)\n",
"grid_search = GridSearchCV(forest_reg, param_grid, cv=5,\n",
" scoring='neg_mean_squared_error')\n",
"grid_search.fit(housing_prepared, housing_labels)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"grid_search.best_params_"
]
},
{
"cell_type": "code",
"execution_count": 90,
@ -1548,7 +1536,7 @@
},
"outputs": [],
"source": [
"grid_search.best_estimator_"
"grid_search.best_params_"
]
},
{
@ -1560,6 +1548,19 @@
"editable": true
},
"outputs": [],
"source": [
"grid_search.best_estimator_"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"cvres = grid_search.cv_results_\n",
"for mean_score, params in zip(cvres[\"mean_test_score\"], cvres[\"params\"]):\n",
@ -1568,7 +1569,7 @@
},
{
"cell_type": "code",
"execution_count": 92,
"execution_count": 93,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1581,7 +1582,7 @@
},
{
"cell_type": "code",
"execution_count": 93,
"execution_count": 94,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1597,15 +1598,15 @@
" 'max_features': randint(low=1, high=8),\n",
" }\n",
"\n",
"forest_reg = RandomForestRegressor()\n",
"forest_reg = RandomForestRegressor(random_state=42)\n",
"rnd_search = RandomizedSearchCV(forest_reg, param_distributions=param_distribs,\n",
" n_iter=10, cv=5, scoring='neg_mean_squared_error')\n",
" n_iter=10, cv=5, scoring='neg_mean_squared_error', random_state=42)\n",
"rnd_search.fit(housing_prepared, housing_labels)"
]
},
{
"cell_type": "code",
"execution_count": 94,
"execution_count": 95,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1620,7 +1621,7 @@
},
{
"cell_type": "code",
"execution_count": 95,
"execution_count": 96,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1634,7 +1635,7 @@
},
{
"cell_type": "code",
"execution_count": 96,
"execution_count": 97,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1650,7 +1651,7 @@
},
{
"cell_type": "code",
"execution_count": 97,
"execution_count": 98,
"metadata": {
"collapsed": true,
"deletable": true,
@ -1672,7 +1673,7 @@
},
{
"cell_type": "code",
"execution_count": 98,
"execution_count": 99,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1708,7 +1709,7 @@
},
{
"cell_type": "code",
"execution_count": 99,
"execution_count": 100,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1745,7 +1746,7 @@
},
{
"cell_type": "code",
"execution_count": 100,
"execution_count": 101,
"metadata": {
"collapsed": true,
"deletable": true,
@ -1758,7 +1759,7 @@
},
{
"cell_type": "code",
"execution_count": 101,
"execution_count": 102,
"metadata": {
"collapsed": true,
"deletable": true,
@ -1784,7 +1785,7 @@
},
{
"cell_type": "code",
"execution_count": 102,
"execution_count": 103,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1793,8 +1794,8 @@
"outputs": [],
"source": [
"from scipy.stats import geom, expon\n",
"geom_distrib=geom(0.5).rvs(10000)\n",
"expon_distrib=expon(scale=1).rvs(10000)\n",
"geom_distrib=geom(0.5).rvs(10000, random_state=42)\n",
"expon_distrib=expon(scale=1).rvs(10000, random_state=42)\n",
"plt.hist(geom_distrib, bins=50)\n",
"plt.show()\n",
"plt.hist(expon_distrib, bins=50)\n",
@ -1834,7 +1835,7 @@
},
{
"cell_type": "code",
"execution_count": 103,
"execution_count": 104,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1851,7 +1852,7 @@
" ]\n",
"\n",
"svm_reg = SVR()\n",
"grid_search = GridSearchCV(svm_reg,param_grid, cv=5, scoring='neg_mean_squared_error', verbose=2, n_jobs=4)\n",
"grid_search = GridSearchCV(svm_reg, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=2, n_jobs=4)\n",
"grid_search.fit(housing_prepared, housing_labels)"
]
},
@ -1867,7 +1868,7 @@
},
{
"cell_type": "code",
"execution_count": 104,
"execution_count": 105,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1892,7 +1893,7 @@
},
{
"cell_type": "code",
"execution_count": 105,
"execution_count": 106,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1935,7 +1936,7 @@
},
{
"cell_type": "code",
"execution_count": 106,
"execution_count": 107,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1958,7 +1959,8 @@
"\n",
"svm_reg = SVR()\n",
"rnd_search = RandomizedSearchCV(svm_reg, param_distributions=param_distribs,\n",
" n_iter=50, cv=5, scoring='neg_mean_squared_error', verbose=2, n_jobs=4)\n",
" n_iter=50, cv=5, scoring='neg_mean_squared_error',\n",
" verbose=2, n_jobs=4, random_state=42)\n",
"rnd_search.fit(housing_prepared, housing_labels)"
]
},
@ -1974,7 +1976,7 @@
},
{
"cell_type": "code",
"execution_count": 107,
"execution_count": 108,
"metadata": {
"collapsed": false,
"deletable": true,
@ -1999,7 +2001,7 @@
},
{
"cell_type": "code",
"execution_count": 108,
"execution_count": 109,
"metadata": {
"collapsed": false,
"deletable": true,
@ -2032,7 +2034,7 @@
},
{
"cell_type": "code",
"execution_count": 109,
"execution_count": 110,
"metadata": {
"collapsed": false,
"deletable": true,
@ -2041,7 +2043,7 @@
"outputs": [],
"source": [
"expon_distrib = expon(scale=1.)\n",
"samples = expon_distrib.rvs(10000)\n",
"samples = expon_distrib.rvs(10000, random_state=42)\n",
"plt.figure(figsize=(10, 4))\n",
"plt.subplot(121)\n",
"plt.title(\"Exponential distribution (scale=1.0)\")\n",
@ -2064,7 +2066,7 @@
},
{
"cell_type": "code",
"execution_count": 110,
"execution_count": 111,
"metadata": {
"collapsed": false,
"deletable": true,
@ -2073,7 +2075,7 @@
"outputs": [],
"source": [
"reciprocal_distrib = reciprocal(20, 200000)\n",
"samples = reciprocal_distrib.rvs(10000)\n",
"samples = reciprocal_distrib.rvs(10000, random_state=42)\n",
"plt.figure(figsize=(10, 4))\n",
"plt.subplot(121)\n",
"plt.title(\"Reciprocal distribution (scale=1.0)\")\n",
@ -2116,7 +2118,7 @@
},
{
"cell_type": "code",
"execution_count": 111,
"execution_count": 112,
"metadata": {
"collapsed": true,
"deletable": true,
@ -2162,7 +2164,7 @@
},
{
"cell_type": "code",
"execution_count": 112,
"execution_count": 113,
"metadata": {
"collapsed": true,
"deletable": true,
@ -2185,7 +2187,7 @@
},
{
"cell_type": "code",
"execution_count": 113,
"execution_count": 114,
"metadata": {
"collapsed": false,
"deletable": true,
@ -2199,7 +2201,7 @@
},
{
"cell_type": "code",
"execution_count": 114,
"execution_count": 115,
"metadata": {
"collapsed": false,
"deletable": true,
@ -2222,7 +2224,7 @@
},
{
"cell_type": "code",
"execution_count": 115,
"execution_count": 116,
"metadata": {
"collapsed": false,
"deletable": true,
@ -2245,7 +2247,7 @@
},
{
"cell_type": "code",
"execution_count": 116,
"execution_count": 117,
"metadata": {
"collapsed": false,
"deletable": true,
@ -2261,7 +2263,7 @@
},
{
"cell_type": "code",
"execution_count": 117,
"execution_count": 118,
"metadata": {
"collapsed": true,
"deletable": true,
@ -2284,7 +2286,7 @@
},
{
"cell_type": "code",
"execution_count": 118,
"execution_count": 119,
"metadata": {
"collapsed": false,
"deletable": true,
@ -2307,7 +2309,7 @@
},
{
"cell_type": "code",
"execution_count": 119,
"execution_count": 120,
"metadata": {
"collapsed": false,
"deletable": true,