Merge PR #36 by lsshawn: adds helpful notes and fixes the null rows sampling code
parent
48718eff9d
commit
7f4cd72d97
|
@ -16,7 +16,10 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"source": [
|
||||
"**Note**: You may find little differences between the code outputs in the book and in these Jupyter notebooks: these slight differences are mostly due to the random nature of many training algorithms: although I have tried to make these notebooks' outputs as constant as possible, it is impossible to guarantee that they will produce the exact same output on every platform. Also, some data structures (such as dictionaries) do not preserve the item order. Finally, I fixed a few minor bugs (I added notes next to the concerned cells) which lead to slightly different results, without changing the ideas presented in the book."
|
||||
]
|
||||
|
@ -245,6 +248,7 @@
|
|||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"# For illustration only. Sklearn has train_test_split()\n",
|
||||
"def split_train_test(data, test_ratio):\n",
|
||||
" shuffled_indices = np.random.permutation(len(data))\n",
|
||||
" test_set_size = int(len(data) * test_ratio)\n",
|
||||
|
@ -395,7 +399,9 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Divide by 1.5 to limit the number of income categories\n",
|
||||
"housing[\"income_cat\"] = np.ceil(housing[\"median_income\"] / 1.5)\n",
|
||||
"# Label those above 5 as 5\n",
|
||||
"housing[\"income_cat\"].where(housing[\"income_cat\"] < 5, 5.0, inplace=True)"
|
||||
]
|
||||
},
|
||||
|
@ -416,7 +422,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -558,7 +566,10 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"source": [
|
||||
"The argument `sharex=False` fixes a display bug (the x-axis values and legend were not displayed). This is a temporary fix (see: https://github.com/pandas-dev/pandas/issues/10611). Thanks to Wilmer Arellano for pointing it out."
|
||||
]
|
||||
|
@ -649,10 +660,12 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing.plot(kind=\"scatter\", x=\"median_income\", y=\"median_house_value\",\n",
|
||||
" alpha=0.1)\n",
|
||||
"plt.axis([0, 16, 0, 550000])\n",
|
||||
"save_fig(\"income_vs_house_value_scatterplot\")"
|
||||
"from pandas.tools.plotting import scatter_matrix\n",
|
||||
"\n",
|
||||
"attributes = [\"median_house_value\", \"median_income\", \"total_rooms\",\n",
|
||||
" \"housing_median_age\"]\n",
|
||||
"scatter_matrix(housing[attributes], figsize=(12, 8))\n",
|
||||
"save_fig(\"scatter_matrix_plot\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -665,12 +678,10 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pandas.tools.plotting import scatter_matrix\n",
|
||||
"\n",
|
||||
"attributes = [\"median_house_value\", \"median_income\", \"total_rooms\",\n",
|
||||
" \"housing_median_age\"]\n",
|
||||
"scatter_matrix(housing[attributes], figsize=(12, 8))\n",
|
||||
"save_fig(\"scatter_matrix_plot\")"
|
||||
"housing.plot(kind=\"scatter\", x=\"median_income\", y=\"median_house_value\",\n",
|
||||
" alpha=0.1)\n",
|
||||
"plt.axis([0, 16, 0, 550000])\n",
|
||||
"save_fig(\"income_vs_house_value_scatterplot\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -690,7 +701,10 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"source": [
|
||||
"Note: there was a bug in the previous cell, in the definition of the `rooms_per_household` attribute. This explains why the correlation value below differs slightly from the value in the book (unless you are reading the latest version)."
|
||||
]
|
||||
|
@ -758,7 +772,7 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing = strat_train_set.drop(\"median_house_value\", axis=1)\n",
|
||||
"housing = strat_train_set.drop(\"median_house_value\", axis=1) # drop labels for training set\n",
|
||||
"housing_labels = strat_train_set[\"median_house_value\"].copy()"
|
||||
]
|
||||
},
|
||||
|
@ -772,7 +786,8 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing.iloc[21:24]"
|
||||
"sample_incomplete_rows = housing[housing.isnull().any(axis=1)].head()\n",
|
||||
"sample_incomplete_rows"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -785,8 +800,7 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing_copy = housing.copy().iloc[21:24]\n",
|
||||
"housing_copy.dropna(subset=[\"total_bedrooms\"]) # option 1"
|
||||
"sample_incomplete_rows.dropna(subset=[\"total_bedrooms\"]) # option 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -799,8 +813,7 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing_copy = housing.copy().iloc[21:24]\n",
|
||||
"housing_copy.drop(\"total_bedrooms\", axis=1) # option 2"
|
||||
"sample_incomplete_rows.drop(\"total_bedrooms\", axis=1) # option 2"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -813,10 +826,9 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing_copy = housing.copy().iloc[21:24]\n",
|
||||
"median = housing_copy[\"total_bedrooms\"].median()\n",
|
||||
"housing_copy[\"total_bedrooms\"].fillna(median, inplace=True) # option 3\n",
|
||||
"housing_copy"
|
||||
"median = housing[\"total_bedrooms\"].median()\n",
|
||||
"sample_incomplete_rows[\"total_bedrooms\"].fillna(median, inplace=True) # option 3\n",
|
||||
"sample_incomplete_rows"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -828,65 +840,22 @@
|
|||
"editable": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing_copy.drop(\"total_bedrooms\", axis=1) # option 2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"median = housing_copy[\"total_bedrooms\"].median()\n",
|
||||
"housing_copy[\"total_bedrooms\"].fillna(median, inplace=True) # option 3\n",
|
||||
"housing_copy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Summary...\n",
|
||||
"housing_copy = housing.copy().iloc[21:24]\n",
|
||||
"housing_copy.dropna(subset=[\"total_bedrooms\"]) # option 1\n",
|
||||
"\n",
|
||||
"housing_copy = housing.copy().iloc[21:24]\n",
|
||||
"housing_copy.drop(\"total_bedrooms\", axis=1) # option 2\n",
|
||||
"\n",
|
||||
"housing_copy = housing.copy().iloc[21:24]\n",
|
||||
"median = housing_copy[\"total_bedrooms\"].median()\n",
|
||||
"housing_copy[\"total_bedrooms\"].fillna(median, inplace=True) # option 3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.preprocessing import Imputer\n",
|
||||
"\n",
|
||||
"imputer = Imputer(strategy=\"median\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Remove the text attribute because median can only be calculated on numerical attributes:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"execution_count": 48,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -899,7 +868,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"execution_count": 49,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -912,7 +881,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"execution_count": 50,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -923,9 +892,16 @@
|
|||
"imputer.statistics_"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check that this is the same as manually computing the median of each attribute:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"execution_count": 51,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -936,9 +912,16 @@
|
|||
"housing_num.median().values"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Transform the training set:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"execution_count": 52,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -951,20 +934,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing_tr = pd.DataFrame(X, columns=housing_num.columns)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"execution_count": 53,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -972,12 +942,26 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing_tr.iloc[21:24]"
|
||||
"housing_tr = pd.DataFrame(X, columns=housing_num.columns,\n",
|
||||
" index = list(housing.index.values))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"execution_count": 54,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
"editable": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing_tr.loc[sample_incomplete_rows.index.values]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -990,7 +974,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"execution_count": 56,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1004,7 +988,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"execution_count": 57,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1022,7 +1006,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"execution_count": 58,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1035,7 +1019,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"execution_count": 59,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1052,7 +1036,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"execution_count": 60,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1065,7 +1049,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 64,
|
||||
"execution_count": 61,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1082,7 +1066,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 65,
|
||||
"execution_count": 62,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1092,6 +1076,7 @@
|
|||
"source": [
|
||||
"from sklearn.base import BaseEstimator, TransformerMixin\n",
|
||||
"\n",
|
||||
"# column index\n",
|
||||
"rooms_ix, bedrooms_ix, population_ix, household_ix = 3, 4, 5, 6\n",
|
||||
"\n",
|
||||
"class CombinedAttributesAdder(BaseEstimator, TransformerMixin):\n",
|
||||
|
@ -1115,7 +1100,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 66,
|
||||
"execution_count": 63,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1129,7 +1114,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 67,
|
||||
"execution_count": 64,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1151,7 +1136,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 68,
|
||||
"execution_count": 65,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1164,7 +1149,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 69,
|
||||
"execution_count": 66,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -1174,6 +1159,8 @@
|
|||
"source": [
|
||||
"from sklearn.base import BaseEstimator, TransformerMixin\n",
|
||||
"\n",
|
||||
"# Create a class to select numerical or categorical columns \n",
|
||||
"# since Scikit-Learn doesn't handle DataFrames yet\n",
|
||||
"class DataFrameSelector(BaseEstimator, TransformerMixin):\n",
|
||||
" def __init__(self, attribute_names):\n",
|
||||
" self.attribute_names = attribute_names\n",
|
||||
|
@ -1185,7 +1172,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 70,
|
||||
"execution_count": 67,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -1211,7 +1198,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 71,
|
||||
"execution_count": 68,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1229,7 +1216,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"execution_count": 69,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1243,7 +1230,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"execution_count": 70,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1266,7 +1253,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 74,
|
||||
"execution_count": 71,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1282,7 +1269,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 75,
|
||||
"execution_count": 72,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1298,9 +1285,16 @@
|
|||
"print(\"Predictions:\", lin_reg.predict(some_data_prepared))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Compare against the actual values:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"execution_count": 73,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1313,7 +1307,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 77,
|
||||
"execution_count": 74,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1326,7 +1320,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 78,
|
||||
"execution_count": 75,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1344,7 +1338,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 79,
|
||||
"execution_count": 76,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1360,7 +1354,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 80,
|
||||
"execution_count": 77,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1376,7 +1370,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 81,
|
||||
"execution_count": 78,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1402,7 +1396,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"execution_count": 79,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1419,7 +1413,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 83,
|
||||
"execution_count": 80,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1437,7 +1431,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 84,
|
||||
"execution_count": 81,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1453,7 +1447,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 85,
|
||||
"execution_count": 82,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1469,7 +1463,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 86,
|
||||
"execution_count": 83,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1485,7 +1479,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 87,
|
||||
"execution_count": 84,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1503,7 +1497,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 88,
|
||||
"execution_count": 85,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1517,7 +1511,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 89,
|
||||
"execution_count": 86,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1537,7 +1531,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 90,
|
||||
"execution_count": 87,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1548,19 +1542,29 @@
|
|||
"from sklearn.model_selection import GridSearchCV\n",
|
||||
"\n",
|
||||
"param_grid = [\n",
|
||||
" # try 12 (3×4) combinations of hyperparameters\n",
|
||||
" {'n_estimators': [3, 10, 30], 'max_features': [2, 4, 6, 8]},\n",
|
||||
" # then try 6 (2×3) combinations with bootstrap set as False\n",
|
||||
" {'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"forest_reg = RandomForestRegressor(random_state=42)\n",
|
||||
"# train across 5 folds, that's a total of (12+6)*5=90 rounds of training \n",
|
||||
"grid_search = GridSearchCV(forest_reg, param_grid, cv=5,\n",
|
||||
" scoring='neg_mean_squared_error')\n",
|
||||
"grid_search.fit(housing_prepared, housing_labels)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The best hyperparameter combination found:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 91,
|
||||
"execution_count": 88,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1573,7 +1577,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 92,
|
||||
"execution_count": 89,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1584,9 +1588,16 @@
|
|||
"grid_search.best_estimator_"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's look at the score of each hyperparameter combination tested during the grid search:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 93,
|
||||
"execution_count": 90,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1601,7 +1612,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 94,
|
||||
"execution_count": 91,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1614,7 +1625,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 95,
|
||||
"execution_count": 92,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1638,7 +1649,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 96,
|
||||
"execution_count": 93,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1653,7 +1664,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 97,
|
||||
"execution_count": 94,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1667,7 +1678,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 98,
|
||||
"execution_count": 95,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1683,7 +1694,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 99,
|
||||
"execution_count": 96,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -1705,7 +1716,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 100,
|
||||
"execution_count": 97,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1741,7 +1752,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 101,
|
||||
"execution_count": 98,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1778,7 +1789,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 102,
|
||||
"execution_count": 99,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -1791,7 +1802,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 103,
|
||||
"execution_count": 100,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -1817,7 +1828,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 104,
|
||||
"execution_count": 101,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1867,7 +1878,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 105,
|
||||
"execution_count": 102,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1900,7 +1911,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 106,
|
||||
"execution_count": 103,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1925,7 +1936,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 107,
|
||||
"execution_count": 104,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -1968,7 +1979,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 108,
|
||||
"execution_count": 105,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2008,7 +2019,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 109,
|
||||
"execution_count": 106,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2033,7 +2044,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 110,
|
||||
"execution_count": 107,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2066,7 +2077,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 111,
|
||||
"execution_count": 108,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2098,7 +2109,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 112,
|
||||
"execution_count": 109,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2150,7 +2161,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 113,
|
||||
"execution_count": 110,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -2196,7 +2207,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 114,
|
||||
"execution_count": 111,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -2219,7 +2230,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 115,
|
||||
"execution_count": 112,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2233,7 +2244,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 116,
|
||||
"execution_count": 113,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2256,7 +2267,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 117,
|
||||
"execution_count": 114,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2279,7 +2290,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 118,
|
||||
"execution_count": 115,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2295,7 +2306,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 119,
|
||||
"execution_count": 116,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"deletable": true,
|
||||
|
@ -2318,7 +2329,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 120,
|
||||
"execution_count": 117,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2341,7 +2352,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 121,
|
||||
"execution_count": 118,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2384,7 +2395,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 122,
|
||||
"execution_count": 119,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2401,7 +2412,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 123,
|
||||
"execution_count": 120,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2424,7 +2435,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 124,
|
||||
"execution_count": 121,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2471,7 +2482,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 125,
|
||||
"execution_count": 122,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2491,7 +2502,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 126,
|
||||
"execution_count": 123,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
@ -2514,7 +2525,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 127,
|
||||
"execution_count": 124,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"deletable": true,
|
||||
|
|
Loading…
Reference in New Issue