Merge remote-tracking branch 'upstream/master' into upstream
commit
3699ad54a4
|
@ -406,9 +406,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing = strat_train_set.copy()"
|
||||
|
@ -486,9 +484,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"corr_matrix = housing.corr()"
|
||||
|
@ -533,9 +529,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing[\"rooms_per_household\"] = housing[\"total_rooms\"]/housing[\"households\"]\n",
|
||||
|
@ -591,9 +585,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing = strat_train_set.drop(\"median_house_value\", axis=1) # drop labels for training set\n",
|
||||
|
@ -642,9 +634,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.preprocessing import Imputer\n",
|
||||
|
@ -662,9 +652,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing_num = housing.drop('ocean_proximity', axis=1)\n",
|
||||
|
@ -715,9 +703,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X = imputer.transform(housing_num)"
|
||||
|
@ -726,9 +712,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing_tr = pd.DataFrame(X, columns=housing_num.columns,\n",
|
||||
|
@ -859,9 +843,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Definition of the CategoricalEncoder class, copied from PR #9151.\n",
|
||||
|
@ -1126,9 +1108,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 68,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.base import BaseEstimator, TransformerMixin\n",
|
||||
|
@ -1175,9 +1155,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 70,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.pipeline import Pipeline\n",
|
||||
|
@ -1211,9 +1189,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.base import BaseEstimator, TransformerMixin\n",
|
||||
|
@ -1261,9 +1237,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 74,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.pipeline import FeatureUnion\n",
|
||||
|
@ -1411,9 +1385,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 85,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import cross_val_score\n",
|
||||
|
@ -1644,9 +1616,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 102,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"final_model = grid_search.best_estimator_\n",
|
||||
|
@ -1709,9 +1679,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 105,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"my_model = full_pipeline_with_predictor"
|
||||
|
@ -1720,9 +1688,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 106,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.externals import joblib\n",
|
||||
|
@ -1991,9 +1957,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 116,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.base import BaseEstimator, TransformerMixin\n",
|
||||
|
@ -2029,9 +1993,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 117,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"k = 5"
|
||||
|
@ -2089,9 +2051,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 121,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"preparation_and_feature_selection_pipeline = Pipeline([\n",
|
||||
|
@ -2103,9 +2063,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 122,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing_prepared_top_k_features = preparation_and_feature_selection_pipeline.fit_transform(housing)"
|
||||
|
@ -2167,9 +2125,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 125,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prepare_select_and_predict_pipeline = Pipeline([\n",
|
||||
|
@ -2237,7 +2193,7 @@
|
|||
"source": [
|
||||
"param_grid = [\n",
|
||||
" {'preparation__num_pipeline__imputer__strategy': ['mean', 'median', 'most_frequent'],\n",
|
||||
" 'feature_selection__k': [3, 4, 5, 6, 7]}\n",
|
||||
" 'feature_selection__k': list(range(1, len(feature_importances) + 1))}\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"grid_search_prep = GridSearchCV(prepare_select_and_predict_pipeline, param_grid, cv=5,\n",
|
||||
|
@ -2258,16 +2214,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Great! It seems that we had the right imputer strategy (median), and apparently only the top 7 features are useful (out of 9), the last 2 seem to just add some noise."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 130,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing.shape"
|
||||
"The best imputer strategy is `most_frequent` and apparently almost all features are useful (15 out of 16). The last one (`ISLAND`) seems to just add some noise."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2294,7 +2241,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
"version": "3.6.3"
|
||||
},
|
||||
"nav_menu": {
|
||||
"height": "279px",
|
||||
|
|
Loading…
Reference in New Issue