diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb index e2657fb..29de8c3 100644 --- a/02_end_to_end_machine_learning_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -1803,7 +1803,7 @@ " [\"total_bedrooms\", \"total_rooms\"]),\n", " (\"rooms_per_house\", ratio_pipeline(\"rooms_per_house\"),\n", " [\"total_rooms\", \"households\"]),\n", - " (\"people_per_house\", ratio_pipeline(\"bedrooms_ratio\"),\n", + " (\"people_per_house\", ratio_pipeline(\"people_per_house\"),\n", " [\"population\", \"households\"]),\n", " (\"log\", log_pipeline, [\"total_bedrooms\", \"total_rooms\",\n", " \"population\", \"households\", \"median_income\"]),\n", @@ -2869,6 +2869,8 @@ "metadata": {}, "outputs": [], "source": [ + "from sklearn.utils.estimator_checks import check_estimator\n", + "\n", "check_estimator(FeatureFromRegressor(KNeighborsRegressor()))" ] }, @@ -3046,12 +3048,13 @@ " self.with_mean = with_mean\n", "\n", " def fit(self, X, y=None): # y is required even though we don't use it\n", + " X_orig = X\n", " X = check_array(X) # checks that X is an array with finite float values\n", " self.mean_ = X.mean(axis=0)\n", " self.scale_ = X.std(axis=0)\n", " self.n_features_in_ = X.shape[1] # every estimator stores this in fit()\n", - " if hasattr(X, \"columns\"):\n", - " self.feature_names_in_ = np.array(X.columns, np.object)\n", + " if hasattr(X_orig, \"columns\"):\n", + " self.feature_names_in_ = np.array(X_orig.columns, dtype=object) # np.object was removed in NumPy 1.24\n", " return self # always return self!\n", "\n", " def transform(self, X):\n", @@ -3203,8 +3206,8 @@ "scaler = StandardScalerClone()\n", "X_scaled = scaler.fit_transform(df)\n", "\n", - "assert np.all(ss.feature_names_in_ == [\"a\", \"b\"])\n", - "assert np.all(ss.get_feature_names_out() == [\"a\", \"b\"])" + "assert np.all(scaler.feature_names_in_ == [\"a\", \"b\"])\n", + "assert np.all(scaler.get_feature_names_out() == [\"a\", \"b\"])" ] }, {