From c658c2b07c0550303fa4643587267b7cf333c19e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?=
Date: Mon, 15 Nov 2021 17:45:26 +1300
Subject: [PATCH] Move StandardScalerClone inverse_transform and
 get_feature_names_out to exercise

---
 02_end_to_end_machine_learning_project.ipynb | 389 +++++++++++++------
 1 file changed, 267 insertions(+), 122 deletions(-)

diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb
index 9971e97..e2657fb 100644
--- a/02_end_to_end_machine_learning_project.ipynb
+++ b/02_end_to_end_machine_learning_project.ipynb
@@ -1473,27 +1473,7 @@
     "        assert self.n_features_in_ == X.shape[1]\n",
     "        if self.with_mean:\n",
     "            X = X - self.mean_\n",
-    "        return X / self.scale_\n",
-    "    \n",
-    "    # not in the book (left as an exercise):\n",
-    "    def inverse_transform(self, X):\n",
-    "        check_is_fitted(self)\n",
-    "        X = check_array(X)\n",
-    "        assert self.n_features_in_ == X.shape[1]\n",
-    "        X = X * self.scale_\n",
-    "        return X + self.mean_ if self.with_mean else X\n",
-    "    \n",
-    "    # not in the book (left as an exercise):\n",
-    "    def get_feature_names_out(self, names=None):\n",
-    "        return names or getattr(self, \"feature_names_in_\",\n",
-    "                                [f\"x{i}\" for i in range(self.n_features_in_)])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's test our custom transformer:"
+    "        return X / self.scale_"
    ]
   },
   {
@@ -1500,30 +1480,6 @@
    "cell_type": "code",
    "execution_count": 100,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "# Not in the book\n",
-    "from sklearn.utils.estimator_checks import check_estimator\n",
-    " \n",
-    "check_estimator(StandardScaler())\n",
-    "X = np.random.rand(1000, 3)\n",
-    "ss = StandardScaler()\n",
-    "ssc = StandardScalerClone()\n",
-    "X_scaled1 = ss.fit_transform(X)\n",
-    "X_scaled2 = ssc.fit_transform(X)\n",
-    "X_back1 = ss.inverse_transform(X_scaled1)\n",
-    "X_back2 = ssc.inverse_transform(X_scaled2)\n",
-    "assert np.allclose(X_scaled1, X_scaled2)\n",
-    "assert np.allclose(X_back1, X_back2)\n",
-    "assert ssc.n_features_in_ == 3\n",
-    "assert not hasattr(ssc, \"features_names_in_\")\n",
-    "assert ssc.get_feature_names_out() == [\"x0\", \"x1\", \"x2\"]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 101,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "from sklearn.cluster import KMeans\n",
@@ -1548,7 +1504,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 102,
+   "execution_count": 101,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1559,7 +1515,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 103,
+   "execution_count": 102,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1568,7 +1524,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 104,
+   "execution_count": 103,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1610,7 +1566,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 105,
+   "execution_count": 104,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1624,7 +1580,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 106,
+   "execution_count": 105,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1635,7 +1591,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 107,
+   "execution_count": 106,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1648,7 +1604,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 108,
+   "execution_count": 107,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1658,7 +1614,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 109,
+   "execution_count": 108,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1707,7 +1663,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 110,
+   "execution_count": 109,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1719,7 +1675,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 111,
+   "execution_count": 110,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1728,7 +1684,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 112,
+   "execution_count": 111,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1737,7 +1693,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 113,
+   "execution_count": 112,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1746,7 +1702,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 114,
+   "execution_count": 113,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1755,7 +1711,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 115,
+   "execution_count": 114,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1764,7 +1720,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 116,
+   "execution_count": 115,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1786,7 +1742,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 117,
+   "execution_count": 116,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1800,7 +1756,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 118,
+   "execution_count": 117,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1809,7 +1765,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 119,
+   "execution_count": 118,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1822,7 +1778,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 120,
+   "execution_count": 119,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1859,7 +1815,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 121,
+   "execution_count": 120,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1869,7 +1825,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 122,
+   "execution_count": 121,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1892,7 +1848,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 123,
+   "execution_count": 122,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1911,7 +1867,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 124,
+   "execution_count": 123,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1928,7 +1884,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 125,
+   "execution_count": 124,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1937,7 +1893,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 126,
+   "execution_count": 125,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1948,7 +1904,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 127,
+   "execution_count": 126,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1961,7 +1917,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 128,
+   "execution_count": 127,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1973,7 +1929,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 129,
+   "execution_count": 128,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1992,7 +1948,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 130,
+   "execution_count": 129,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2004,7 +1960,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 131,
+   "execution_count": 130,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2013,7 +1969,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 132,
+   "execution_count": 131,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2032,7 +1988,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 133,
+   "execution_count": 132,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2046,7 +2002,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 134,
+   "execution_count": 133,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2062,7 +2018,7 @@
   },
   {
    "cell_type": "code",
- "execution_count": 135, + "execution_count": 134, "metadata": {}, "outputs": [], "source": [ @@ -2103,7 +2059,7 @@ }, { "cell_type": "code", - "execution_count": 136, + "execution_count": 135, "metadata": {}, "outputs": [], "source": [ @@ -2133,7 +2089,7 @@ }, { "cell_type": "code", - "execution_count": 137, + "execution_count": 136, "metadata": {}, "outputs": [], "source": [ @@ -2150,7 +2106,7 @@ }, { "cell_type": "code", - "execution_count": 138, + "execution_count": 137, "metadata": {}, "outputs": [], "source": [ @@ -2159,7 +2115,7 @@ }, { "cell_type": "code", - "execution_count": 139, + "execution_count": 138, "metadata": {}, "outputs": [], "source": [ @@ -2175,7 +2131,7 @@ }, { "cell_type": "code", - "execution_count": 140, + "execution_count": 139, "metadata": {}, "outputs": [], "source": [ @@ -2209,7 +2165,7 @@ }, { "cell_type": "code", - "execution_count": 141, + "execution_count": 140, "metadata": {}, "outputs": [], "source": [ @@ -2226,7 +2182,7 @@ }, { "cell_type": "code", - "execution_count": 142, + "execution_count": 141, "metadata": {}, "outputs": [], "source": [ @@ -2245,7 +2201,7 @@ }, { "cell_type": "code", - "execution_count": 143, + "execution_count": 142, "metadata": {}, "outputs": [], "source": [ @@ -2282,7 +2238,7 @@ }, { "cell_type": "code", - "execution_count": 144, + "execution_count": 143, "metadata": { "tags": [] }, @@ -2343,7 +2299,7 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": 144, "metadata": { "tags": [] }, @@ -2406,7 +2362,7 @@ }, { "cell_type": "code", - "execution_count": 146, + "execution_count": 145, "metadata": {}, "outputs": [], "source": [ @@ -2417,7 +2373,7 @@ }, { "cell_type": "code", - "execution_count": 147, + "execution_count": 146, "metadata": {}, "outputs": [], "source": [ @@ -2435,7 +2391,7 @@ }, { "cell_type": "code", - "execution_count": 148, + "execution_count": 147, "metadata": {}, "outputs": [], "source": [ @@ -2457,7 +2413,7 @@ }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 148, "metadata": {}, "outputs": [], "source": [ @@ -2479,7 +2435,7 @@ }, { "cell_type": "code", - "execution_count": 150, + "execution_count": 149, "metadata": {}, "outputs": [], "source": [ @@ -2500,7 +2456,7 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 150, "metadata": {}, "outputs": [], "source": [ @@ -2526,7 +2482,7 @@ }, { "cell_type": "code", - "execution_count": 152, + "execution_count": 151, "metadata": {}, "outputs": [], "source": [ @@ -2544,7 +2500,7 @@ }, { "cell_type": "code", - "execution_count": 153, + "execution_count": 152, "metadata": {}, "outputs": [], "source": [ @@ -2569,7 +2525,7 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 153, "metadata": {}, "outputs": [], "source": [ @@ -2601,12 +2557,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "_Try a Support Vector Machine regressor (`sklearn.svm.SVR`) with various hyperparameters, such as `kernel=\"linear\"` (with various values for the `C` hyperparameter) or `kernel=\"rbf\"` (with various values for the `C` and `gamma` hyperparameters). Note that SVMs don't scale well to large datasets, so you should probably train your model on just the first 5,000 instances of the training set and use only 3-fold cross-validation, or else it will take hours. Don't worry about what the hyperparameters mean for now (see the SVM notebook if you're interested). 
+    "Exercise: _Try a Support Vector Machine regressor (`sklearn.svm.SVR`) with various hyperparameters, such as `kernel=\"linear\"` (with various values for the `C` hyperparameter) or `kernel=\"rbf\"` (with various values for the `C` and `gamma` hyperparameters). Note that SVMs don't scale well to large datasets, so you should probably train your model on just the first 5,000 instances of the training set and use only 3-fold cross-validation, or else it will take hours. Don't worry about what the hyperparameters mean for now (see the SVM notebook if you're interested). How does the best `SVR` predictor perform?_"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 155,
+   "execution_count": 154,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2636,7 +2592,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 156,
+   "execution_count": 155,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2653,7 +2609,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 157,
+   "execution_count": 156,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2678,7 +2634,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "_Try replacing the `GridSearchCV` with a `RandomizedSearchCV`._"
+    "Exercise: _Try replacing the `GridSearchCV` with a `RandomizedSearchCV`._"
    ]
   },
   {
@@ -2690,7 +2646,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 158,
+   "execution_count": 157,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2724,7 +2680,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 159,
+   "execution_count": 158,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2741,7 +2697,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 160,
+   "execution_count": 159,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2764,7 +2720,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 161,
+   "execution_count": 160,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2792,7 +2748,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "_Try adding a `SelectFromModel` transformer in the preparation pipeline to select only the most important attributes._"
+    "Exercise: _Try adding a `SelectFromModel` transformer in the preparation pipeline to select only the most important attributes._"
    ]
   },
   {
@@ -2804,7 +2760,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 162,
+   "execution_count": 161,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2822,7 +2778,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 163,
+   "execution_count": 162,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2852,7 +2808,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "_Try creating a custom transformer that trains a k-Nearest Neighbors regressor (`sklearn.neighbors.KNeighborsRegressor`) in its `fit()` method, and outputs the model's predictions in its `transform()` method. Then add this feature to the preprocessing pipeline, using latitude and longitude as the inputs to this transformer. This will add a feature in the model that corresponds to the housing median price of the nearest districts._"
+    "Exercise: _Try creating a custom transformer that trains a k-Nearest Neighbors regressor (`sklearn.neighbors.KNeighborsRegressor`) in its `fit()` method, and outputs the model's predictions in its `transform()` method. Then add this feature to the preprocessing pipeline, using latitude and longitude as the inputs to this transformer. This will add a feature in the model that corresponds to the housing median price of the nearest districts._"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -2864,7 +2820,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 164,
+   "execution_count": 163,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2909,7 +2865,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 165,
+   "execution_count": 164,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2925,7 +2881,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 166,
+   "execution_count": 165,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2944,7 +2900,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 167,
+   "execution_count": 166,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2960,7 +2916,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 168,
+   "execution_count": 167,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2976,7 +2932,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 169,
+   "execution_count": 168,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2990,7 +2946,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 170,
+   "execution_count": 169,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3020,12 +2976,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Question: Automatically explore some preparation options using `RandomSearchCV`."
+    "Exercise: _Automatically explore some preparation options using `RandomizedSearchCV`._"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 171,
+   "execution_count": 170,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3047,7 +3003,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 172,
+   "execution_count": 171,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3066,7 +3022,196 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "That's all for today! 😀"
+    "## 6."
    ]
   },
   {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Exercise: _Try to implement the `StandardScalerClone` class again from scratch, then add support for the `inverse_transform()` method: executing `scaler.inverse_transform(scaler.fit_transform(X))` should return an array very close to `X`. Then add support for feature names: set `feature_names_in_` in the `fit()` method if the input is a DataFrame. This attribute should be a NumPy array of column names. Lastly, implement the `get_feature_names_out()` method: it should have one optional `input_features=None` argument. If passed, the method should check that its length matches `n_features_in_`, and that it matches `feature_names_in_` if that attribute is defined; `input_features` should then be returned. If `input_features` is `None`, then the method should return `feature_names_in_` if it is defined or `np.array([\"x0\", \"x1\", ...])` with length `n_features_in_` otherwise._"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 172,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.base import BaseEstimator, TransformerMixin\n",
+    "from sklearn.utils.validation import check_array, check_is_fitted\n",
+    "\n",
+    "class StandardScalerClone(BaseEstimator, TransformerMixin):\n",
+    "    def __init__(self, with_mean=True):  # no *args or **kwargs!\n",
+    "        self.with_mean = with_mean\n",
+    "\n",
+    "    def fit(self, X, y=None):  # y is required even though we don't use it\n",
+    "        if hasattr(X, \"columns\"):  # record column names before X is converted\n",
+    "            self.feature_names_in_ = np.array(X.columns, dtype=object)\n",
+    "        X = check_array(X)  # checks that X is an array with finite float values\n",
+    "        self.mean_ = X.mean(axis=0)\n",
+    "        self.scale_ = X.std(axis=0)\n",
+    "        self.n_features_in_ = X.shape[1]  # every estimator stores this in fit()\n",
+    "        return self  # always return self!\n",
+    "\n",
+    "    def transform(self, X):\n",
+    "        check_is_fitted(self)  # looks for learned attributes (with trailing _)\n",
+    "        X = check_array(X)\n",
+    "        if self.n_features_in_ != X.shape[1]:\n",
+    "            raise ValueError(\"Unexpected number of features\")\n",
+    "        if self.with_mean:\n",
+    "            X = X - self.mean_\n",
+    "        return X / self.scale_\n",
+    "    \n",
+    "    def inverse_transform(self, X):\n",
+    "        check_is_fitted(self)\n",
+    "        X = check_array(X)\n",
+    "        if self.n_features_in_ != X.shape[1]:\n",
+    "            raise ValueError(\"Unexpected number of features\")\n",
+    "        X = X * self.scale_\n",
+    "        return X + self.mean_ if self.with_mean else X\n",
+    "    \n",
+    "    def get_feature_names_out(self, input_features=None):\n",
+    "        if input_features is None:\n",
+    "            return getattr(self, \"feature_names_in_\",\n",
+    "                           [f\"x{i}\" for i in range(self.n_features_in_)])\n",
+    "        else:\n",
+    "            if len(input_features) != self.n_features_in_:\n",
+    "                raise ValueError(\"Invalid number of features\")\n",
+    "            if hasattr(self, \"feature_names_in_\") and not np.all(\n",
+    "                self.feature_names_in_ == input_features\n",
+    "            ):\n",
+    "                raise ValueError(\"input_features ≠ feature_names_in_\")\n",
+    "            return input_features"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's test our custom transformer:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 173,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.utils.estimator_checks import check_estimator\n",
+    " \n",
+    "check_estimator(StandardScalerClone())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "No errors, that's a great start: we respect the Scikit-Learn API."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now let's ensure the transformation works as expected:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 174,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.random.seed(42)\n",
+    "X = np.random.rand(1000, 3)\n",
+    "\n",
+    "scaler = StandardScalerClone()\n",
+    "X_scaled = scaler.fit_transform(X)\n",
+    "\n",
+    "assert np.allclose(X_scaled, (X - X.mean(axis=0)) / X.std(axis=0))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "How about setting `with_mean=False`?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 175,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scaler = StandardScalerClone(with_mean=False)\n",
+    "X_scaled_uncentered = scaler.fit_transform(X)\n",
+    "\n",
+    "assert np.allclose(X_scaled_uncentered, X / X.std(axis=0))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "And does the inverse work?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 176,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scaler = StandardScalerClone()\n",
+    "X_back = scaler.inverse_transform(scaler.fit_transform(X))\n",
+    "assert np.allclose(X, X_back)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "How about the feature names out?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 177,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert np.all(scaler.get_feature_names_out() == [\"x0\", \"x1\", \"x2\"])\n",
+    "assert np.all(scaler.get_feature_names_out([\"a\", \"b\", \"c\"]) == [\"a\", \"b\", \"c\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "And if we fit a DataFrame, are the feature names in and out OK?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 178,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.DataFrame({\"a\": np.random.rand(100), \"b\": np.random.rand(100)})\n",
+    "scaler = StandardScalerClone()\n",
+    "X_scaled = scaler.fit_transform(df)\n",
+    "\n",
+    "assert np.all(scaler.feature_names_in_ == [\"a\", \"b\"])\n",
+    "assert np.all(scaler.get_feature_names_out() == [\"a\", \"b\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "All good! That's all for today! 😀"
+   ]
+  },
+  {