From 5f6be6fa6f0e595fe96503c9c38bec7103122b79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Fri, 15 Mar 2019 23:49:03 +0800 Subject: [PATCH] Use pd.cut() for income_cat --- 02_end_to_end_machine_learning_project.ipynb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb index a9ff862..7acc3d8 100644 --- a/02_end_to_end_machine_learning_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -338,10 +338,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Divide by 1.5 to limit the number of income categories\n", - "housing[\"income_cat\"] = np.ceil(housing[\"median_income\"] / 1.5)\n", - "# Label those above 5 as 5\n", - "housing[\"income_cat\"].where(housing[\"income_cat\"] < 5, 5.0, inplace=True)" + "housing[\"income_cat\"] = pd.cut(housing[\"median_income\"],\n", + " bins=[0., 1.5, 3.0, 4.5, 6., np.inf],\n", + " labels=[1, 2, 3, 4, 5])" ] }, {