From f558bf43e52a62c65ac5c36c613f11923b06e4ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Fri, 5 Jan 2018 14:36:11 +0100 Subject: [PATCH] Upgrade to latest pandas version, update resampling API --- tools_pandas.ipynb | 923 +++++++++++++++++---------------------------- 1 file changed, 340 insertions(+), 583 deletions(-) diff --git a/tools_pandas.ipynb b/tools_pandas.ipynb index 379443e..6580f20 100644 --- a/tools_pandas.ipynb +++ b/tools_pandas.ipynb @@ -23,9 +23,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from __future__ import division, print_function, unicode_literals" @@ -41,9 +39,7 @@ { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import pandas as pd" @@ -71,9 +67,7 @@ { "cell_type": "code", "execution_count": 3, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s = pd.Series([2,-1,3,5])\n", @@ -91,9 +85,7 @@ { "cell_type": "code", "execution_count": 4, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", @@ -110,9 +102,7 @@ { "cell_type": "code", "execution_count": 5, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s + [1000,2000,3000,4000]" @@ -128,9 +118,7 @@ { "cell_type": "code", "execution_count": 6, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s + 1000" @@ -146,9 +134,7 @@ { "cell_type": "code", "execution_count": 7, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s < 0" @@ -165,9 +151,7 @@ { "cell_type": "code", "execution_count": 8, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s2 = pd.Series([68, 83, 112, 68], index=[\"alice\", \"bob\", \"charles\", \"darwin\"])\n", @@ -184,9 +168,7 @@ { "cell_type": "code", "execution_count": 9, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s2[\"bob\"]" @@ -202,9 +184,7 @@ { "cell_type": "code", "execution_count": 10, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s2[1]" @@ -220,9 +200,7 @@ { "cell_type": "code", "execution_count": 11, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s2.loc[\"bob\"]" @@ -231,9 +209,7 @@ { "cell_type": "code", "execution_count": 12, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s2.iloc[1]" @@ -249,9 +225,7 @@ { "cell_type": "code", "execution_count": 13, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s2.iloc[1:3]" @@ -267,9 +241,7 @@ { "cell_type": "code", "execution_count": 14, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "surprise = pd.Series([1000, 1001, 1002, 1003])\n", @@ -279,9 +251,7 @@ { "cell_type": "code", "execution_count": 15, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "surprise_slice = surprise[2:]\n", @@ -298,9 +268,7 @@ { "cell_type": "code", "execution_count": 16, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "try:\n", @@ -319,9 +287,7 @@ { "cell_type": "code", "execution_count": 17, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "surprise_slice.iloc[0]" @@ -338,9 +304,7 
@@ { "cell_type": "code", "execution_count": 18, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "weights = {\"alice\": 68, \"bob\": 83, \"colin\": 86, \"darwin\": 68}\n", @@ -358,9 +322,7 @@ { "cell_type": "code", "execution_count": 19, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s4 = pd.Series(weights, index = [\"colin\", \"alice\"])\n", @@ -378,9 +340,7 @@ { "cell_type": "code", "execution_count": 20, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "print(s2.keys())\n", @@ -401,9 +361,7 @@ { "cell_type": "code", "execution_count": 21, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s5 = pd.Series([1000,1000,1000,1000])\n", @@ -431,9 +389,7 @@ { "cell_type": "code", "execution_count": 22, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "meaning = pd.Series(42, [\"life\", \"universe\", \"everything\"])\n", @@ -451,9 +407,7 @@ { "cell_type": "code", "execution_count": 23, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s6 = pd.Series([83, 68], index=[\"bob\", \"alice\"], name=\"weights\")\n", @@ -465,14 +419,13 @@ "metadata": {}, "source": [ "## Plotting a `Series`\n", - "Pandas makes it easy to plot `Series` data using matplotlib (for more details on matplotlib, check out the [matplotlib tutorial](tools_matplotlib.ipynb)). Just import matplotlib and call the `plot` method:" + "Pandas makes it easy to plot `Series` data using matplotlib (for more details on matplotlib, check out the [matplotlib tutorial](tools_matplotlib.ipynb)). Just import matplotlib and call the `plot()` method:" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [], @@ -504,15 +457,13 @@ "* it can handle timezones.\n", "\n", "## Time range\n", - "Let's start by creating a time series using `timerange`. This returns a `DatetimeIndex` containing one datetime per hour for 12 hours starting on October 29th 2016 at 5:30pm." + "Let's start by creating a time series using `pd.date_range()`. This returns a `DatetimeIndex` containing one datetime per hour for 12 hours starting on October 29th 2016 at 5:30pm." ] }, { "cell_type": "code", "execution_count": 25, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "dates = pd.date_range('2016/10/29 5:30pm', periods=12, freq='H')\n", @@ -529,9 +480,7 @@ { "cell_type": "code", "execution_count": 26, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "temp_series = pd.Series(temperatures, dates)\n", @@ -548,9 +497,7 @@ { "cell_type": "code", "execution_count": 27, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "temp_series.plot(kind=\"bar\")\n", @@ -564,15 +511,13 @@ "metadata": {}, "source": [ "## Resampling\n", - "Pandas let's us resample a time series very simply. Just call the `resample` method and specify a new frequency:" + "Pandas lets us resample a time series very simply. 
Just call the `resample()` method and specify a new frequency:" ] }, { "cell_type": "code", "execution_count": 28, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "temp_series_freq_2H = temp_series.resample(\"2H\")\n", @@ -583,15 +528,29 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's take a look at the result:" + "The resampling operation is actually a deferred operation, which is why we did not get a `Series` object, but a `DatetimeIndexResampler` object instead. To actually perform the resampling operation, we can simply call the `mean()` method: Pandas will compute the mean of every pair of consecutive hours:" ] }, { "cell_type": "code", "execution_count": 29, - "metadata": { - "collapsed": false - }, + "metadata": {}, + "outputs": [], + "source": [ + "temp_series_freq_2H = temp_series_freq_2H.mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's plot the result:" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, "outputs": [], "source": [ "temp_series_freq_2H.plot(kind=\"bar\")\n", @@ -602,18 +561,33 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note how the values have automatically been aggregated into 2-hour periods. If we look at the 6-8pm period, for example, we had a value of `5.1` at 6:30pm, and `6.1` at 7:30pm. After resampling, we just have one value of `5.6`, which is the mean of `5.1` and `6.1`. Computing the mean is the default behavior, but it is also possible to use a different aggregation function, for example we can decide to keep the minimum value of each period:" + "Note how the values have automatically been aggregated into 2-hour periods. If we look at the 6-8pm period, for example, we had a value of `5.1` at 6:30pm, and `6.1` at 7:30pm. After resampling, we just have one value of `5.6`, which is the mean of `5.1` and `6.1`. Rather than computing the mean, we could have used any other aggregation function, for example we can decide to keep the minimum value of each period:" ] }, { "cell_type": "code", - "execution_count": 30, - "metadata": { - "collapsed": false - }, + "execution_count": 31, + "metadata": {}, "outputs": [], "source": [ - "temp_series_freq_2H = temp_series.resample(\"2H\", how=np.min)\n", + "temp_series_freq_2H = temp_series.resample(\"2H\").min()\n", + "temp_series_freq_2H" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Or, equivalently, we could use the `apply()` method instead:" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "temp_series_freq_2H = temp_series.resample(\"2H\").apply(np.min)\n", "temp_series_freq_2H" ] }, @@ -627,13 +601,11 @@ }, { "cell_type": "code", - "execution_count": 31, - "metadata": { - "collapsed": false - }, + "execution_count": 33, + "metadata": {}, "outputs": [], "source": [ - "temp_series_freq_15min = temp_series.resample(\"15Min\")\n", + "temp_series_freq_15min = temp_series.resample(\"15Min\").mean()\n", "temp_series_freq_15min.head(n=10) # `head` displays the top n values" ] }, @@ -641,14 +613,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "One solution is to fill the gaps by interpolating. We just call the `interpolate` method. The default is to use linear interpolation, but we can also select another method, such as cubic interpolation:" + "One solution is to fill the gaps by interpolating. We just call the `interpolate()` method. 
The default is to use linear interpolation, but we can also select another method, such as cubic interpolation:" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 34, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [], @@ -659,10 +630,8 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": { - "collapsed": false - }, + "execution_count": 35, + "metadata": {}, "outputs": [], "source": [ "temp_series.plot(label=\"Period: 1 hour\")\n", @@ -676,15 +645,13 @@ "metadata": {}, "source": [ "## Timezones\n", - "By default datetimes are *naive*: they are not aware of timezones, so 2016-10-30 02:30 might mean October 30th 2016 at 2:30am in Paris or in New York. We can make datetimes timezone *aware* by calling the `tz_localize` method:" + "By default datetimes are *naive*: they are not aware of timezones, so 2016-10-30 02:30 might mean October 30th 2016 at 2:30am in Paris or in New York. We can make datetimes timezone *aware* by calling the `tz_localize()` method:" ] }, { "cell_type": "code", - "execution_count": 34, - "metadata": { - "collapsed": false - }, + "execution_count": 36, + "metadata": {}, "outputs": [], "source": [ "temp_series_ny = temp_series.tz_localize(\"America/New_York\")\n", @@ -702,10 +669,8 @@ }, { "cell_type": "code", - "execution_count": 35, - "metadata": { - "collapsed": false - }, + "execution_count": 37, + "metadata": {}, "outputs": [], "source": [ "temp_series_paris = temp_series_ny.tz_convert(\"Europe/Paris\")\n", @@ -721,10 +686,8 @@ }, { "cell_type": "code", - "execution_count": 36, - "metadata": { - "collapsed": false - }, + "execution_count": 38, + "metadata": {}, "outputs": [], "source": [ "temp_series_paris_naive = temp_series_paris.tz_localize(None)\n", @@ -740,10 +703,8 @@ }, { "cell_type": "code", - "execution_count": 37, - "metadata": { - "collapsed": false - }, + "execution_count": 39, + "metadata": {}, "outputs": [], "source": [ "try:\n", @@ -762,10 +723,8 @@ }, { "cell_type": "code", - "execution_count": 38, - "metadata": { - "collapsed": false - }, + "execution_count": 40, + "metadata": {}, "outputs": [], "source": [ "temp_series_paris_naive.tz_localize(\"Europe/Paris\", ambiguous=\"infer\")" @@ -776,15 +735,13 @@ "metadata": {}, "source": [ "## Periods\n", - "The `period_range` function returns a `PeriodIndex` instead of a `DatetimeIndex`. For example, let's get all quarters in 2016 and 2017:" + "The `pd.period_range()` function returns a `PeriodIndex` instead of a `DatetimeIndex`. For example, let's get all quarters in 2016 and 2017:" ] }, { "cell_type": "code", - "execution_count": 39, - "metadata": { - "collapsed": false - }, + "execution_count": 41, + "metadata": {}, "outputs": [], "source": [ "quarters = pd.period_range('2016Q1', periods=8, freq='Q')\n", @@ -800,10 +757,8 @@ }, { "cell_type": "code", - "execution_count": 40, - "metadata": { - "collapsed": false - }, + "execution_count": 42, + "metadata": {}, "outputs": [], "source": [ "quarters + 3" @@ -813,15 +768,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `asfreq` method lets us change the frequency of the `PeriodIndex`. All periods are lengthened or shortened accordingly. For example, let's convert all the quarterly periods to monthly periods (zooming in):" + "The `asfreq()` method lets us change the frequency of the `PeriodIndex`. All periods are lengthened or shortened accordingly. 
For example, let's convert all the quarterly periods to monthly periods (zooming in):" ] }, { "cell_type": "code", - "execution_count": 41, - "metadata": { - "collapsed": false - }, + "execution_count": 43, + "metadata": {}, "outputs": [], "source": [ "quarters.asfreq(\"M\")" @@ -836,10 +789,8 @@ }, { "cell_type": "code", - "execution_count": 42, - "metadata": { - "collapsed": false - }, + "execution_count": 44, + "metadata": {}, "outputs": [], "source": [ "quarters.asfreq(\"M\", how=\"start\")" @@ -854,10 +805,8 @@ }, { "cell_type": "code", - "execution_count": 43, - "metadata": { - "collapsed": false - }, + "execution_count": 45, + "metadata": {}, "outputs": [], "source": [ "quarters.asfreq(\"A\")" @@ -872,10 +821,8 @@ }, { "cell_type": "code", - "execution_count": 44, - "metadata": { - "collapsed": false - }, + "execution_count": 46, + "metadata": {}, "outputs": [], "source": [ "quarterly_revenue = pd.Series([300, 320, 290, 390, 320, 360, 310, 410], index = quarters)\n", @@ -884,10 +831,8 @@ }, { "cell_type": "code", - "execution_count": 45, - "metadata": { - "collapsed": false - }, + "execution_count": 47, + "metadata": {}, "outputs": [], "source": [ "quarterly_revenue.plot(kind=\"line\")\n", @@ -903,10 +848,8 @@ }, { "cell_type": "code", - "execution_count": 46, - "metadata": { - "collapsed": false - }, + "execution_count": 48, + "metadata": {}, "outputs": [], "source": [ "last_hours = quarterly_revenue.to_timestamp(how=\"end\", freq=\"H\")\n", @@ -922,10 +865,8 @@ }, { "cell_type": "code", - "execution_count": 47, - "metadata": { - "collapsed": false - }, + "execution_count": 49, + "metadata": {}, "outputs": [], "source": [ "last_hours.to_period()" @@ -940,10 +881,8 @@ }, { "cell_type": "code", - "execution_count": 48, - "metadata": { - "collapsed": false - }, + "execution_count": 50, + "metadata": {}, "outputs": [], "source": [ "months_2016 = pd.period_range(\"2016\", periods=12, freq=\"M\")\n", @@ -965,10 +904,8 @@ }, { "cell_type": "code", - "execution_count": 49, - "metadata": { - "collapsed": false - }, + "execution_count": 51, + "metadata": {}, "outputs": [], "source": [ "people_dict = {\n", @@ -1001,10 +938,8 @@ }, { "cell_type": "code", - "execution_count": 50, - "metadata": { - "collapsed": false - }, + "execution_count": 52, + "metadata": {}, "outputs": [], "source": [ "people[\"birthyear\"]" @@ -1019,10 +954,8 @@ }, { "cell_type": "code", - "execution_count": 51, - "metadata": { - "collapsed": false - }, + "execution_count": 53, + "metadata": {}, "outputs": [], "source": [ "people[[\"birthyear\", \"hobby\"]]" @@ -1037,10 +970,8 @@ }, { "cell_type": "code", - "execution_count": 52, - "metadata": { - "collapsed": false - }, + "execution_count": 54, + "metadata": {}, "outputs": [], "source": [ "d2 = pd.DataFrame(\n", @@ -1060,10 +991,8 @@ }, { "cell_type": "code", - "execution_count": 53, - "metadata": { - "collapsed": false - }, + "execution_count": 55, + "metadata": {}, "outputs": [], "source": [ "values = [\n", @@ -1088,16 +1017,14 @@ }, { "cell_type": "code", - "execution_count": 54, - "metadata": { - "collapsed": false - }, + "execution_count": 56, + "metadata": {}, "outputs": [], "source": [ "masked_array = np.ma.asarray(values, dtype=np.object)\n", "masked_array[(0, 2), (1, 2)] = np.ma.masked\n", "d3 = pd.DataFrame(\n", - " values,\n", + " masked_array,\n", " columns=[\"birthyear\", \"children\", \"hobby\", \"weight\"],\n", " index=[\"alice\", \"bob\", \"charles\"]\n", " )\n", @@ -1113,10 +1040,8 @@ }, { "cell_type": "code", - "execution_count": 55, - "metadata": { - 
"collapsed": false - }, + "execution_count": 57, + "metadata": {}, "outputs": [], "source": [ "d4 = pd.DataFrame(\n", @@ -1136,10 +1061,8 @@ }, { "cell_type": "code", - "execution_count": 56, - "metadata": { - "collapsed": false - }, + "execution_count": 58, + "metadata": {}, "outputs": [], "source": [ "people = pd.DataFrame({\n", @@ -1161,10 +1084,8 @@ }, { "cell_type": "code", - "execution_count": 57, - "metadata": { - "collapsed": false - }, + "execution_count": 59, + "metadata": {}, "outputs": [], "source": [ "d5 = pd.DataFrame(\n", @@ -1191,10 +1112,8 @@ }, { "cell_type": "code", - "execution_count": 58, - "metadata": { - "collapsed": false - }, + "execution_count": 60, + "metadata": {}, "outputs": [], "source": [ "d5[\"public\"]" @@ -1202,13 +1121,11 @@ }, { "cell_type": "code", - "execution_count": 59, - "metadata": { - "collapsed": false - }, + "execution_count": 61, + "metadata": {}, "outputs": [], "source": [ - "d5[\"public\", \"hobby\"] # Same result as d4[\"public\"][\"hobby\"]" + "d5[\"public\", \"hobby\"] # Same result as d5[\"public\"][\"hobby\"]" ] }, { @@ -1221,10 +1138,8 @@ }, { "cell_type": "code", - "execution_count": 60, - "metadata": { - "collapsed": false - }, + "execution_count": 62, + "metadata": {}, "outputs": [], "source": [ "d5" @@ -1234,15 +1149,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "There are two levels of columns, and two levels of indices. We can drop a column level by calling `droplevel` (the same goes for indices):" + "There are two levels of columns, and two levels of indices. We can drop a column level by calling `droplevel()` (the same goes for indices):" ] }, { "cell_type": "code", - "execution_count": 61, - "metadata": { - "collapsed": false - }, + "execution_count": 63, + "metadata": {}, "outputs": [], "source": [ "d5.columns = d5.columns.droplevel(level = 0)\n", @@ -1259,10 +1172,8 @@ }, { "cell_type": "code", - "execution_count": 62, - "metadata": { - "collapsed": false - }, + "execution_count": 64, + "metadata": {}, "outputs": [], "source": [ "d6 = d5.T\n", @@ -1274,15 +1185,13 @@ "metadata": {}, "source": [ "## Stacking and unstacking levels\n", - "Calling the `stack` method will push the lowest column level after the lowest index:" + "Calling the `stack()` method will push the lowest column level after the lowest index:" ] }, { "cell_type": "code", - "execution_count": 63, - "metadata": { - "collapsed": false - }, + "execution_count": 65, + "metadata": {}, "outputs": [], "source": [ "d7 = d6.stack()\n", @@ -1295,15 +1204,13 @@ "source": [ "Note that many `NaN` values appeared. This makes sense because many new combinations did not exist before (eg. there was no `bob` in `London`).\n", "\n", - "Calling `unstack` will do the reverse, once again creating many `NaN` values." + "Calling `unstack()` will do the reverse, once again creating many `NaN` values." ] }, { "cell_type": "code", - "execution_count": 64, - "metadata": { - "collapsed": false - }, + "execution_count": 66, + "metadata": {}, "outputs": [], "source": [ "d8 = d7.unstack()\n", @@ -1319,10 +1226,8 @@ }, { "cell_type": "code", - "execution_count": 65, - "metadata": { - "collapsed": false - }, + "execution_count": 67, + "metadata": {}, "outputs": [], "source": [ "d9 = d8.unstack()\n", @@ -1333,14 +1238,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `stack` and `unstack` methods let you select the `level` to stack/unstack. 
You can even stack/unstack multiple levels at once:" + "The `stack()` and `unstack()` methods let you select the `level` to stack/unstack. You can even stack/unstack multiple levels at once:" ] }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 68, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [], @@ -1354,7 +1258,7 @@ "metadata": {}, "source": [ "## Most methods return modified copies\n", - "As you may have noticed, the `stack` and `unstack` methods do not modify the object they apply to. Instead, they work on a copy and return that copy. This is true of most methods in pandas." + "As you may have noticed, the `stack()` and `unstack()` methods do not modify the object they apply to. Instead, they work on a copy and return that copy. This is true of most methods in pandas." ] }, { @@ -1367,10 +1271,8 @@ }, { "cell_type": "code", - "execution_count": 67, - "metadata": { - "collapsed": false - }, + "execution_count": 69, + "metadata": {}, "outputs": [], "source": [ "people" @@ -1380,15 +1282,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `loc` attribute lets you access rows instead of columns. The result is `Series` object in which the `DataFrame`'s column names are mapped to row index labels:" + "The `loc` attribute lets you access rows instead of columns. The result is a `Series` object in which the `DataFrame`'s column names are mapped to row index labels:" ] }, { "cell_type": "code", - "execution_count": 68, - "metadata": { - "collapsed": false - }, + "execution_count": 70, + "metadata": {}, "outputs": [], "source": [ "people.loc[\"charles\"]" @@ -1403,10 +1303,8 @@ }, { "cell_type": "code", - "execution_count": 69, - "metadata": { - "collapsed": false - }, + "execution_count": 71, + "metadata": {}, "outputs": [], "source": [ "people.iloc[2]" @@ -1421,10 +1319,8 @@ }, { "cell_type": "code", - "execution_count": 70, - "metadata": { - "collapsed": false - }, + "execution_count": 72, + "metadata": {}, "outputs": [], "source": [ "people.iloc[1:3]" @@ -1439,10 +1335,8 @@ }, { "cell_type": "code", - "execution_count": 71, - "metadata": { - "collapsed": false - }, + "execution_count": 73, + "metadata": {}, "outputs": [], "source": [ "people[np.array([True, False, True])]" @@ -1457,10 +1351,8 @@ }, { "cell_type": "code", - "execution_count": 72, - "metadata": { - "collapsed": false - }, + "execution_count": 74, + "metadata": {}, "outputs": [], "source": [ "people[people[\"birthyear\"] < 1990]" @@ -1476,10 +1368,8 @@ }, { "cell_type": "code", - "execution_count": 73, - "metadata": { - "collapsed": false - }, + "execution_count": 75, + "metadata": {}, "outputs": [], "source": [ "people" @@ -1487,13 +1377,11 @@ }, { "cell_type": "code", - "execution_count": 74, - "metadata": { - "collapsed": false - }, + "execution_count": 76, + "metadata": {}, "outputs": [], "source": [ - "people[\"age\"] = 2016 - people[\"birthyear\"] # adds a new column \"age\"\n", + "people[\"age\"] = 2018 - people[\"birthyear\"] # adds a new column \"age\"\n", "people[\"over 30\"] = people[\"age\"] > 30 # adds another column \"over 30\"\n", "birthyears = people.pop(\"birthyear\")\n", "del people[\"children\"]\n", @@ -1503,10 +1391,8 @@ }, { "cell_type": "code", - "execution_count": 75, - "metadata": { - "collapsed": false - }, + "execution_count": 77, + "metadata": {}, "outputs": [], "source": [ "birthyears" @@ -1521,10 +1407,8 @@ }, { "cell_type": "code", - "execution_count": 76, - "metadata": { - "collapsed": false - }, + "execution_count": 78, + "metadata": {}, 
"outputs": [], "source": [ "people[\"pets\"] = pd.Series({\"bob\": 0, \"charles\": 5, \"eugene\":1}) # alice is missing, eugene is ignored\n", @@ -1535,15 +1419,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "When adding a new column, it is added at the end (on the right) by default. You can also insert a column anywhere else using the `insert` method:" + "When adding a new column, it is added at the end (on the right) by default. You can also insert a column anywhere else using the `insert()` method:" ] }, { "cell_type": "code", - "execution_count": 77, - "metadata": { - "collapsed": false - }, + "execution_count": 79, + "metadata": {}, "outputs": [], "source": [ "people.insert(1, \"height\", [172, 181, 185])\n", @@ -1555,15 +1437,13 @@ "metadata": {}, "source": [ "## Assigning new columns\n", - "You can also create new columns by calling the `assign` method. Note that this returns a new `DataFrame` object, the original is not modified:" + "You can also create new columns by calling the `assign()` method. Note that this returns a new `DataFrame` object, the original is not modified:" ] }, { "cell_type": "code", - "execution_count": 78, - "metadata": { - "collapsed": false - }, + "execution_count": 80, + "metadata": {}, "outputs": [], "source": [ "people.assign(\n", @@ -1581,10 +1461,8 @@ }, { "cell_type": "code", - "execution_count": 79, - "metadata": { - "collapsed": false - }, + "execution_count": 81, + "metadata": {}, "outputs": [], "source": [ "try:\n", @@ -1605,10 +1483,8 @@ }, { "cell_type": "code", - "execution_count": 80, - "metadata": { - "collapsed": false - }, + "execution_count": 82, + "metadata": {}, "outputs": [], "source": [ "d6 = people.assign(body_mass_index = people[\"weight\"] / (people[\"height\"] / 100) ** 2)\n", @@ -1624,10 +1500,8 @@ }, { "cell_type": "code", - "execution_count": 81, - "metadata": { - "collapsed": false - }, + "execution_count": 83, + "metadata": {}, "outputs": [], "source": [ "try:\n", @@ -1643,15 +1517,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "But fear not, there is a simple solution. You can pass a function to the `assign` method (typically a `lambda` function), and this function will be called with the `DataFrame` as a parameter:" + "But fear not, there is a simple solution. You can pass a function to the `assign()` method (typically a `lambda` function), and this function will be called with the `DataFrame` as a parameter:" ] }, { "cell_type": "code", - "execution_count": 82, - "metadata": { - "collapsed": false - }, + "execution_count": 84, + "metadata": {}, "outputs": [], "source": [ "(people\n", @@ -1677,10 +1549,8 @@ }, { "cell_type": "code", - "execution_count": 83, - "metadata": { - "collapsed": false - }, + "execution_count": 85, + "metadata": {}, "outputs": [], "source": [ "people.eval(\"weight / (height/100) ** 2 > 25\")" @@ -1690,18 +1560,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Assignment expressions are also supported, and contrary to the `assign` method, this does not create a copy of the `DataFrame`, instead it directly modifies it:" + "Assignment expressions are also supported. 
Let's set `inplace=True` to directly modify the `DataFrame` rather than getting a modified copy:" ] }, { "cell_type": "code", - "execution_count": 84, - "metadata": { - "collapsed": false - }, + "execution_count": 86, + "metadata": {}, "outputs": [], "source": [ - "people.eval(\"body_mass_index = weight / (height/100) ** 2\")\n", + "people.eval(\"body_mass_index = weight / (height/100) ** 2\", inplace=True)\n", "people" ] }, @@ -1714,14 +1582,12 @@ }, { "cell_type": "code", - "execution_count": 85, - "metadata": { - "collapsed": false - }, + "execution_count": 87, + "metadata": {}, "outputs": [], "source": [ "overweight_threshold = 30\n", - "people.eval(\"overweight = body_mass_index > @overweight_threshold\")\n", + "people.eval(\"overweight = body_mass_index > @overweight_threshold\", inplace=True)\n", "people" ] }, @@ -1730,15 +1596,13 @@ "metadata": {}, "source": [ "## Querying a `DataFrame`\n", - "The `query` method lets you filter a `DataFrame` based on a query expression:" + "The `query()` method lets you filter a `DataFrame` based on a query expression:" ] }, { "cell_type": "code", - "execution_count": 86, - "metadata": { - "collapsed": false - }, + "execution_count": 88, + "metadata": {}, "outputs": [], "source": [ "people.query(\"age > 30 and pets == 0\")" @@ -1754,10 +1618,8 @@ }, { "cell_type": "code", - "execution_count": 87, - "metadata": { - "collapsed": false - }, + "execution_count": 89, + "metadata": {}, "outputs": [], "source": [ "people.sort_index(ascending=False)" @@ -1772,10 +1634,8 @@ }, { "cell_type": "code", - "execution_count": 88, - "metadata": { - "collapsed": false - }, + "execution_count": 90, + "metadata": {}, "outputs": [], "source": [ "people.sort_index(axis=1, inplace=True)\n", @@ -1791,10 +1651,8 @@ }, { "cell_type": "code", - "execution_count": 89, - "metadata": { - "collapsed": false - }, + "execution_count": 91, + "metadata": {}, "outputs": [], "source": [ "people.sort_values(by=\"age\", inplace=True)\n", @@ -1813,10 +1671,8 @@ }, { "cell_type": "code", - "execution_count": 90, - "metadata": { - "collapsed": false - }, + "execution_count": 92, + "metadata": {}, "outputs": [], "source": [ "people.plot(kind = \"line\", x = \"body_mass_index\", y = [\"height\", \"weight\"])\n", @@ -1827,14 +1683,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "You can pass extra arguments supported by matplotlib's functions. For example, we can create scatterplot and pass it a list of sizes using the `s` argument of matplotlib's `scatter` function:" + "You can pass extra arguments supported by matplotlib's functions. 
For example, we can create a scatterplot and pass it a list of sizes using the `s` argument of matplotlib's `scatter()` function:" ] }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 93, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "people.plot(kind = \"scatter\", x = \"height\", y = \"weight\", s=[40, 120, 200])\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Operations on `DataFrame`s\n", "Although `DataFrame`s do not try to mimick NumPy arrays, there are a few similarities. Let's create a `DataFrame` to demonstrate this:" ] }, { "cell_type": "code", - "execution_count": 92, - "metadata": { - "collapsed": false - }, + "execution_count": 94, + "metadata": {}, "outputs": [], "source": [ "grades_array = np.array([[8,8,9],[10,9,9],[4, 8, 2], [9, 10, 10]])\n", "grades = pd.DataFrame(grades_array, columns=[\"sep\", \"oct\", \"nov\"], index=[\"alice\",\"bob\",\"charles\",\"darwin\"])\n", "grades" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You can apply NumPy mathematical functions on a `DataFrame`: the function is applied to all values:" ] }, { "cell_type": "code", - "execution_count": 93, - "metadata": { - "collapsed": false - }, + "execution_count": 95, + "metadata": {}, "outputs": [], "source": [ "np.sqrt(grades)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Similarly, adding a single value to a `DataFrame` will add that value to all elements in the `DataFrame`. This is called *broadcasting*:" ] }, { "cell_type": "code", - "execution_count": 94, - "metadata": { - "collapsed": false - }, + "execution_count": 96, + "metadata": {}, "outputs": [], "source": [ "grades + 1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Of course, the same is true for all other binary operations, including arithmetic (`*`,`/`,`**`...) and conditional (`>`, `==`...) operations:" ] }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 97, "metadata": { - "collapsed": false, "scrolled": false }, "outputs": [], "source": [ "grades >= 5" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Aggregation operations, such as computing the `max`, the `sum` or the `mean` of a `DataFrame`, apply to each column, and you get back a `Series` object:" ] }, { "cell_type": "code", - "execution_count": 96, - "metadata": { - "collapsed": false - }, + "execution_count": 98, + "metadata": {}, "outputs": [], "source": [ "grades.mean()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The `all` method is also an aggregation operation: it checks whether all values are `True` or not. Let's see during which months all students got a grade greater than `5`:" ] }, { "cell_type": "code", - "execution_count": 97, - "metadata": { - "collapsed": false - }, + "execution_count": 99, + "metadata": {}, "outputs": [], "source": [ "(grades > 5).all()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Most of these functions take an optional `axis` parameter which lets you specify along which axis of the `DataFrame` you want the operation executed. It defaults to `axis=0`, meaning that the operation is executed vertically (on each column). You can set `axis=1` to execute the operation horizontally (on each row). For example, let's find out which students had all grades greater than `5`:" ] }, { "cell_type": "code", - "execution_count": 98, - "metadata": { - "collapsed": false - }, + "execution_count": 100, + "metadata": {}, "outputs": [], "source": [ "(grades > 5).all(axis = 1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Similarly, the `any` method returns `True` if any value is True. Let's see who got at least one grade 10:" ] }, { "cell_type": "code", - "execution_count": 99, - "metadata": { - "collapsed": false - }, + "execution_count": 101, + "metadata": {}, "outputs": [], "source": [ "(grades == 10).any(axis = 1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If you add a `Series` object to a `DataFrame` (or execute any other binary operation between a `DataFrame` and a `Series`), pandas attempts to broadcast the operation to all *rows* in the `DataFrame`. This only works if the `Series` has the same size as the `DataFrame`s rows. For example, let's subtract the `mean` of the `DataFrame` (a `Series` object) from every row in the `DataFrame`:" ] }, { "cell_type": "code", - "execution_count": 100, - "metadata": { - "collapsed": false - }, + "execution_count": 102, + "metadata": {}, "outputs": [], "source": [ "grades - grades.mean() # equivalent to: grades - [7.75, 8.75, 7.50]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We subtracted `7.75` from all September grades, `8.75` from October grades and `7.50` from November grades. It is equivalent to subtracting this `DataFrame`:" ] }, { "cell_type": "code", - "execution_count": 101, - "metadata": { - "collapsed": false - }, + "execution_count": 103, + "metadata": {}, "outputs": [], "source": [ "pd.DataFrame([[7.75, 8.75, 7.50]]*4, index=grades.index, columns=grades.columns)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If you want to subtract the global mean from every grade, here is one way to do it:" ] }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 104, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "grades - grades.values.mean() # subtracts the global mean (8.00) from all grades" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Automatic alignment\n", "Similar to `Series`, when operating on multiple `DataFrame`s, pandas automatically aligns them by row index label, but also by column names. Let's create a `DataFrame` with bonus points for each person from October to December:" ] }, { "cell_type": "code", - "execution_count": 103, - "metadata": { - "collapsed": false - }, + "execution_count": 105, + "metadata": {}, "outputs": [], "source": [ "bonus_array = np.array([[0,np.nan,2],[np.nan,1,0],[0, 1, 0], [3, 3, 0]])\n", "bonus_points = pd.DataFrame(bonus_array, columns=[\"oct\", \"nov\", \"dec\"], index=[\"bob\",\"colin\", \"darwin\", \"charles\"])\n", "bonus_points" ] }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 106, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "grades + bonus_points" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Handling missing data\n", "Dealing with missing data is a frequent task when working with real life data. Pandas offers a few tools to handle missing data.\n", " \n", "Let's try to fix the problem above. 
For example, we can decide that missing data should result in a zero, instead of `NaN`. We can replace all `NaN` values with any value using the `fillna()` method:" ] }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 107, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "(grades + bonus_points).fillna(0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "It's a bit unfair that we're setting grades to zero though, perhaps we should decide that missing grades are missing grades, but missing bonus points should be replaced by zeros:" ] }, { "cell_type": "code", - "execution_count": 106, - "metadata": { - "collapsed": false - }, + "execution_count": 108, + "metadata": {}, "outputs": [], "source": [ "fixed_bonus_points = bonus_points.fillna(0)\n", "fixed_bonus_points.insert(0, \"sep\", 0)\n", "fixed_bonus_points.loc[\"alice\"] = 0\n", "grades + fixed_bonus_points" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Another way to handle missing data is to interpolate. Let's look at the `bonus_points` `DataFrame` again:" ] }, { "cell_type": "code", - "execution_count": 107, - "metadata": { - "collapsed": false - }, + "execution_count": 109, + "metadata": {}, "outputs": [], "source": [ "bonus_points" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now let's call the `interpolate` method. By default, it interpolates vertically (`axis=0`), so let's tell it to interpolate horizontally (`axis=1`):" ] }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 110, "metadata": { - "collapsed": false, "scrolled": false }, "outputs": [], "source": [ "bonus_points.interpolate(axis=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Bob had 0 bonus points in October, and 2 in December. When we interpolate for November, we get the mean: 1 bonus point. Colin had 1 bonus point in November, but we do not know how many bonus points he had in September, so we cannot interpolate, this is why there is still a missing value in October after interpolation. To fix this, we can set the September bonus points to 0 before interpolation:" ] }, { "cell_type": "code", - "execution_count": 109, - "metadata": { - "collapsed": false - }, + "execution_count": 111, + "metadata": {}, "outputs": [], "source": [ "better_bonus_points = bonus_points.copy()\n", "better_bonus_points.insert(0, \"sep\", 0)\n", "better_bonus_points.loc[\"alice\"] = 0\n", "better_bonus_points = better_bonus_points.interpolate(axis=1)\n", "better_bonus_points" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Great, now we have reasonable bonus points everywhere. Let's find out the final grades:" ] }, { "cell_type": "code", - "execution_count": 110, - "metadata": { - "collapsed": false - }, + "execution_count": 112, + "metadata": {}, "outputs": [], "source": [ "grades + better_bonus_points" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "It is slightly annoying that the September column ends up on the right. This is because the `DataFrame`s we are adding do not have the exact same columns (the `grades` `DataFrame` is missing the `\"dec\"` column), so to make things predictable, pandas orders the final columns alphabetically. To fix this, we can simply add the missing column before adding:" ] }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 113, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "grades[\"dec\"] = np.nan\n", "final_grades = grades + better_bonus_points\n", "final_grades" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "There's not much we can do about December and Colin: it's bad enough that we are making up bonus points, but we can't reasonably make up grades (well I guess some teachers probably do). 
So let's call the `dropna()` method to get rid of rows that are full of `NaN`s:" ] }, { "cell_type": "code", - "execution_count": 112, - "metadata": { - "collapsed": false - }, + "execution_count": 114, + "metadata": {}, "outputs": [], "source": [ "final_grades_clean = final_grades.dropna(how=\"all\")\n", @@ -2259,10 +2078,8 @@ }, { "cell_type": "code", - "execution_count": 113, - "metadata": { - "collapsed": false - }, + "execution_count": 115, + "metadata": {}, "outputs": [], "source": [ "final_grades_clean = final_grades_clean.dropna(axis=1, how=\"all\")\n", @@ -2281,9 +2098,8 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 116, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [], @@ -2301,10 +2117,8 @@ }, { "cell_type": "code", - "execution_count": 115, - "metadata": { - "collapsed": false - }, + "execution_count": 117, + "metadata": {}, "outputs": [], "source": [ "grouped_grades = final_grades.groupby(\"hobby\")\n", @@ -2320,10 +2134,8 @@ }, { "cell_type": "code", - "execution_count": 116, - "metadata": { - "collapsed": false - }, + "execution_count": 118, + "metadata": {}, "outputs": [], "source": [ "grouped_grades.mean()" @@ -2346,10 +2158,8 @@ }, { "cell_type": "code", - "execution_count": 117, - "metadata": { - "collapsed": false - }, + "execution_count": 119, + "metadata": {}, "outputs": [], "source": [ "bonus_points" @@ -2357,10 +2167,8 @@ }, { "cell_type": "code", - "execution_count": 118, - "metadata": { - "collapsed": false - }, + "execution_count": 120, + "metadata": {}, "outputs": [], "source": [ "more_grades = final_grades_clean.stack().reset_index()\n", @@ -2373,15 +2181,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now we can call the `pivot_table` function for this `DataFrame`, asking to group by the `name` column. By default, `pivot_table` computes the `mean` of each numeric column:" + "Now we can call the `pd.pivot_table()` function for this `DataFrame`, asking to group by the `name` column. 
By default, `pivot_table()` computes the mean of each numeric column:" ] }, { "cell_type": "code", - "execution_count": 119, - "metadata": { - "collapsed": false - }, + "execution_count": 121, + "metadata": {}, "outputs": [], "source": [ "pd.pivot_table(more_grades, index=\"name\")" @@ -2391,15 +2197,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can change the aggregation function by setting the `aggfunc` attribute, and we can also specify the list of columns whose values will be aggregated:" + "We can change the aggregation function by setting the `aggfunc` argument, and we can also specify the list of columns whose values will be aggregated:" ] }, { "cell_type": "code", - "execution_count": 120, - "metadata": { - "collapsed": false - }, + "execution_count": 122, + "metadata": {}, "outputs": [], "source": [ "pd.pivot_table(more_grades, index=\"name\", values=[\"grade\",\"bonus\"], aggfunc=np.max)" @@ -2414,10 +2218,8 @@ }, { "cell_type": "code", - "execution_count": 121, - "metadata": { - "collapsed": false - }, + "execution_count": 123, + "metadata": {}, "outputs": [], "source": [ "pd.pivot_table(more_grades, index=\"name\", values=\"grade\", columns=\"month\", margins=True)" @@ -2432,10 +2234,8 @@ }, { "cell_type": "code", - "execution_count": 122, - "metadata": { - "collapsed": false - }, + "execution_count": 124, + "metadata": {}, "outputs": [], "source": [ "pd.pivot_table(more_grades, index=(\"name\", \"month\"), margins=True)" @@ -2451,9 +2251,8 @@ }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 125, "metadata": { - "collapsed": false, "scrolled": false }, "outputs": [], @@ -2469,14 +2268,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `head` method returns the top 5 rows:" + "The `head()` method returns the top 5 rows:" ] }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 126, "metadata": { - "collapsed": false, "scrolled": false }, "outputs": [], @@ -2488,15 +2286,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Of course there's also a `tail` function to view the bottom 5 rows. You can pass the number of rows you want:" + "Of course there's also a `tail()` function to view the bottom 5 rows. 
You can pass the number of rows you want:" ] }, { "cell_type": "code", - "execution_count": 125, - "metadata": { - "collapsed": false - }, + "execution_count": 127, + "metadata": {}, "outputs": [], "source": [ "large_df.tail(n=2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The `info` method prints out a summary of each columns contents:" + "The `info()` method prints out a summary of each column's contents:" ] }, { "cell_type": "code", - "execution_count": 126, + "execution_count": 128, "metadata": { - "collapsed": false, "scrolled": false }, "outputs": [], "source": [ "large_df.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Finally, the `describe` method gives a nice overview of the main aggregated values over each column:\n", + "Finally, the `describe()` method gives a nice overview of the main aggregated values over each column:\n", "* `count`: number of non-null (not NaN) values\n", "* `mean`: mean of non-null values\n", "* `std`: [standard deviation](https://en.wikipedia.org/wiki/Standard_deviation) of non-null values\n", "* `min`: minimum of non-null values\n", "* `25%`, `50%`, `75%`: 25th, 50th and 75th [percentile](https://en.wikipedia.org/wiki/Percentile) of non-null values\n", "* `max`: maximum of non-null values" ] }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 129, "metadata": { - "collapsed": false, "scrolled": false }, "outputs": [], "source": [ "large_df.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Saving & loading\n", "Pandas can save `DataFrame`s to various backends, including file formats such as CSV, Excel, JSON, HTML and HDF5, or to a SQL database. Let's create a `DataFrame` to demonstrate this:" ] }, { "cell_type": "code", - "execution_count": 128, - "metadata": { - "collapsed": false - }, + "execution_count": 130, + "metadata": {}, "outputs": [], "source": [ "my_df = pd.DataFrame(\n", "    [[\"Biking\", 68.5, 1985, np.nan], [\"Dancing\", 83.1, 1984, 3]], \n", "    columns=[\"hobby\",\"weight\",\"birthyear\",\"children\"],\n", "    index=[\"alice\", \"bob\"]\n", ")\n", "my_df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Saving\n", "Let's save it to CSV, HTML and JSON:" ] }, { "cell_type": "code", - "execution_count": 129, - "metadata": { - "collapsed": true - }, + "execution_count": 131, + "metadata": {}, "outputs": [], "source": [ "my_df.to_csv(\"my_df.csv\")\n", "my_df.to_html(\"my_df.html\")\n", "my_df.to_json(\"my_df.json\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Done! Let's take a peek at what was saved:" ] }, { "cell_type": "code", - "execution_count": 130, - "metadata": { - "collapsed": false - }, + "execution_count": 132, + "metadata": {}, "outputs": [], "source": [ "for filename in (\"my_df.csv\", \"my_df.html\", \"my_df.json\"):\n", "    print(\"#\", filename)\n", "    with open(filename, \"rt\") as f:\n", "        print(f.read())\n", "        print()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that the index is saved as the first column (with no name) in a CSV file, as `<th>` tags in HTML and as keys in JSON.\n", "\n", "Saving to other formats works very similarly, but some formats require extra libraries to be installed. For example, saving to Excel requires the openpyxl library:" ] }, { "cell_type": "code", - "execution_count": 131, - "metadata": { - "collapsed": false - }, + "execution_count": 133, + "metadata": {}, "outputs": [], "source": [ "try:\n", "    my_df.to_excel(\"my_df.xlsx\", sheet_name='People')\n", "except ImportError as e:\n", "    print(e)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading\n", "Now let's load our CSV file back into a `DataFrame`:" ] }, { "cell_type": "code", - "execution_count": 132, - "metadata": { - "collapsed": false - }, + "execution_count": 134, + "metadata": {}, "outputs": [], "source": [ "my_df_loaded = pd.read_csv(\"my_df.csv\", index_col=0)\n", "my_df_loaded" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As you might guess, there are similar `read_json`, `read_html`, `read_excel` functions as well.  We can also read data straight from the Internet. For example, let's load the top 1,000 U.S. cities from github:" ] }, { "cell_type": "code", - "execution_count": 133, - "metadata": { - "collapsed": false - }, + "execution_count": 135, + "metadata": {}, "outputs": [], "source": [ "us_cities = None\n", "try:\n", "    csv_url = \"https://raw.githubusercontent.com/plotly/datasets/master/us-cities-top-1k.csv\"\n", "    us_cities = pd.read_csv(csv_url, index_col=0)\n", "    us_cities = us_cities.head()\n", "except IOError as e:\n", "    print(e)\n", "us_cities" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "There are more options available, in particular regarding datetime format. Check out the [documentation](http://pandas.pydata.org/pandas-docs/stable/io.html) for more details.\n", "\n", "# Combining `DataFrame`s\n", "## SQL-like joins\n", "One powerful feature of pandas is it's ability to perform SQL-like joins on `DataFrame`s. Various types of joins are supported: inner joins, left/right outer joins and full joins. To illustrate this, let's start by creating a couple simple `DataFrame`s:" ] }, { "cell_type": "code", - "execution_count": 134, - "metadata": { - "collapsed": false - }, + "execution_count": 136, + "metadata": {}, "outputs": [], "source": [ "city_loc = pd.DataFrame(\n", "    [\n", "        [\"CA\", \"San Francisco\", 37.781334, -122.416728],\n", "        [\"NY\", \"New York\", 40.705649, -74.008344],\n", "        [\"FL\", \"Miami\", 25.791100, -80.320733],\n", "        [\"OH\", \"Cleveland\", 41.473508, -81.739791],\n", "        [\"UT\", \"Salt Lake City\", 40.755851, -111.896657]\n", "    ], columns=[\"state\", \"city\", \"lat\", \"lng\"])\n", "city_loc" ] }, { "cell_type": "code", - "execution_count": 135, - "metadata": { - "collapsed": false - }, + "execution_count": 137, + "metadata": {}, "outputs": [], "source": [ "city_pop = pd.DataFrame(\n", "    [\n", "        [808976, \"San Francisco\", \"California\"],\n", "        [8363710, \"New York\", \"New-York\"],\n", "        [413201, \"Miami\", \"Florida\"],\n", "        [2242193, \"Houston\", \"Texas\"]\n", "    ], index=[3,4,5,6], columns=[\"population\", \"city\", \"state\"])\n", "city_pop" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now let's join these `DataFrame`s using the `merge` function:" + "Now let's join these `DataFrame`s using the `merge()` function:" ] }, { "cell_type": "code", - "execution_count": 136, - "metadata": { - "collapsed": false - }, + "execution_count": 138, + "metadata": {}, "outputs": [], "source": [ "pd.merge(left=city_loc, right=city_pop, on=\"city\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that both `DataFrame`s have a column named `state`, so in the result they got renamed to `state_x` and `state_y`.\n", "\n", "Also, note that Cleveland, Salt Lake City and Houston were dropped because they don't exist in *both* `DataFrame`s. This is the equivalent of a SQL `INNER JOIN`. If you want a `FULL OUTER JOIN`, where no city gets dropped and `NaN` values are added, you must specify `how=\"outer\"`:" ] }, { "cell_type": "code", - "execution_count": 137, - 
"metadata": { - "collapsed": false - }, + "execution_count": 139, + "metadata": {}, "outputs": [], "source": [ "all_cities = pd.merge(left=city_loc, right=city_pop, on=\"city\", how=\"outer\")\n", @@ -2783,10 +2557,8 @@ }, { "cell_type": "code", - "execution_count": 138, - "metadata": { - "collapsed": false - }, + "execution_count": 140, + "metadata": {}, "outputs": [], "source": [ "pd.merge(left=city_loc, right=city_pop, on=\"city\", how=\"right\")" @@ -2801,10 +2573,8 @@ }, { "cell_type": "code", - "execution_count": 139, - "metadata": { - "collapsed": false - }, + "execution_count": 141, + "metadata": {}, "outputs": [], "source": [ "city_pop2 = city_pop.copy()\n", @@ -2817,15 +2587,13 @@ "metadata": {}, "source": [ "## Concatenation\n", - "Rather than joining `DataFrame`s, we may just want to concatenate them. That's what `concat` is for:" + "Rather than joining `DataFrame`s, we may just want to concatenate them. That's what `concat()` is for:" ] }, { "cell_type": "code", - "execution_count": 140, - "metadata": { - "collapsed": false - }, + "execution_count": 142, + "metadata": {}, "outputs": [], "source": [ "result_concat = pd.concat([city_loc, city_pop])\n", @@ -2841,10 +2609,8 @@ }, { "cell_type": "code", - "execution_count": 141, - "metadata": { - "collapsed": false - }, + "execution_count": 143, + "metadata": {}, "outputs": [], "source": [ "result_concat.loc[3]" @@ -2859,10 +2625,8 @@ }, { "cell_type": "code", - "execution_count": 142, - "metadata": { - "collapsed": false - }, + "execution_count": 144, + "metadata": {}, "outputs": [], "source": [ "pd.concat([city_loc, city_pop], ignore_index=True)" @@ -2877,10 +2641,8 @@ }, { "cell_type": "code", - "execution_count": 143, - "metadata": { - "collapsed": false - }, + "execution_count": 145, + "metadata": {}, "outputs": [], "source": [ "pd.concat([city_loc, city_pop], join=\"inner\")" @@ -2895,9 +2657,8 @@ }, { "cell_type": "code", - "execution_count": 144, + "execution_count": 146, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [], @@ -2914,9 +2675,8 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": 147, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [], @@ -2935,15 +2695,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `append` method is a useful shorthand for concatenating `DataFrame`s vertically:" + "The `append()` method is a useful shorthand for concatenating `DataFrame`s vertically:" ] }, { "cell_type": "code", - "execution_count": 146, - "metadata": { - "collapsed": false - }, + "execution_count": 148, + "metadata": {}, "outputs": [], "source": [ "city_loc.append(city_pop)" @@ -2953,7 +2711,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As always in pandas, the `append` method does *not* actually modify `city_loc`: it works on a copy and returns the modified copy." + "As always in pandas, the `append()` method does *not* actually modify `city_loc`: it works on a copy and returns the modified copy." 
] }, { @@ -2966,10 +2724,8 @@ }, { "cell_type": "code", - "execution_count": 147, - "metadata": { - "collapsed": false - }, + "execution_count": 149, + "metadata": {}, "outputs": [], "source": [ "city_eco = city_pop.copy()\n", @@ -2986,10 +2742,8 @@ }, { "cell_type": "code", - "execution_count": 148, - "metadata": { - "collapsed": false - }, + "execution_count": 150, + "metadata": {}, "outputs": [], "source": [ "city_eco[\"economy\"] = city_eco[\"eco_code\"].astype('category')\n", @@ -3005,10 +2759,8 @@ }, { "cell_type": "code", - "execution_count": 149, - "metadata": { - "collapsed": false - }, + "execution_count": 151, + "metadata": {}, "outputs": [], "source": [ "city_eco[\"economy\"].cat.categories = [\"Finance\", \"Energy\", \"Tourism\"]\n", @@ -3024,10 +2776,8 @@ }, { "cell_type": "code", - "execution_count": 150, - "metadata": { - "collapsed": false - }, + "execution_count": 152, + "metadata": {}, "outputs": [], "source": [ "city_eco.sort_values(by=\"economy\", ascending=False)" @@ -3042,25 +2792,32 @@ "# What next?\n", "As you probably noticed by now, pandas is quite a large library with *many* features. Although we went through the most important features, there is still a lot to discover. Probably the best way to learn more is to get your hands dirty with some real-life data. It is also a good idea to go through pandas' excellent [documentation](http://pandas.pydata.org/pandas-docs/stable/index.html), in particular the [Cookbook](http://pandas.pydata.org/pandas-docs/stable/cookbook.html)." ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.6.3" }, "toc": { "toc_cell": false, @@ -3071,5 +2828,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 }
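
Note on the API change this patch applies: since pandas 0.18, `resample()` is a deferred, groupby-like operation. It returns a `Resampler` object rather than a resampled `Series`, and the old `how=...` argument is deprecated in favor of calling an aggregation method on that object. A minimal before/after sketch (the toy series below is illustrative, not taken from the notebook):

    import numpy as np
    import pandas as pd

    # An hourly series, similar to the notebook's temperature example.
    dates = pd.date_range('2016/10/29 5:30pm', periods=12, freq='H')
    temp_series = pd.Series(np.arange(12.0), index=dates)

    # Old API (pre-0.18), which this patch removes from the notebook:
    #     temp_series.resample("2H", how=np.min)

    # New API: resample() returns a lazy Resampler; calling an
    # aggregation method materializes the result.
    resampler = temp_series.resample("2H")
    print(resampler.min())          # same result as the old how=np.min
    print(resampler.apply(np.min))  # equivalent, via apply()

The `eval()` changes in the patch follow the same theme: an assignment expression now returns a modified copy unless `inplace=True` is passed, which is why that argument is added wherever the notebook relies on in-place modification.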