From 821c3c5b7d6bd417101a211ee285e588ddb1355d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Thu, 18 Feb 2016 00:09:50 +0100 Subject: [PATCH] Added (unfinished) NumPy tutorial. --- index.ipynb | 1 + tools_numpy.ipynb | 1432 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1433 insertions(+) create mode 100644 tools_numpy.ipynb diff --git a/index.ipynb b/index.ipynb index c3ffc6d..faec72d 100644 --- a/index.ipynb +++ b/index.ipynb @@ -10,6 +10,7 @@ "**This work is in progress.**\n", "\n", "## Tools\n", + "* [NumPy](tools_numpy.ipynb)\n", "* [Matplotlib](tools_matplotlib.ipynb)" ] } diff --git a/tools_numpy.ipynb b/tools_numpy.ipynb new file mode 100644 index 0000000..a94ed77 --- /dev/null +++ b/tools_numpy.ipynb @@ -0,0 +1,1432 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tools - NumPy\n", + "*NumPy is the fundamental library for scientific computing with Python. NumPy is centered around a powerful N-dimensional array object, and it also contains useful linear algebra, Fourier transform, and random number functions.*\n", + "\n", + "## Creating arrays\n", + "First let's import `numpy`:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `zeros` function creates an array containing any number of zeros:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print np.zeros(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It's just as easy to create a 2D array (ie. a matrix) by providing a tuple with the desired number of rows and columns. 
For example, here's a 3x4 matrix:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print np.zeros((3,4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Some vocabulary:\n", + "\n", + "* In NumPy, each dimension is called an **axis**.\n", + "* The number of axes is called the **rank**.\n", + " * For example, the above 3x4 matrix is an array of rank 2 (it is 2-dimensional).\n", + " * The first axis has length 3, the second has length 4.\n", + "* An array's list of axis lengths is called the **shape** of the array.\n", + " * For example, the above matrix's shape is `(3, 4)`.\n", + " * The rank is equal to the shape's length.\n", + "* The **size** of an array is the total number of elements, which is the product of all axis lengths (eg. 3*4=12)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a = np.zeros((3,4))\n", + "print a\n", + "print \"Shape:\", a.shape\n", + "print \"Rank:\", a.ndim\n", + "print \"Size:\", a.size" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also create an N-dimensional array of arbitrary rank. 
For example, here's a 3D array (rank=3), with shape `(2,3,4)`:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print np.zeros((2,3,4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's look at the type of these arrays:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print type(np.zeros((3,4)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Many other NumPy functions create `ndarrays`.\n", + "\n", + "Here's a 3x4 matrix full of ones:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print np.ones((3,4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "An uninitialized 2x3 array (its content is not predictable, as it is whatever is in memory at that point):" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "print np.empty((2,3))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Of course you can initialize an `ndarray` using a regular python array (or any iterable). 
Just call the `array` function:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a = np.array([[1,2,3,4], [10, 20, 30, 40]])\n", + "print type(a)\n", + "print a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can create an `ndarray` using NumPy's `arange` function, which is similar to python's built-in `range` function:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "print np.arange(1, 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It also works with floats:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print np.arange(1.0, 5.0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Of course you can provide a step parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print np.arange(1, 5, 0.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, when dealing with floats, the exact number of elements in the array is not always predictable. For example, consider this:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print np.arange(0, 5/3.0, 1/3.0) # depending on floating point errors, the max value is 4/3.0 or 5/3.0.\n", + "print np.arange(0, 5/3.0, 0.333333333)\n", + "print np.arange(0, 5/3.0, 0.333333334)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For this reason, it is generally preferable to use the `linspace` function instead of `arange` when working with floats. 
The `linspace` function returns an array containing a specific number of points evenly distributed between two values (note that the maximum value is *included*, contrary to `arange`):" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print np.linspace(0, 5/3.0, 6)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A number of functions are available in NumPy's `random` module to create `ndarray`s initialized with random values.\n", + "For example, here is a 3x4 matrix initialized with random floats between 0 and 1 (uniform distribution):" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print np.random.rand(3,4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's a 3x4 matrix containing random floats sampled from a univariate [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution) (Gaussian distribution) of mean 0 and variance 1:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print np.random.randn(3,4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To give you a feel of what these distributions look like, let's use matplotlib (see the [matplotlib tutorial](tools_matplotlib.ipynb) for more details):" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "plt.hist(np.random.rand(100000), normed=True, bins=100, histtype=\"step\", color=\"blue\", label=\"rand\")\n", + "plt.hist(np.random.randn(100000), normed=True, bins=100, histtype=\"step\", color=\"red\", label=\"randn\")\n", + "plt.axis([-2.5, 2.5, 0, 1.1])\n", + "plt.legend(loc = 
\"upper left\")\n", + "plt.title(\"Random distributions\")\n", + "plt.xlabel(\"Value\")\n", + "plt.ylabel(\"Density\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also initialize an `ndarray` using a function:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def my_function(z, y, x):\n", + " return x * y + z\n", + "\n", + "print np.fromfunction(my_function, (3, 2, 10))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "NumPy first creates three `ndarrays` (one per dimension), each of shape `(3, 2, 10)`. Each array has values equal to the coordinate along a specific axis. For example, all elements in the `z` array are equal to the z-coordinate:\n", + "\n", + " [[[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]\n", + " \n", + " [[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n", + " [ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n", + " \n", + " [[ 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n", + " [ 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]]\n", + "\n", + "This means that `my_function` is only called once, so the initialization is very efficient." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Array data\n", + "NumPy's `ndarray`s are very efficient in part because all their elements must have the same type (usually numbers).\n", + "You can check what the data type is by looking at the `dtype` attribute:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "a = np.arange(1, 5)\n", + "print a.dtype, a\n", + "\n", + "b = np.arange(1.0, 5.0)\n", + "print b.dtype, b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Instead of letting NumPy guess what data type to use, you can set it explicitly when creating an array by setting the `dtype` parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a = np.arange(1, 5, dtype=np.complex64)\n", + "print a.dtype, a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Available data types include int8, int16, int32, int64, uint8/16/32/64, float16/32/64 and complex64/128. Check out [the documentation](http://docs.scipy.org/doc/numpy-1.10.1/user/basics.types.html) for the full list.\n", + "\n", + "The `itemsize` attribute returns the size (in bytes) of each item:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a = np.arange(1, 5, dtype=np.complex64)\n", + "print a.itemsize" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "An array's data is actually stored in memory as a flat (one dimensional) byte buffer. It is available *via* the `data` attribute (you will rarely need it, though)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [], + "source": [ + "a = np.array([[1,2],[1000, 2000]], dtype=np.int32)\n", + "print \"Array:\"\n", + "print a\n", + "print \"Raw data:\"\n", + "print [ord(c) for c in a.data]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Several `ndarrays` can share the same data buffer, meaning that modifying one will also modify the others. We will see an example in a minute." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reshaping an array\n", + "Changing the shape of an `ndarray` is as simple as setting its `shape` attribute. However, the array's size must remain the same." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a = np.arange(24)\n", + "print a\n", + "print \"Rank:\", a.ndim" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a.shape = (6, 4)\n", + "print a\n", + "print \"Rank:\", a.ndim" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "a.shape = (2, 3, 4)\n", + "print a\n", + "print \"Rank:\", a.ndim" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `reshape` function returns a new `ndarray` object pointing to the *same* data. This means that modifying one array will also modify the other." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "a2 = a.reshape(4,6)\n", + "print a2\n", + "print \"Rank:\", a2.ndim" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set item at row 1, col 2 to 999 (more about indexing below)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a2[1, 2] = 999\n", + "print a2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The corresponding element in a has been modified." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, the `ravel` function returns a new one-dimensional `ndarray` that also points to the same data:" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a3 = a.ravel()\n", + "print a3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Array indexing\n", + "One-dimensional NumPy arrays can be accessed more or less like regular python arrays:" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a = np.array([1, 5, 3, 19, 13, 7, 3])\n", + "print a[3]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print a[2:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print a[2:-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print a[:2]" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print a[2::2]" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print a[::-1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": 
[ + "Of course, you can modify elements:" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a[3]=999\n", + "print a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also modify an `ndarray` slice:" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a[2:5] = [997, 998, 999]\n", + "print a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And if you assign a single value, it is copied across the whole slice (this is called *broadcasting*, more on this below):" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a[2:5] = -1\n", + "print a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Contrary to regular python arrays, you cannot grow or shrink `ndarray`s this way:" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [], + "source": [ + "try:\n", + " a[2:5] = [1,2,3,4,5,6] # too long\n", + "except ValueError, e:\n", + " print e" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You cannot delete elements either:" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "try:\n", + " del a[2:5]\n", + "except ValueError, e:\n", + " print e" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Multi-dimensional arrays can be accessed in a similar way by providing an index or slice for each axis, separated by commas:" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "b = np.arange(48).reshape(4, 12)\n", + "print b" + ] + }, + { + 
"cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print b[1, 2] # row 1, col 2" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print b[1, :] # row 1, all columns" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print b[:, 1] # all rows, column 1" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "print b[(0,2), 2:5] # rows 0 and 2, columns 2 to 4 (5-1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also provide an `ndarray` of boolean values to specify the indices that you want to access. This will come in handy later:" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "bools = np.array([True, False, True, False])\n", + "print b[bools, :] # Rows 0 and 2, all columns. Equivalent to b[(0, 2), :]" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print b[b % 3 == 1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note the subtle difference between these two expressions: " + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print b[1, :]\n", + "print b[1:2, :]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The first expression returns row 1 as a 1D array of shape `(12,)`, while the second returns that same row as a 2D array of shape `(1, 12)`." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Everything works just as well with higher dimension arrays:" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "c = b.reshape(4,2,6)\n", + "print c" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print c[2, 1, 4] # matrix 2, row 1, col 4" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print c[2, :, 3] # matrix 2, all rows, col 3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you omit coordinates for some axes, then all elements in these axes are returned:" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print c[2, 1] # Return matrix 2, row 1, all columns. This is equivalent to c[2, 1, :]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You may also write an ellipsis (`...`) to specify that all non-specified axes must be entirely included." + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print c[2, ...] # matrix 2, all rows, all columns. This is equivalent to c[2, :, :]" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print c[2, 1, ...] # matrix 2, row 1, all columns. This is equivalent to c[2, 1, :]" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print c[2, ..., 3] # matrix 2, all rows, column 3. 
This is equivalent to c[2, :, 3]" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [], + "source": [ + "print c[..., 3] # all matrices, all rows, column 3. This is equivalent to c[:, :, 3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Broadcasting" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we discussed above, assigning to an `ndarray` slice requires an `ndarray` of the same shape as the slice. In general, when NumPy expects arrays of the same shape but finds that this is not the case, it applies the so-called *broadcasting* rules:\n", + "\n", + "**First rule**: if the arrays do not have the same rank, then a 1 will be prepended to the shape of the lower-rank arrays until their ranks match.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a = np.arange(10).reshape(1, 1, 10)\n", + "print a" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print \"Slice:\", a[..., 2:4]\n", + "print \"Shape:\", a[..., 2:4].shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's try to assign a 1D array of shape `(2,)` to this 3D array of shape `(1,1,2)`. NumPy will apply the first rule of broadcasting:" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a[..., 2:4] = [55, 66] # acts as [[[55, 66]]]\n", + "print a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Second rule**: arrays with a 1 along a particular dimension act as if they had the size of the array with the largest shape along that dimension. The value of the array element is repeated along that dimension." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "b = np.arange(10).reshape(2, 5)\n", + "print b" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print \"Slice:\"\n", + "print b[..., 1:4]\n", + "print \"Shape:\", b[..., 1:4].shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's try to assign a 2D array of shape `(2,1)` to this slice of shape `(2, 3)`. NumPy will apply the second rule of broadcasting:" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "b[..., 1:4] = [[44], [55]] # acts as [[44, 44, 44], [55, 55, 55]]\n", + "print b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Combining rules 1 & 2, we can do this:" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "b[..., 1:4] = [66, 77, 88] # after rule 1: [[66, 77, 88]], and after rule 2: [[66, 77, 88], [66, 77, 88]]\n", + "print b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And also, very simply:" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "b[..., 1:4] = 99 # after rule 1: [[99]], and after rule 2: [[99, 99, 99], [99, 99, 99]]\n", + "print b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Third rule**: after rules 1 & 2, the sizes of all arrays must match." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "try:\n", + " b[..., 1:4] = [33, 44]\n", + "except ValueError, e:\n", + " print e" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Broadcasting rules are used in many NumPy operations, not just assignment, as we will see below.\n", + "For more details about broadcasting, check out [the documentation](https://docs.scipy.org/doc/numpy-dev/user/basics.broadcasting.html)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Iterating\n", + "Iterating over `ndarray`s is very similar to iterating over regular python arrays. Note that iterating over multidimensional arrays is done with respect to the first axis." + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "c = np.arange(24).reshape(2, 2, 6) # A 3D array (composed of two 2x6 matrices)\n", + "print c" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for m in c:\n", + " print \"Item:\"\n", + " print m" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for i in range(len(c)): # Note that len(c) == c.shape[0]\n", + " print \"Item:\"\n", + " print c[i]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to iterate on *all* elements in the `ndarray`, simply iterate over the `flat` attribute:" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for i in c.flat:\n", + " print \"Item:\", i" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Arithmetic operations\n", + "All the usual arithmetic operators (`+`, `-`, `*`, `/`, `**`, etc.) 
can be used with `ndarray`s. They apply elementwise:" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [], + "source": [ + "a = np.array([14, 23, 32, 41])\n", + "b = np.array([5, 4, 3, 2])\n", + "print \"a + b =\", a + b\n", + "print \"a - b =\", a - b\n", + "print \"a * b =\", a * b\n", + "print \"a / b =\", a / b\n", + "print \"a % b =\", a % b\n", + "print \"a ** b =\", a ** b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that the multiplication is *not* a matrix multiplication. We will discuss matrix operations below.\n", + "\n", + "The arrays must have the same shape. If they do not, NumPy will apply the broadcasting rules, as discussed above." + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print a * 3 # thanks to broadcasting, this is equivalent to: a * [3, 3, 3, 3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The conditional operators also apply elementwise:" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print a < [15, 16, 35, 36]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And using broadcasting:" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print a < 25 # equivalent to a < [25, 25, 25, 25]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is most useful in conjunction with boolean indexing:" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print a[a < 25]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that all matching elements are returned as a 1D array, 
no matter the original array's shape:" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p = np.fromfunction(lambda row, col: row*col, (3,6))\n", + "print p" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "print p[p%3 == 1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is possible (and quite convenient) to use boolean indexing and assignment jointly:" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p[p%3 == 1] = 99\n", + "print p" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## To be continued..." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.10" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}