diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb index 42dc04f..7a72959 100644 --- a/02_end_to_end_machine_learning_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -35,9 +35,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# To support both python 2 and python 3\n", @@ -61,13 +59,14 @@ "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", "CHAPTER_ID = \"end_to_end_project\"\n", + "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n", "\n", - "def save_fig(fig_id, tight_layout=True):\n", - " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n", + " path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n", " print(\"Saving figure\", fig_id)\n", " if tight_layout:\n", " plt.tight_layout()\n", - " plt.savefig(path, format='png', dpi=300)" + " plt.savefig(path, format=fig_extension, dpi=resolution)" ] }, { @@ -80,9 +79,7 @@ { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import os\n", @@ -106,9 +103,7 @@ { "cell_type": "code", "execution_count": 3, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "fetch_housing_data()" @@ -117,9 +112,7 @@ { "cell_type": "code", "execution_count": 4, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", @@ -182,9 +175,7 @@ { "cell_type": "code", "execution_count": 10, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# to make this notebook's output identical at every run\n", @@ -194,9 +185,7 @@ { "cell_type": "code", "execution_count": 11, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": 
[], "source": [ "import numpy as np\n", @@ -223,9 +212,7 @@ { "cell_type": "code", "execution_count": 13, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import hashlib\n", @@ -242,9 +229,7 @@ { "cell_type": "code", "execution_count": 14, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# This version supports both Python 2 and Python 3, instead of just Python 3.\n", @@ -255,9 +240,7 @@ { "cell_type": "code", "execution_count": 15, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "housing_with_id = housing.reset_index() # adds an `index` column\n", @@ -267,9 +250,7 @@ { "cell_type": "code", "execution_count": 16, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "housing_with_id[\"id\"] = housing[\"longitude\"] * 1000 + housing[\"latitude\"]\n", @@ -288,9 +269,7 @@ { "cell_type": "code", "execution_count": 18, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", @@ -319,9 +298,7 @@ { "cell_type": "code", "execution_count": 21, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# Divide by 1.5 to limit the number of income categories\n", @@ -351,9 +328,7 @@ { "cell_type": "code", "execution_count": 24, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import StratifiedShuffleSplit\n", @@ -370,15 +345,22 @@ "metadata": {}, "outputs": [], "source": [ - "housing[\"income_cat\"].value_counts() / len(housing)" + "strat_test_set[\"income_cat\"].value_counts() / len(strat_test_set)" ] }, { "cell_type": "code", "execution_count": 26, - "metadata": { - "collapsed": true - }, + "metadata": {}, + "outputs": [], + "source": [ + "housing[\"income_cat\"].value_counts() / len(housing)" + ] + }, + { + "cell_type": "code", + "execution_count": 
27, + "metadata": {}, "outputs": [], "source": [ "def income_cat_proportions(data):\n", @@ -397,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -406,10 +388,8 @@ }, { "cell_type": "code", - "execution_count": 28, - "metadata": { - "collapsed": true - }, + "execution_count": 29, + "metadata": {}, "outputs": [], "source": [ "for set_ in (strat_train_set, strat_test_set):\n", @@ -425,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "metadata": { "collapsed": true }, @@ -436,7 +416,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -446,7 +426,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -463,7 +443,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -477,7 +457,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -505,7 +485,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 35, "metadata": { "collapsed": true }, @@ -516,7 +496,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -525,7 +505,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -540,7 +520,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -552,7 +532,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 39, "metadata": { "collapsed": true }, @@ -572,7 +552,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -582,7 +562,7 @@ }, { 
"cell_type": "code", - "execution_count": 40, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -594,7 +574,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -610,7 +590,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 43, "metadata": { "collapsed": true }, @@ -622,7 +602,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -632,7 +612,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -641,7 +621,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 46, "metadata": {}, "outputs": [], "source": [ @@ -650,7 +630,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [ @@ -661,7 +641,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 48, "metadata": { "collapsed": true }, @@ -681,18 +661,19 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 49, "metadata": { "collapsed": true }, "outputs": [], "source": [ - "housing_num = housing.drop(\"ocean_proximity\", axis=1)" + "housing_num = housing.drop('ocean_proximity', axis=1)\n", + "# alternatively: housing_num = housing.select_dtypes(include=[np.number])" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -701,7 +682,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ @@ -717,7 +698,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ @@ -733,7 +714,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 53, "metadata": { "collapsed": true }, @@ -744,7 +725,7 @@ }, { "cell_type": 
"code", - "execution_count": 53, + "execution_count": 54, "metadata": { "collapsed": true }, @@ -756,7 +737,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ @@ -765,7 +746,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ @@ -774,7 +755,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ @@ -791,11 +772,11 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 58, "metadata": {}, "outputs": [], "source": [ - "housing_cat = housing[\"ocean_proximity\"]\n", + "housing_cat = housing['ocean_proximity']\n", "housing_cat.head(10)" ] }, @@ -808,7 +789,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 59, "metadata": {}, "outputs": [], "source": [ @@ -818,7 +799,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ @@ -841,7 +822,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 61, "metadata": {}, "outputs": [], "source": [ @@ -861,7 +842,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 62, "metadata": {}, "outputs": [], "source": [ @@ -877,7 +858,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 63, "metadata": { "collapsed": true }, @@ -1080,7 +1061,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 64, "metadata": {}, "outputs": [], "source": [ @@ -1101,7 +1082,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 65, "metadata": {}, "outputs": [], "source": [ @@ -1117,7 +1098,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 66, "metadata": {}, "outputs": [], "source": [ @@ -1128,7 +1109,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 67, "metadata": 
{}, "outputs": [], "source": [ @@ -1144,7 +1125,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 68, "metadata": { "collapsed": true }, @@ -1176,7 +1157,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 69, "metadata": {}, "outputs": [], "source": [ @@ -1193,7 +1174,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 70, "metadata": { "collapsed": true }, @@ -1213,7 +1194,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 71, "metadata": {}, "outputs": [], "source": [ @@ -1229,7 +1210,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 72, "metadata": { "collapsed": true }, @@ -1257,7 +1238,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 73, "metadata": {}, "outputs": [], "source": [ @@ -1279,7 +1260,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 74, "metadata": { "collapsed": true }, @@ -1295,7 +1276,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 75, "metadata": {}, "outputs": [], "source": [ @@ -1305,7 +1286,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 76, "metadata": {}, "outputs": [], "source": [ @@ -1321,7 +1302,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 77, "metadata": {}, "outputs": [], "source": [ @@ -1333,7 +1314,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 78, "metadata": {}, "outputs": [], "source": [ @@ -1354,7 +1335,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 79, "metadata": {}, "outputs": [], "source": [ @@ -1363,7 +1344,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 80, "metadata": {}, "outputs": [], "source": [ @@ -1372,7 +1353,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 81, "metadata": {}, "outputs": [], "source": [ @@ -1386,7 +1367,7 @@ }, { 
"cell_type": "code", - "execution_count": 81, + "execution_count": 82, "metadata": {}, "outputs": [], "source": [ @@ -1398,7 +1379,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 83, "metadata": {}, "outputs": [], "source": [ @@ -1410,7 +1391,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 84, "metadata": {}, "outputs": [], "source": [ @@ -1429,7 +1410,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 85, "metadata": { "collapsed": true }, @@ -1444,7 +1425,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 86, "metadata": {}, "outputs": [], "source": [ @@ -1458,7 +1439,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 87, "metadata": {}, "outputs": [], "source": [ @@ -1470,7 +1451,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 88, "metadata": {}, "outputs": [], "source": [ @@ -1482,7 +1463,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 89, "metadata": {}, "outputs": [], "source": [ @@ -1494,7 +1475,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 90, "metadata": {}, "outputs": [], "source": [ @@ -1508,7 +1489,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 91, "metadata": {}, "outputs": [], "source": [ @@ -1518,7 +1499,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 92, "metadata": {}, "outputs": [], "source": [ @@ -1534,7 +1515,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 93, "metadata": {}, "outputs": [], "source": [ @@ -1563,7 +1544,7 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 94, "metadata": {}, "outputs": [], "source": [ @@ -1572,7 +1553,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 95, "metadata": {}, "outputs": [], "source": [ @@ -1588,7 +1569,7 @@ }, { "cell_type": "code", - 
"execution_count": 95, + "execution_count": 96, "metadata": {}, "outputs": [], "source": [ @@ -1599,7 +1580,7 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 97, "metadata": {}, "outputs": [], "source": [ @@ -1608,7 +1589,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 98, "metadata": {}, "outputs": [], "source": [ @@ -1628,7 +1609,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 99, "metadata": {}, "outputs": [], "source": [ @@ -1639,7 +1620,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 100, "metadata": {}, "outputs": [], "source": [ @@ -1649,7 +1630,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 101, "metadata": {}, "outputs": [], "source": [ @@ -1662,7 +1643,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 102, "metadata": { "collapsed": true }, @@ -1682,7 +1663,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 103, "metadata": {}, "outputs": [], "source": [ @@ -1705,7 +1686,7 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 104, "metadata": {}, "outputs": [], "source": [ @@ -1727,7 +1708,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 105, "metadata": { "collapsed": true }, @@ -1738,7 +1719,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 106, "metadata": { "collapsed": true }, @@ -1759,7 +1740,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 107, "metadata": {}, "outputs": [], "source": [ @@ -1797,7 +1778,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 108, "metadata": {}, "outputs": [], "source": [ @@ -1823,7 +1804,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 109, "metadata": {}, "outputs": [], "source": [ @@ -1841,7 +1822,7 @@ }, { "cell_type": "code", - "execution_count": 109, + 
"execution_count": 110, "metadata": {}, "outputs": [], "source": [ @@ -1871,7 +1852,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 111, "metadata": {}, "outputs": [], "source": [ @@ -1904,7 +1885,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 112, "metadata": {}, "outputs": [], "source": [ @@ -1922,7 +1903,7 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 113, "metadata": {}, "outputs": [], "source": [ @@ -1945,7 +1926,7 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 114, "metadata": {}, "outputs": [], "source": [ @@ -1970,7 +1951,7 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ @@ -2009,7 +1990,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 116, "metadata": { "collapsed": true }, @@ -2047,7 +2028,7 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 117, "metadata": { "collapsed": true }, @@ -2065,7 +2046,7 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 118, "metadata": {}, "outputs": [], "source": [ @@ -2075,7 +2056,7 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ @@ -2091,7 +2072,7 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 120, "metadata": {}, "outputs": [], "source": [ @@ -2107,7 +2088,7 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 121, "metadata": { "collapsed": true }, @@ -2121,7 +2102,7 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 122, "metadata": { "collapsed": true }, @@ -2139,7 +2120,7 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 123, "metadata": {}, "outputs": [], "source": [ @@ -2155,7 +2136,7 @@ }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 124, 
"metadata": {}, "outputs": [], "source": [ @@ -2185,7 +2166,7 @@ }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 125, "metadata": { "collapsed": true }, @@ -2200,7 +2181,7 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 126, "metadata": {}, "outputs": [], "source": [ @@ -2216,7 +2197,7 @@ }, { "cell_type": "code", - "execution_count": 126, + "execution_count": 127, "metadata": {}, "outputs": [], "source": [ @@ -2250,7 +2231,7 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 128, "metadata": {}, "outputs": [], "source": [ @@ -2266,7 +2247,7 @@ }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 129, "metadata": {}, "outputs": [], "source": [ @@ -2282,7 +2263,7 @@ }, { "cell_type": "code", - "execution_count": 129, + "execution_count": 130, "metadata": {}, "outputs": [], "source": [ @@ -2313,18 +2294,19 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.2" }, "nav_menu": { "height": "279px", "width": "309px" }, "toc": { - "navigate_menu": true, + "nav_menu": {}, "number_sections": true, "sideBar": true, - "threshold": 6, + "skip_h1_title": false, "toc_cell": false, + "toc_position": {}, "toc_section_display": "block", "toc_window_display": false } diff --git a/03_classification.ipynb b/03_classification.ipynb index ea39159..1e7960a 100644 --- a/03_classification.ipynb +++ b/03_classification.ipynb @@ -1494,7 +1494,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "* Yikes, only 48% **Survived**. :( That's close to 50%, so accuracy will be a reasonable metric to evaluate our model.\n", + "* Yikes, only 38% **Survived**. :( That's close enough to 40%, so accuracy will be a reasonable metric to evaluate our model.\n", "* The mean **Fare** was £32.20, which does not seem so expensive (but it was probably a lot of money back then).\n", "* The mean **Age** was less than 30 years old." 
] diff --git a/05_support_vector_machines.ipynb b/05_support_vector_machines.ipynb index aa85276..687d74b 100644 --- a/05_support_vector_machines.ipynb +++ b/05_support_vector_machines.ipynb @@ -1017,7 +1017,8 @@ " boundary_x2s = -x1s*(w[0]/w[1])-b/w[1]\n", " margin_x2s_1 = -x1s*(w[0]/w[1])-(b-1)/w[1]\n", " margin_x2s_2 = -x1s*(w[0]/w[1])-(b+1)/w[1]\n", - " ax.plot_surface(x1s, x2, 0, color=\"b\", alpha=0.2, cstride=100, rstride=100)\n", + " ax.plot_surface(x1s, x2, np.zeros_like(x1),\n", + " color=\"b\", alpha=0.2, cstride=100, rstride=100)\n", " ax.plot(x1s, boundary_x2s, 0, \"k-\", linewidth=2, label=r\"$h=0$\")\n", " ax.plot(x1s, margin_x2s_1, 0, \"k--\", linewidth=2, label=r\"$h=\\pm 1$\")\n", " ax.plot(x1s, margin_x2s_2, 0, \"k--\", linewidth=2)\n", diff --git a/10_introduction_to_artificial_neural_networks.ipynb b/10_introduction_to_artificial_neural_networks.ipynb index 99fd4e1..7c22236 100644 --- a/10_introduction_to_artificial_neural_networks.ipynb +++ b/10_introduction_to_artificial_neural_networks.ipynb @@ -2,40 +2,28 @@ "cells": [ { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "**Chapter 10 – Introduction to Artificial Neural Networks**" ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "_This notebook contains all the sample code and solutions to the exercises in chapter 10._" ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "# Setup" ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" ] @@ -43,11 +31,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": 
false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "# To support both python 2 and python 3\n", @@ -85,10 +69,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "# Perceptrons" ] @@ -97,9 +78,7 @@ "cell_type": "code", "execution_count": 2, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -120,11 +99,7 @@ { "cell_type": "code", "execution_count": 3, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "y_pred" @@ -133,11 +108,7 @@ { "cell_type": "code", "execution_count": 4, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "a = -per_clf.coef_[0][0] / per_clf.coef_[0][1]\n", @@ -173,10 +144,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "# Activation functions" ] @@ -185,9 +153,7 @@ "cell_type": "code", "execution_count": 5, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -204,11 +170,7 @@ { "cell_type": "code", "execution_count": 6, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "z = np.linspace(-5, 5, 200)\n", @@ -245,9 +207,7 @@ "cell_type": "code", "execution_count": 7, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -264,11 +224,7 @@ { "cell_type": "code", "execution_count": 8, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "x1s = np.linspace(-0.2, 1.2, 100)\n", @@ -297,20 +253,14 @@ }, { "cell_type": "markdown", - 
"metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "# FNN for MNIST" ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## using tf.learn" ] @@ -318,11 +268,7 @@ { "cell_type": "code", "execution_count": 9, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "from tensorflow.examples.tutorials.mnist import input_data\n", @@ -334,9 +280,7 @@ "cell_type": "code", "execution_count": 10, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -349,11 +293,7 @@ { "cell_type": "code", "execution_count": 11, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", @@ -370,11 +310,7 @@ { "cell_type": "code", "execution_count": 12, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import accuracy_score\n", @@ -386,11 +322,7 @@ { "cell_type": "code", "execution_count": 13, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import log_loss\n", @@ -402,9 +334,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "source": [ "## Using plain TensorFlow" @@ -413,11 +343,7 @@ { "cell_type": "code", "execution_count": 14, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", @@ -431,11 +357,7 @@ { "cell_type": "code", "execution_count": 15, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], 
"source": [ "reset_graph()\n", @@ -447,11 +369,7 @@ { "cell_type": "code", "execution_count": 16, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "def neuron_layer(X, n_neurons, name, activation=None):\n", @@ -471,11 +389,7 @@ { "cell_type": "code", "execution_count": 17, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "with tf.name_scope(\"dnn\"):\n", @@ -489,11 +403,7 @@ { "cell_type": "code", "execution_count": 18, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "with tf.name_scope(\"loss\"):\n", @@ -505,11 +415,7 @@ { "cell_type": "code", "execution_count": 19, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "learning_rate = 0.01\n", @@ -522,11 +428,7 @@ { "cell_type": "code", "execution_count": 20, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "with tf.name_scope(\"eval\"):\n", @@ -537,11 +439,7 @@ { "cell_type": "code", "execution_count": 21, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()\n", @@ -551,11 +449,7 @@ { "cell_type": "code", "execution_count": 22, - "metadata": { - "collapsed": true, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "n_epochs = 40\n", @@ -565,11 +459,7 @@ { "cell_type": "code", "execution_count": 23, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "with tf.Session() as sess:\n", @@ -579,9 +469,9 @@ " X_batch, y_batch = mnist.train.next_batch(batch_size)\n", " sess.run(training_op, feed_dict={X: X_batch, 
y: y_batch})\n", " acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n", - " acc_test = accuracy.eval(feed_dict={X: mnist.test.images,\n", - " y: mnist.test.labels})\n", - " print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n", + " acc_val = accuracy.eval(feed_dict={X: mnist.validation.images,\n", + " y: mnist.validation.labels})\n", + " print(epoch, \"Train accuracy:\", acc_train, \"Val accuracy:\", acc_val)\n", "\n", " save_path = saver.save(sess, \"./my_model_final.ckpt\")" ] @@ -589,11 +479,7 @@ { "cell_type": "code", "execution_count": 24, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "with tf.Session() as sess:\n", @@ -606,11 +492,7 @@ { "cell_type": "code", "execution_count": 25, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "print(\"Predicted classes:\", y_pred)\n", @@ -621,9 +503,7 @@ "cell_type": "code", "execution_count": 26, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -668,11 +548,7 @@ { "cell_type": "code", "execution_count": 27, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "show_graph(tf.get_default_graph())" @@ -680,20 +556,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Using `dense()` instead of `neuron_layer()`" ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). 
It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function, except for a few minor differences:\n", "* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n", @@ -704,11 +574,7 @@ { "cell_type": "code", "execution_count": 28, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "n_inputs = 28*28 # MNIST\n", @@ -721,9 +587,7 @@ "cell_type": "code", "execution_count": 29, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -736,11 +600,7 @@ { "cell_type": "code", "execution_count": 30, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "with tf.name_scope(\"dnn\"):\n", @@ -755,9 +615,7 @@ "cell_type": "code", "execution_count": 31, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -770,9 +628,7 @@ "cell_type": "code", "execution_count": 32, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -787,9 +643,7 @@ "cell_type": "code", "execution_count": 33, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -802,9 +656,7 @@ "cell_type": "code", "execution_count": 34, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -815,11 +667,7 @@ { "cell_type": "code", "execution_count": 35, - "metadata": { - "collapsed": 
false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "n_epochs = 20\n", @@ -841,11 +689,7 @@ { "cell_type": "code", "execution_count": 36, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "show_graph(tf.get_default_graph())" @@ -854,9 +698,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "source": [ "# Exercise solutions" @@ -864,10 +706,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## 1. to 8." ] @@ -875,9 +714,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "source": [ "See appendix A." @@ -885,30 +722,21 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## 9." ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "_Train a deep MLP on the MNIST dataset and see if you can get over 98% precision. Just like in the last exercise of chapter 9, try adding all the bells and whistles (i.e., save checkpoints, restore the last checkpoint in case of an interruption, add summaries, plot learning curves using TensorBoard, and so on)._" ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "First let's create the deep net. It's exactly the same as earlier, with just one addition: we add a `tf.summary.scalar()` to track the loss and the accuracy during training, so we can view nice learning curves using TensorBoard." 
] @@ -916,11 +744,7 @@ { "cell_type": "code", "execution_count": 37, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "n_inputs = 28*28 # MNIST\n", @@ -933,9 +757,7 @@ "cell_type": "code", "execution_count": 38, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -948,11 +770,7 @@ { "cell_type": "code", "execution_count": 39, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "with tf.name_scope(\"dnn\"):\n", @@ -967,9 +785,7 @@ "cell_type": "code", "execution_count": 40, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -983,9 +799,7 @@ "cell_type": "code", "execution_count": 41, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -1000,9 +814,7 @@ "cell_type": "code", "execution_count": 42, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -1016,9 +828,7 @@ "cell_type": "code", "execution_count": 43, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -1028,10 +838,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Now we need to define the directory to write the TensorBoard logs to:" ] @@ -1040,9 +847,7 @@ "cell_type": "code", "execution_count": 44, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -1061,9 +866,7 @@ "cell_type": "code", "execution_count": 45, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ 
@@ -1072,10 +875,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Now we can create the `FileWriter` that we will use to write the TensorBoard logs:" ] @@ -1084,9 +884,7 @@ "cell_type": "code", "execution_count": 46, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -1095,10 +893,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Hey! Why don't we implement early stopping? For this, we are going to need a validation set. Luckily, the dataset returned by TensorFlow's `input_data()` function (see above) is already split into a training set (60,000 instances, already shuffled for us), a validation set (5,000 instances) and a test set (5,000 instances). So we can easily define `X_valid` and `y_valid`:" ] @@ -1106,11 +901,7 @@ { "cell_type": "code", "execution_count": 47, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "X_valid = mnist.validation.images\n", @@ -1121,9 +912,7 @@ "cell_type": "code", "execution_count": 48, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -1133,11 +922,7 @@ { "cell_type": "code", "execution_count": 49, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "n_epochs = 10001\n", @@ -1190,11 +975,7 @@ { "cell_type": "code", "execution_count": 50, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "os.remove(checkpoint_epoch_path)" @@ -1203,11 +984,7 @@ { "cell_type": "code", "execution_count": 51, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], 
"source": [ "with tf.Session() as sess:\n", @@ -1218,11 +995,7 @@ { "cell_type": "code", "execution_count": 52, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "accuracy_val" @@ -1232,9 +1005,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [] @@ -1256,7 +1027,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.3" + "version": "3.6.2" }, "nav_menu": { "height": "264px", @@ -1273,5 +1044,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 } diff --git a/11_deep_learning.ipynb b/11_deep_learning.ipynb index 129a291..c002217 100644 --- a/11_deep_learning.ipynb +++ b/11_deep_learning.ipynb @@ -3312,7 +3312,14 @@ "rnd_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,\n", " fit_params={\"X_valid\": X_valid1, \"y_valid\": y_valid1, \"n_epochs\": 1000},\n", " random_state=42, verbose=2)\n", - "rnd_search.fit(X_train1, y_train1)" + "rnd_search.fit(X_train1, y_train1)\n", + "\n", + "# fit_params as a constructor argument was deprecated in Scikit-Learn version 0.19 and will\n", + "# be removed in version 0.21. 
Pass fit parameters to the fit() method instead:\n", + "# rnd_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,\n", + "# random_state=42, verbose=2)\n", + "# fit_params={\"X_valid\": X_valid1, \"y_valid\": y_valid1, \"n_epochs\": 1000}\n", + "# rnd_search.fit(X_train1, y_train1, **fit_params)\n" ] }, { @@ -4934,7 +4941,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.3" + "version": "3.6.2" }, "nav_menu": { "height": "360px", diff --git a/13_convolutional_neural_networks.ipynb b/13_convolutional_neural_networks.ipynb index 6fd6381..b3eec36 100644 --- a/13_convolutional_neural_networks.ipynb +++ b/13_convolutional_neural_networks.ipynb @@ -144,7 +144,6 @@ "fmap = np.zeros(shape=(7, 7, 1, 2), dtype=np.float32)\n", "fmap[:, 3, 0, 0] = 1\n", "fmap[3, :, 0, 1] = 1\n", - "fmap[:, :, 0, 0]\n", "plot_image(fmap[:, :, 0, 0])\n", "plt.show()\n", "plot_image(fmap[:, :, 0, 1])\n", @@ -501,6 +500,17 @@ " saver = tf.train.Saver()" ] }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "Note: if you are using Python 3.6 on OSX, you need to run the following command in a terminal to install the certifi package of certificates because Python 3.6 on OSX has no certificates to validate SSL connections (see this [StackOverflow question](https://stackoverflow.com/questions/27835619/urllib-and-ssl-certificate-verify-failed-error)):\n", + "\n", + " $ /Applications/Python\\ 3.6/Install\\ Certificates.command" + ] + }, { "cell_type": "code", "execution_count": 22, @@ -1949,7 +1959,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.2" }, "nav_menu": {}, "toc": { diff --git a/14_recurrent_neural_networks.ipynb b/14_recurrent_neural_networks.ipynb index c9eadd3..267d821 100644 --- a/14_recurrent_neural_networks.ipynb +++ b/14_recurrent_neural_networks.ipynb @@ -31,9 +31,7 @@ { "cell_type": 
"code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# To support both python 2 and python 3\n", @@ -79,9 +77,7 @@ { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf" @@ -104,9 +100,7 @@ { "cell_type": "code", "execution_count": 3, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -130,9 +124,7 @@ { "cell_type": "code", "execution_count": 4, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", @@ -173,9 +165,7 @@ { "cell_type": "code", "execution_count": 7, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_inputs = 3\n", @@ -185,9 +175,7 @@ { "cell_type": "code", "execution_count": 8, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -204,9 +192,7 @@ { "cell_type": "code", "execution_count": 9, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()" @@ -215,9 +201,7 @@ { "cell_type": "code", "execution_count": 10, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])\n", @@ -249,9 +233,7 @@ { "cell_type": "code", "execution_count": 13, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from IPython.display import clear_output, Image, display, HTML\n", @@ -311,9 +293,7 @@ { "cell_type": "code", "execution_count": 15, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_steps = 2\n", @@ -324,9 +304,7 @@ { "cell_type": "code", "execution_count": 16, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ 
-343,9 +321,7 @@ { "cell_type": "code", "execution_count": 17, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()" @@ -354,9 +330,7 @@ { "cell_type": "code", "execution_count": 18, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "X_batch = np.array([\n", @@ -400,9 +374,7 @@ { "cell_type": "code", "execution_count": 21, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_steps = 2\n", @@ -413,9 +385,7 @@ { "cell_type": "code", "execution_count": 22, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -429,9 +399,7 @@ { "cell_type": "code", "execution_count": 23, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()" @@ -440,9 +408,7 @@ { "cell_type": "code", "execution_count": 24, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "X_batch = np.array([\n", @@ -485,9 +451,7 @@ { "cell_type": "code", "execution_count": 27, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_steps = 2\n", @@ -503,9 +467,7 @@ { "cell_type": "code", "execution_count": 28, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "seq_length = tf.placeholder(tf.int32, [None])\n", @@ -516,9 +478,7 @@ { "cell_type": "code", "execution_count": 29, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()" @@ -527,9 +487,7 @@ { "cell_type": "code", "execution_count": 30, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "X_batch = np.array([\n", @@ -545,9 +503,7 @@ { "cell_type": "code", "execution_count": 31, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "with tf.Session() 
as sess:\n", @@ -593,9 +549,7 @@ { "cell_type": "code", "execution_count": 34, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -668,9 +622,7 @@ { "cell_type": "code", "execution_count": 37, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -688,9 +640,7 @@ { "cell_type": "code", "execution_count": 38, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_neurons = 100\n", @@ -706,9 +656,7 @@ { "cell_type": "code", "execution_count": 39, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "states_concat = tf.concat(axis=1, values=states)\n", @@ -754,9 +702,7 @@ { "cell_type": "code", "execution_count": 41, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "t_min, t_max = 0, 30\n", @@ -808,9 +754,7 @@ { "cell_type": "code", "execution_count": 43, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "X_batch, y_batch = next_batch(1, n_steps)" @@ -842,9 +786,7 @@ { "cell_type": "code", "execution_count": 45, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -871,9 +813,7 @@ { "cell_type": "code", "execution_count": 46, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -890,9 +830,7 @@ { "cell_type": "code", "execution_count": 47, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "cell = tf.contrib.rnn.OutputProjectionWrapper(\n", @@ -903,9 +841,7 @@ { "cell_type": "code", "execution_count": 48, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)" @@ -914,9 +850,7 @@ { "cell_type": "code", "execution_count": 49, - "metadata": { - "collapsed": true - }, + "metadata": {}, 
"outputs": [], "source": [ "learning_rate = 0.001\n", @@ -931,9 +865,7 @@ { "cell_type": "code", "execution_count": 50, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "saver = tf.train.Saver()" @@ -1009,9 +941,7 @@ { "cell_type": "code", "execution_count": 55, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -1027,9 +957,7 @@ { "cell_type": "code", "execution_count": 56, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n", @@ -1039,9 +967,7 @@ { "cell_type": "code", "execution_count": 57, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_outputs = 1\n", @@ -1051,9 +977,7 @@ { "cell_type": "code", "execution_count": 58, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", @@ -1064,9 +988,7 @@ { "cell_type": "code", "execution_count": 59, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "loss = tf.reduce_mean(tf.square(outputs - y))\n", @@ -1216,9 +1138,7 @@ { "cell_type": "code", "execution_count": 66, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -1232,9 +1152,7 @@ { "cell_type": "code", "execution_count": 67, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_neurons = 100\n", @@ -1249,9 +1167,7 @@ { "cell_type": "code", "execution_count": 68, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()" @@ -1260,9 +1176,7 @@ { "cell_type": "code", "execution_count": 69, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "X_batch = np.random.rand(2, n_steps, n_inputs)" @@ -1271,9 
+1185,7 @@ { "cell_type": "code", "execution_count": 70, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "with tf.Session() as sess:\n", @@ -1307,9 +1219,7 @@ { "cell_type": "code", "execution_count": 72, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "with tf.device(\"/gpu:0\"): # BAD! This is ignored.\n", @@ -1329,9 +1239,7 @@ { "cell_type": "code", "execution_count": 73, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", @@ -1357,9 +1265,7 @@ { "cell_type": "code", "execution_count": 74, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -1374,9 +1280,7 @@ { "cell_type": "code", "execution_count": 75, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "devices = [\"/cpu:0\", \"/cpu:0\", \"/cpu:0\"] # replace with [\"/gpu:0\", \"/gpu:1\", \"/gpu:2\"] if you have 3 GPUs\n", @@ -1386,12 +1290,17 @@ "outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, since TensorFlow 1.1, you can use the `tf.contrib.rnn.DeviceWrapper` class (alias `tf.nn.rnn_cell.DeviceWrapper` since TF 1.2)." 
+ ] + }, { "cell_type": "code", "execution_count": 76, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "init = tf.global_variables_initializer()" @@ -1420,9 +1329,7 @@ { "cell_type": "code", "execution_count": 78, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -1431,22 +1338,33 @@ "n_neurons = 100\n", "n_layers = 3\n", "n_steps = 20\n", - "n_outputs = 1\n", - "\n", - "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", - "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])" + "n_outputs = 1" ] }, { "cell_type": "code", "execution_count": 79, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ - "keep_prob = 0.5\n", - "\n", + "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", + "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: the `input_keep_prob` parameter can be a placeholder, making it possible to set it to any value you want during training, and to 1.0 during testing (effectively turning dropout off). This is a much more elegant solution than what was recommended in earlier versions of the book (i.e., writing your own wrapper class or having a separate model for training and testing). Thanks to Shen Cheng for bringing this to my attention." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "keep_prob = tf.placeholder_with_default(1.0, shape=())\n", "cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", " for layer in range(n_layers)]\n", "cells_drop = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n", @@ -1457,10 +1375,8 @@ }, { "cell_type": "code", - "execution_count": 80, - "metadata": { - "collapsed": true - }, + "execution_count": 81, + "metadata": {}, "outputs": [], "source": [ "learning_rate = 0.01\n", @@ -1477,78 +1393,29 @@ "saver = tf.train.Saver()" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Unfortunately, this code is only usable for training, because the `DropoutWrapper` class has no `training` parameter, so it always applies dropout, even when the model is not being trained, so we must first train the model, then create a different model for testing, without the `DropoutWrapper`." - ] - }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 82, "metadata": {}, "outputs": [], "source": [ - "n_iterations = 1000\n", + "n_iterations = 1500\n", "batch_size = 50\n", + "train_keep_prob = 0.5\n", "\n", "with tf.Session() as sess:\n", " init.run()\n", " for iteration in range(n_iterations):\n", " X_batch, y_batch = next_batch(batch_size, n_steps)\n", - " _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch})\n", - " if iteration % 100 == 0:\n", - " print(iteration, \"Training MSE:\", mse)\n", + " _, mse = sess.run([training_op, loss],\n", + " feed_dict={X: X_batch, y: y_batch,\n", + " keep_prob: train_keep_prob})\n", + " if iteration % 100 == 0: # not shown in the book\n", + " print(iteration, \"Training MSE:\", mse) # not shown\n", " \n", " saver.save(sess, \"./my_dropout_time_series_model\")" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now that the model is trained, we need to create the model again, but without the 
`DropoutWrapper` for testing:" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "n_inputs = 1\n", - "n_neurons = 100\n", - "n_layers = 3\n", - "n_steps = 20\n", - "n_outputs = 1\n", - "\n", - "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", - "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", - "\n", - "keep_prob = 0.5\n", - "\n", - "cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", - " for layer in range(n_layers)]\n", - "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n", - "rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", - "\n", - "learning_rate = 0.01\n", - "\n", - "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n", - "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n", - "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n", - "\n", - "loss = tf.reduce_mean(tf.square(outputs - y))\n", - "\n", - "init = tf.global_variables_initializer()\n", - "saver = tf.train.Saver()" - ] - }, { "cell_type": "code", "execution_count": 83, @@ -1559,8 +1426,15 @@ " saver.restore(sess, \"./my_dropout_time_series_model\")\n", "\n", " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))\n", - " y_pred = sess.run(outputs, feed_dict={X: X_new})\n", - "\n", + " y_pred = sess.run(outputs, feed_dict={X: X_new})" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [], + "source": [ "plt.title(\"Testing the model\", fontsize=14)\n", "plt.plot(t_instance[:-1], time_series(t_instance[:-1]), \"bo\", markersize=10, label=\"instance\")\n", "plt.plot(t_instance[1:], time_series(t_instance[1:]), \"w*\", markersize=10, label=\"target\")\n", @@ -1578,59 +1452,6 @@ "Oops, it seems that Dropout does not help at all in this particular case. 
:/" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Another option is to write a script with a command line argument to specify whether you want to train the mode or use it for making predictions:" - ] - }, - { - "cell_type": "code", - "execution_count": 84, - "metadata": {}, - "outputs": [], - "source": [ - "reset_graph()\n", - "\n", - "import sys\n", - "training = True # in a script, this would be (sys.argv[-1] == \"train\") instead\n", - "\n", - "X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])\n", - "y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])\n", - "\n", - "cells = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)\n", - " for layer in range(n_layers)]\n", - "if training:\n", - " cells = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=keep_prob)\n", - " for cell in cells]\n", - "multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)\n", - "rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n", - "\n", - "stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons]) # not shown in the book\n", - "stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs) # not shown\n", - "outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs]) # not shown\n", - "loss = tf.reduce_mean(tf.square(outputs - y)) # not shown\n", - "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) # not shown\n", - "training_op = optimizer.minimize(loss) # not shown\n", - "init = tf.global_variables_initializer() # not shown\n", - "saver = tf.train.Saver() # not shown\n", - "\n", - "with tf.Session() as sess:\n", - " if training:\n", - " init.run()\n", - " for iteration in range(n_iterations):\n", - " X_batch, y_batch = next_batch(batch_size, n_steps) # not shown\n", - " _, mse = sess.run([training_op, loss], feed_dict={X: X_batch, y: y_batch}) # not shown\n", - " if iteration % 100 == 0: # not shown\n", - " print(iteration, \"Training MSE:\", mse) # not shown\n", - " save_path = 
saver.save(sess, \"/tmp/my_model.ckpt\")\n", - " else:\n", - " saver.restore(sess, \"/tmp/my_model.ckpt\")\n", - " X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs))) # not shown\n", - " y_pred = sess.run(outputs, feed_dict={X: X_new}) # not shown" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -2706,7 +2527,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.2" }, "nav_menu": {}, "toc": { diff --git a/15_autoencoders.ipynb b/15_autoencoders.ipynb index 998981e..0629c1f 100644 --- a/15_autoencoders.ipynb +++ b/15_autoencoders.ipynb @@ -131,6 +131,8 @@ }, "outputs": [], "source": [ + "import numpy.random as rnd\n", + "\n", "rnd.seed(4)\n", "m = 200\n", "w1, w2 = 0.1, 0.3\n", diff --git a/16_reinforcement_learning.ipynb b/16_reinforcement_learning.ipynb index 96e94a2..15c258e 100644 --- a/16_reinforcement_learning.ipynb +++ b/16_reinforcement_learning.ipynb @@ -31,9 +31,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# To support both python 2 and python 3\n", @@ -95,9 +93,7 @@ { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import gym" @@ -129,9 +125,7 @@ { "cell_type": "code", "execution_count": 4, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "obs = env.reset()" @@ -163,9 +157,7 @@ { "cell_type": "code", "execution_count": 6, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "img = env.render(mode=\"rgb_array\")" @@ -226,9 +218,7 @@ { "cell_type": "code", "execution_count": 9, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "def plot_environment(env, figsize=(5,4)):\n", @@ -273,9 +263,7 @@ { "cell_type": "code", "execution_count": 11, - "metadata": { - "collapsed": true - }, + 
"metadata": {}, "outputs": [], "source": [ "env.reset()\n", @@ -311,9 +299,7 @@ { "cell_type": "code", "execution_count": 13, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "obs, reward, done, info = env.step(0)" @@ -393,9 +379,7 @@ { "cell_type": "code", "execution_count": 18, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "frames = []\n", @@ -424,9 +408,7 @@ { "cell_type": "code", "execution_count": 19, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "def update_scene(num, frames, patch):\n", @@ -461,9 +443,7 @@ { "cell_type": "code", "execution_count": 21, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "env.close()" @@ -502,9 +482,7 @@ { "cell_type": "code", "execution_count": 23, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "obs = env.reset()" @@ -547,9 +525,7 @@ { "cell_type": "code", "execution_count": 25, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from PIL import Image, ImageDraw\n", @@ -633,9 +609,7 @@ { "cell_type": "code", "execution_count": 28, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "obs = env.reset()\n", @@ -677,9 +651,7 @@ { "cell_type": "code", "execution_count": 31, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "obs = env.reset()\n", @@ -722,9 +694,7 @@ { "cell_type": "code", "execution_count": 33, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "frames = []\n", @@ -795,9 +765,7 @@ { "cell_type": "code", "execution_count": 35, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", @@ -846,9 +814,7 @@ { "cell_type": "code", "execution_count": 36, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ 
"n_max_steps = 1000\n", @@ -895,9 +861,7 @@ { "cell_type": "code", "execution_count": 38, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", @@ -965,9 +929,7 @@ { "cell_type": "code", "execution_count": 40, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "def render_policy_net(model_path, action, X, n_max_steps = 1000):\n", @@ -1024,9 +986,7 @@ { "cell_type": "code", "execution_count": 42, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", @@ -1069,9 +1029,7 @@ { "cell_type": "code", "execution_count": 43, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "def discount_rewards(rewards, discount_rate):\n", @@ -1157,9 +1115,7 @@ { "cell_type": "code", "execution_count": 47, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "env.close()" @@ -1309,9 +1265,7 @@ { "cell_type": "code", "execution_count": 51, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "n_states = 3\n", @@ -1336,9 +1290,7 @@ { "cell_type": "code", "execution_count": 52, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "def optimal_policy(state):\n", @@ -1439,23 +1391,28 @@ { "cell_type": "code", "execution_count": 57, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ - "mspacman_color = np.array([210, 164, 74]).mean()\n", + "mspacman_color = 210 + 164 + 74\n", "\n", "def preprocess_observation(obs):\n", " img = obs[1:176:2, ::2] # crop and downsize\n", - " img = img.mean(axis=2) # to greyscale\n", + " img = img.sum(axis=2) # to greyscale\n", " img[img==mspacman_color] = 0 # Improve contrast\n", - " img = (img - 128) / 128 - 1 # normalize from -1. 
to 1.\n", + " img = (img // 3 - 128).astype(np.int8) # normalize from -128 to 127\n", " return img.reshape(88, 80, 1)\n", "\n", "img = preprocess_observation(obs)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: the `preprocess_observation()` function is slightly different from the one in the book: instead of representing pixels as 64-bit floats from -1.0 to 1.0, it represents them as signed bytes (from -128 to 127). The benefit is that the replay memory will take up roughly 8 times less RAM (about 6.5 GB instead of 52 GB). The reduced precision has no visible impact on training." + ] + }, { "cell_type": "code", "execution_count": 58, @@ -1498,9 +1455,7 @@ { "cell_type": "code", "execution_count": 59, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "reset_graph()\n", @@ -1520,7 +1475,7 @@ "initializer = tf.contrib.layers.variance_scaling_initializer()\n", "\n", "def q_network(X_state, name):\n", - " prev_layer = X_state\n", + " prev_layer = X_state / 128.0 # scale pixel intensities to the [-1.0, 1.0] range.\n", " with tf.variable_scope(name) as scope:\n", " for n_maps, kernel_size, strides, padding, activation in zip(\n", " conv_n_maps, conv_kernel_sizes, conv_strides,\n", @@ -1545,9 +1500,7 @@ { "cell_type": "code", "execution_count": 60, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "X_state = tf.placeholder(tf.float32, shape=[None, input_height, input_width,\n", @@ -1572,9 +1525,7 @@ { "cell_type": "code", "execution_count": 62, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "learning_rate = 0.001\n", @@ -1608,9 +1559,7 @@ { "cell_type": "code", "execution_count": 63, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from collections import deque\n", @@ -1632,9 +1581,7 @@ { "cell_type": "code", "execution_count": 64, - "metadata": { - "collapsed": true - }, + "metadata": {}, 
"outputs": [], "source": [ "eps_min = 0.1\n", @@ -1678,9 +1625,7 @@ { "cell_type": "code", "execution_count": 66, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "loss_val = np.infty\n", @@ -1970,7 +1915,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.3" }, "nav_menu": {}, "toc": { diff --git a/math_linear_algebra.ipynb b/math_linear_algebra.ipynb index 7d8f8cb..0718eff 100644 --- a/math_linear_algebra.ipynb +++ b/math_linear_algebra.ipynb @@ -2,10 +2,7 @@ "cells": [ { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "**Math - Linear Algebra**\n", "\n", @@ -16,10 +13,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Before we start, let's ensure that this notebook works well in both Python 2 and 3:" ] @@ -27,11 +21,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "from __future__ import division, print_function, unicode_literals" @@ -39,10 +29,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "# Vectors\n", "## Definition\n", @@ -84,10 +71,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Vectors in python\n", "In python, a vector can be represented in many ways, the simplest being a regular python list of numbers:" @@ -96,11 +80,7 @@ { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "[10.5, 5.2, 3.25, 7.0]" @@ -108,10 +88,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true 
- }, + "metadata": {}, "source": [ "Since we plan to do quite a lot of scientific calculations, it is much better to use NumPy's `ndarray`, which provides a lot of convenient and optimized implementations of essential mathematical operations on vectors (for more details about NumPy, check out the [NumPy tutorial](tools_numpy.ipynb)). For example:" ] @@ -119,11 +96,7 @@ { "cell_type": "code", "execution_count": 3, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", @@ -133,10 +106,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "The size of a vector can be obtained using the `size` attribute:" ] @@ -144,11 +114,7 @@ { "cell_type": "code", "execution_count": 4, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "video.size" @@ -156,10 +122,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "The $i^{th}$ element (also called *entry* or *item*) of a vector $\\textbf{v}$ is noted $\\textbf{v}_i$.\n", "\n", @@ -169,11 +132,7 @@ { "cell_type": "code", "execution_count": 5, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "video[2] # 3rd element" @@ -181,10 +140,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Plotting vectors\n", "To plot vectors we will use matplotlib, so let's start by importing it (for details about matplotlib, check the [matplotlib tutorial](tools_matplotlib.ipynb)):" @@ -193,11 +149,7 @@ { "cell_type": "code", "execution_count": 6, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "%matplotlib 
inline\n", @@ -206,10 +158,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### 2D vectors\n", "Let's create a couple very simple 2D vectors to plot:" @@ -219,9 +168,7 @@ "cell_type": "code", "execution_count": 7, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -231,10 +178,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "These vectors each have 2 elements, so they can easily be represented graphically on a 2D graph, for example as points:" ] @@ -242,11 +186,7 @@ { "cell_type": "code", "execution_count": 8, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "x_coords, y_coords = zip(u, v)\n", @@ -258,10 +198,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Vectors can also be represented as arrows. 
Let's create a small convenience function to draw nice arrows:" ] @@ -270,9 +207,7 @@ "cell_type": "code", "execution_count": 9, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -284,10 +219,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Now let's draw the vectors **u** and **v** as arrows:" ] @@ -295,11 +227,7 @@ { "cell_type": "code", "execution_count": 10, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "plot_vector2d(u, color=\"r\")\n", @@ -311,10 +239,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### 3D vectors\n", "Plotting 3D vectors is also relatively straightforward. First let's create two 3D vectors:" @@ -324,9 +249,7 @@ "cell_type": "code", "execution_count": 11, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -336,10 +259,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Now let's plot them using matplotlib's `Axes3D`:" ] @@ -347,11 +267,7 @@ { "cell_type": "code", "execution_count": 12, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "from mpl_toolkits.mplot3d import Axes3D\n", @@ -365,10 +281,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "It is a bit hard to visualize exactly where in space these two points are, so let's add vertical lines. 
We'll create a small convenience function to plot a list of 3d vectors with vertical lines attached:" ] @@ -376,11 +289,7 @@ { "cell_type": "code", "execution_count": 13, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "def plot_vectors3d(ax, vectors3d, z0, **options):\n", @@ -398,10 +307,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Norm\n", "The norm of a vector $\\textbf{u}$, noted $\\left \\Vert \\textbf{u} \\right \\|$, is a measure of the length (a.k.a. the magnitude) of $\\textbf{u}$. There are multiple possible norms, but the most common one (and the only one we will discuss here) is the Euclidean norm, which is defined as:\n", @@ -414,11 +320,7 @@ { "cell_type": "code", "execution_count": 14, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "def vector_norm(vector):\n", @@ -431,10 +333,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "However, it is much more efficient to use NumPy's `norm` function, available in the `linalg` (**Lin**ear **Alg**ebra) module:" ] @@ -442,11 +341,7 @@ { "cell_type": "code", "execution_count": 15, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "import numpy.linalg as LA\n", @@ -455,10 +350,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Let's plot a little diagram to confirm that the length of vector $\\textbf{u}$ is indeed $\\approx5.4$:" ] @@ -466,11 +358,7 @@ { "cell_type": "code", "execution_count": 16, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "radius = 
LA.norm(u)\n", @@ -483,20 +371,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Looks about right!" ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Addition\n", "Vectors of same size can be added together. Addition is performed *elementwise*:" @@ -505,11 +387,7 @@ { "cell_type": "code", "execution_count": 17, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "print(\" \", u)\n", @@ -520,10 +398,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Let's look at what vector addition looks like graphically:" ] @@ -532,9 +407,6 @@ "cell_type": "code", "execution_count": 18, "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, "scrolled": true }, "outputs": [], @@ -556,10 +428,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Vector addition is **commutative**, meaning that $\\textbf{u} + \\textbf{v} = \\textbf{v} + \\textbf{u}$. You can see it on the previous image: following $\\textbf{u}$ *then* $\\textbf{v}$ leads to the same point as following $\\textbf{v}$ *then* $\\textbf{u}$.\n", "\n", @@ -568,10 +437,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "If you have a shape defined by a number of points (vectors), and you add a vector $\\textbf{v}$ to all of these points, then the whole shape gets shifted by $\\textbf{v}$. 
This is called a [geometric translation](https://en.wikipedia.org/wiki/Translation_%28geometry%29):" ] @@ -579,11 +445,7 @@ { "cell_type": "code", "execution_count": 19, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "t1 = np.array([2, 0.25])\n", @@ -615,20 +477,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Finally, subtracting a vector is like adding the opposite vector." ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Multiplication by a scalar\n", "Vectors can be multiplied by scalars. All elements in the vector are multiplied by that number, for example:" @@ -637,11 +493,7 @@ { "cell_type": "code", "execution_count": 20, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "print(\"1.5 *\", u, \"=\")\n", @@ -651,10 +503,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Graphically, scalar multiplication results in changing the scale of a figure, hence the name *scalar*. The distance from the origin (the point at coordinates equal to zero) is also multiplied by the scalar. 
For example, let's scale up by a factor of `k = 2.5`:" ] @@ -662,11 +511,7 @@ { "cell_type": "code", "execution_count": 21, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "k = 2.5\n", @@ -694,10 +539,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "As you might guess, dividing a vector by a scalar is equivalent to multiplying by its inverse:\n", "\n", @@ -706,10 +548,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Scalar multiplication is **commutative**: $\\lambda \\times \\textbf{u} = \\textbf{u} \\times \\lambda$.\n", "\n", @@ -720,10 +559,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Zero, unit and normalized vectors\n", "* A **zero-vector** is a vector full of 0s.\n", @@ -735,11 +571,7 @@ { "cell_type": "code", "execution_count": 22, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "plt.gca().add_artist(plt.Circle((0,0),1,color='c'))\n", @@ -755,10 +587,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Dot product\n", "### Definition\n", @@ -779,11 +608,7 @@ { "cell_type": "code", "execution_count": 23, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "def dot_product(v1, v2):\n", @@ -794,10 +619,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "But a *much* more efficient implementation is provided by NumPy with the `dot` function:" ] @@ -805,11 +627,7 @@ { "cell_type": "code", "execution_count": 24, - "metadata": { - "collapsed": 
false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "np.dot(u,v)" @@ -817,10 +635,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Equivalently, you can use the `dot` method of `ndarray`s:" ] @@ -828,11 +643,7 @@ { "cell_type": "code", "execution_count": 25, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "u.dot(v)" @@ -840,10 +651,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "**Caution**: the `*` operator will perform an *elementwise* multiplication, *NOT* a dot product:" ] @@ -851,11 +659,7 @@ { "cell_type": "code", "execution_count": 26, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "print(\" \",u)\n", @@ -867,10 +671,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### Main properties\n", "* The dot product is **commutative**: $\\textbf{u} \\cdot \\textbf{v} = \\textbf{v} \\cdot \\textbf{u}$.\n", @@ -882,17 +683,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### Calculating the angle between vectors\n", "One of the many uses of the dot product is to calculate the angle between two non-zero vectors. Looking at the dot product definition, we can deduce the following formula:\n", "\n", "$\\theta = \\arccos{\\left ( \\dfrac{\\textbf{u} \\cdot \\textbf{v}}{\\left \\Vert \\textbf{u} \\right \\| \\times \\left \\Vert \\textbf{v} \\right \\|} \\right ) }$\n", "\n", - "Note that if $\\textbf{u} \\cdot \\textbf{v} = 0$, it follows that $\\theta = \\dfrac{π}{4}$. 
In other words, if the dot product of two non-null vectors is zero, it means that they are orthogonal.\n", + "Note that if $\\textbf{u} \\cdot \\textbf{v} = 0$, it follows that $\\theta = \\dfrac{π}{2}$. In other words, if the dot product of two non-null vectors is zero, it means that they are orthogonal.\n", "\n", "Let's use this formula to calculate the angle between $\\textbf{u}$ and $\\textbf{v}$ (in radians):" ] @@ -900,11 +698,7 @@ { "cell_type": "code", "execution_count": 27, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "def vector_angle(u, v):\n", @@ -918,20 +712,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Note: due to small floating point errors, `cos_theta` may be very slightly outside of the $[-1, 1]$ interval, which would make `arccos` fail. This is why we clipped the value within the range, using NumPy's `clip` function." ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### Projecting a point onto an axis\n", "The dot product is also very useful to project points onto an axis. The projection of vector $\\textbf{v}$ onto $\\textbf{u}$'s axis is given by this formula:\n", @@ -946,11 +734,7 @@ { "cell_type": "code", "execution_count": 28, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "u_normalized = u / LA.norm(u)\n", @@ -975,10 +759,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "# Matrices\n", "A matrix is a rectangular array of scalars (ie. 
any number: integer, real or complex) arranged in rows and columns, for example:\n", @@ -992,10 +773,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Matrices in python\n", "In python, a matrix can be represented in various ways. The simplest is just a list of python lists:" @@ -1004,11 +782,7 @@ { "cell_type": "code", "execution_count": 29, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "[\n", @@ -1019,10 +793,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "A much more efficient way is to use the NumPy library which provides optimized implementations of many matrix operations:" ] @@ -1030,11 +801,7 @@ { "cell_type": "code", "execution_count": 30, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A = np.array([\n", @@ -1046,10 +813,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "By convention matrices generally have uppercase names, such as $A$.\n", "\n", @@ -1058,10 +822,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Size\n", "The size of a matrix is defined by its number of rows and number of columns. It is noted $rows \\times columns$. For example, the matrix $A$ above is an example of a $2 \\times 3$ matrix: 2 rows, 3 columns. 
Caution: a $3 \\times 2$ matrix would have 3 rows and 2 columns.\n", @@ -1072,11 +833,7 @@ { "cell_type": "code", "execution_count": 31, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A.shape" @@ -1084,10 +841,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "**Caution**: the `size` attribute represents the number of elements in the `ndarray`, not the matrix's size:" ] @@ -1095,11 +849,7 @@ { "cell_type": "code", "execution_count": 32, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A.size" @@ -1107,10 +857,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Element indexing\n", "The number located in the $i^{th}$ row, and $j^{th}$ column of a matrix $X$ is sometimes noted $X_{i,j}$ or $X_{ij}$, but there is no standard notation, so people often prefer to explicitly name the elements, like this: \"*let $X = (x_{i,j})_{1 ≤ i ≤ m, 1 ≤ j ≤ n}$*\". This means that $X$ is equal to:\n", @@ -1129,11 +876,7 @@ { "cell_type": "code", "execution_count": 33, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A[1,2] # 2nd row, 3rd column" @@ -1141,10 +884,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "The $i^{th}$ row vector is sometimes noted $M_i$ or $M_{i,*}$, but again there is no standard notation so people often prefer to explicitly define their own names, for example: \"*let **x**$_{i}$ be the $i^{th}$ row vector of matrix $X$*\". We will use the $M_{i,*}$ notation, for the same reason as above. For example, to access $A_{2,*}$ (ie. 
$A$'s 2nd row vector):" ] @@ -1152,11 +892,7 @@ { "cell_type": "code", "execution_count": 34, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A[1, :] # 2nd row vector (as a 1D array)" @@ -1164,10 +900,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Similarly, the $j^{th}$ column vector is sometimes noted $M^j$ or $M_{*,j}$, but there is no standard notation. We will use $M_{*,j}$. For example, to access $A_{*,3}$ (ie. $A$'s 3rd column vector):" ] @@ -1175,11 +908,7 @@ { "cell_type": "code", "execution_count": 35, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A[:, 2] # 3rd column vector (as a 1D array)" @@ -1187,10 +916,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Note that the result is actually a one-dimensional NumPy array: there is no such thing as a *vertical* or *horizontal* one-dimensional array. If you need to actually represent a row vector as a one-row matrix (ie. 
a 2D NumPy array), or a column vector as a one-column matrix, then you need to use a slice instead of an integer when accessing the row or column, for example:" ] @@ -1198,11 +924,7 @@ { "cell_type": "code", "execution_count": 36, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A[1:2, :] # rows 2 to 3 (excluded): this returns row 2 as a one-row matrix" @@ -1211,11 +933,7 @@ { "cell_type": "code", "execution_count": 37, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A[:, 2:3] # columns 3 to 4 (excluded): this returns column 3 as a one-column matrix" @@ -1223,10 +941,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Square, triangular, diagonal and identity matrices\n", "A **square matrix** is a matrix that has the same number of rows and columns, for example a $3 \\times 3$ matrix:\n", @@ -1240,10 +955,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "An **upper triangular matrix** is a special kind of square matrix where all the elements *below* the main diagonal (top-left to bottom-right) are zero, for example:\n", "\n", @@ -1256,10 +968,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Similarly, a **lower triangular matrix** is a square matrix where all elements *above* the main diagonal are zero, for example:\n", "\n", @@ -1272,20 +981,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "A **triangular matrix** is one that is either lower triangular or upper triangular." 
] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "A matrix that is both upper and lower triangular is called a **diagonal matrix**, for example:\n", "\n", @@ -1301,11 +1004,7 @@ { "cell_type": "code", "execution_count": 38, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "np.diag([4, 5, 6])" @@ -1313,10 +1012,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "If you pass a matrix to the `diag` function, it will happily extract the diagonal values:" ] @@ -1324,11 +1020,7 @@ { "cell_type": "code", "execution_count": 39, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "D = np.array([\n", @@ -1341,10 +1033,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Finally, the **identity matrix** of size $n$, noted $I_n$, is a diagonal matrix of size $n \\times n$ with $1$'s in the main diagonal, for example $I_3$:\n", "\n", @@ -1360,11 +1049,7 @@ { "cell_type": "code", "execution_count": 40, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "np.eye(3)" @@ -1372,20 +1057,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "The identity matrix is often noted simply $I$ (instead of $I_n$) when its size is clear given the context. It is called the *identity* matrix because multiplying a matrix with it leaves the matrix unchanged as we will see below." 
] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Adding matrices\n", "If two matrices $Q$ and $R$ have the same size $m \\times n$, they can be added together. Addition is performed *elementwise*: the result is also a $m \\times n$ matrix $S$ where each element is the sum of the elements at the corresponding position: $S_{i,j} = Q_{i,j} + R_{i,j}$\n", @@ -1405,11 +1084,7 @@ { "cell_type": "code", "execution_count": 41, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "B = np.array([[1,2,3], [4, 5, 6]])\n", @@ -1419,11 +1094,7 @@ { "cell_type": "code", "execution_count": 42, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A" @@ -1432,11 +1103,7 @@ { "cell_type": "code", "execution_count": 43, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A + B" @@ -1444,10 +1111,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "**Addition is *commutative***, meaning that $A + B = B + A$:" ] @@ -1455,11 +1119,7 @@ { "cell_type": "code", "execution_count": 44, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "B + A" @@ -1467,10 +1127,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "**It is also *associative***, meaning that $A + (B + C) = (A + B) + C$:" ] @@ -1478,11 +1135,7 @@ { "cell_type": "code", "execution_count": 45, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "C = np.array([[100,200,300], [400, 500, 600]])\n", @@ -1493,11 +1146,7 @@ { "cell_type": 
"code", "execution_count": 46, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "(A + B) + C" @@ -1505,10 +1154,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Scalar multiplication\n", "A matrix $M$ can be multiplied by a scalar $\\lambda$. The result is noted $\\lambda M$, and it is a matrix of the same size as $M$ with all elements multiplied by $\\lambda$:\n", @@ -1532,11 +1178,7 @@ { "cell_type": "code", "execution_count": 47, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "2 * A" @@ -1544,10 +1186,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Scalar multiplication is also defined on the right hand side, and gives the same result: $M \\lambda = \\lambda M$. For example:" ] @@ -1555,11 +1194,7 @@ { "cell_type": "code", "execution_count": 48, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A * 2" @@ -1567,10 +1202,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "This makes scalar multiplication **commutative**.\n", "\n", @@ -1580,11 +1212,7 @@ { "cell_type": "code", "execution_count": 49, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "2 * (3 * A)" @@ -1593,11 +1221,7 @@ { "cell_type": "code", "execution_count": 50, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "(2 * 3) * A" @@ -1605,10 +1229,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Finally, it 
is **distributive over addition** of matrices, meaning that $\\lambda (Q + R) = \\lambda Q + \\lambda R$:" ] @@ -1616,11 +1237,7 @@ { "cell_type": "code", "execution_count": 51, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "2 * (A + B)" @@ -1629,11 +1246,7 @@ { "cell_type": "code", "execution_count": 52, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "2 * A + 2 * B" @@ -1641,10 +1254,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Matrix multiplication\n", "So far, matrix operations have been rather intuitive. But multiplying matrices is a bit more involved.\n", @@ -1689,10 +1299,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Let's multiply two matrices in NumPy, using `ndarray`'s `dot` method:\n", "\n", @@ -1714,11 +1321,7 @@ { "cell_type": "code", "execution_count": 53, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "D = np.array([\n", @@ -1732,10 +1335,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Let's check this result by looking at one element, just to be sure: looking at $E_{2,3}$ for example, we need to multiply elements in $A$'s $2^{nd}$ row by elements in $D$'s $3^{rd}$ column, and sum up these products:" ] @@ -1743,11 +1343,7 @@ { "cell_type": "code", "execution_count": 54, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "40*5 + 50*17 + 60*31" @@ -1756,11 +1352,7 @@ { "cell_type": "code", "execution_count": 55, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + 
"metadata": {}, "outputs": [], "source": [ "E[1,2] # row 2, column 3" @@ -1768,10 +1360,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Looks good! You can check the other elements until you get used to the algorithm.\n", "\n", @@ -1781,11 +1370,7 @@ { "cell_type": "code", "execution_count": 56, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "try:\n", @@ -1796,10 +1381,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "This illustrates the fact that **matrix multiplication is *NOT* commutative**: in general $QR ≠ RQ$\n", "\n", @@ -1809,11 +1391,7 @@ { "cell_type": "code", "execution_count": 57, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "F = np.array([\n", @@ -1827,11 +1405,7 @@ { "cell_type": "code", "execution_count": 58, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "F.dot(A)" @@ -1839,10 +1413,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "On the other hand, **matrix multiplication *is* associative**, meaning that $Q(RS) = (QR)S$. 
Let's create a $4 \\times 5$ matrix $G$ to illustrate this:" ] @@ -1850,11 +1421,7 @@ { "cell_type": "code", "execution_count": 59, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "G = np.array([\n", @@ -1868,11 +1435,7 @@ { "cell_type": "code", "execution_count": 60, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A.dot(D.dot(G)) # A(BG)" @@ -1880,10 +1443,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "It is also ***distributive* over addition** of matrices, meaning that $(Q + R)S = QS + RS$. For example:" ] @@ -1891,11 +1451,7 @@ { "cell_type": "code", "execution_count": 61, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "(A + B).dot(D)" @@ -1904,11 +1460,7 @@ { "cell_type": "code", "execution_count": 62, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A.dot(D) + B.dot(D)" @@ -1916,10 +1468,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "The product of a matrix $M$ by the identity matrix (of matching size) results in the same matrix $M$. 
More formally, if $M$ is an $m \\times n$ matrix, then:\n", "\n", @@ -1935,11 +1484,7 @@ { "cell_type": "code", "execution_count": 63, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A.dot(np.eye(3))" @@ -1948,11 +1493,7 @@ { "cell_type": "code", "execution_count": 64, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "np.eye(2).dot(A)" @@ -1960,10 +1501,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "**Caution**: NumPy's `*` operator performs elementwise multiplication, *NOT* a matrix multiplication:" ] @@ -1972,9 +1510,6 @@ "cell_type": "code", "execution_count": 65, "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, "scrolled": true }, "outputs": [], @@ -1984,10 +1519,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "**The @ infix operator**\n", "\n", @@ -1997,11 +1529,7 @@ { "cell_type": "code", "execution_count": 66, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "import sys\n", @@ -2016,20 +1544,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Note: `Q @ R` is actually equivalent to `Q.__matmul__(R)` which is implemented by NumPy as `np.matmul(Q, R)`, not as `Q.dot(R)`. The main difference is that `matmul` does not support scalar multiplication, while `dot` does, so you can write `Q.dot(3)`, which is equivalent to `Q * 3`, but you cannot write `Q @ 3` ([more details](http://stackoverflow.com/a/34142617/38626))." 
] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Matrix transpose\n", "The transpose of a matrix $M$ is a matrix noted $M^T$ such that the $i^{th}$ row in $M^T$ is equal to the $i^{th}$ column in $M$:\n", @@ -2057,11 +1579,7 @@ { "cell_type": "code", "execution_count": 67, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A" @@ -2070,11 +1588,7 @@ { "cell_type": "code", "execution_count": 68, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A.T" @@ -2082,10 +1596,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "As you might expect, transposing a matrix twice returns the original matrix:" ] @@ -2093,11 +1604,7 @@ { "cell_type": "code", "execution_count": 69, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A.T.T" @@ -2105,10 +1612,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Transposition is distributive over addition of matrices, meaning that $(Q + R)^T = Q^T + R^T$. For example:" ] @@ -2116,11 +1620,7 @@ { "cell_type": "code", "execution_count": 70, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "(A + B).T" @@ -2129,11 +1629,7 @@ { "cell_type": "code", "execution_count": 71, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "A.T + B.T" @@ -2141,10 +1637,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Moreover, $(Q \\cdot R)^T = R^T \\cdot Q^T$. 
Note that the order is reversed. For example:" ] @@ -2152,11 +1645,7 @@ { "cell_type": "code", "execution_count": 72, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "(A.dot(D)).T" @@ -2165,11 +1654,7 @@ { "cell_type": "code", "execution_count": 73, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "D.T.dot(A.T)" @@ -2177,10 +1662,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "A **symmetric matrix** $M$ is defined as a matrix that is equal to its transpose: $M^T = M$. This definition implies that it must be a square matrix whose elements are symmetric relative to the main diagonal, for example:\n", "\n", @@ -2197,11 +1679,7 @@ { "cell_type": "code", "execution_count": 74, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "D.dot(D.T)" @@ -2210,9 +1688,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "source": [ "## Converting 1D arrays to 2D arrays in NumPy\n", @@ -2222,11 +1698,7 @@ { "cell_type": "code", "execution_count": 75, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "u" @@ -2235,11 +1707,7 @@ { "cell_type": "code", "execution_count": 76, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "u.T" @@ -2247,10 +1715,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "We want to convert $\\textbf{u}$ into a row vector before transposing it. 
There are a few ways to do this:" ] @@ -2258,11 +1723,7 @@ { "cell_type": "code", "execution_count": 77, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "u_row = np.array([u])\n", @@ -2271,10 +1732,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Notice the extra square brackets: this is a 2D array with just one row (i.e. a 1x2 matrix). In other words it really is a **row vector**." ] @@ -2282,11 +1740,7 @@ { "cell_type": "code", "execution_count": 78, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "u[np.newaxis, :]" @@ -2294,10 +1748,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "This is quite explicit: we are asking for a new vertical axis, keeping the existing data as the horizontal axis." ] @@ -2305,11 +1756,7 @@ { "cell_type": "code", "execution_count": 79, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "u[np.newaxis]" @@ -2317,10 +1764,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "This is equivalent, but a little less explicit." ] @@ -2328,11 +1772,7 @@ { "cell_type": "code", "execution_count": 80, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "u[None]" @@ -2340,10 +1780,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "This is the shortest version, but you probably want to avoid it because it is unclear. 
The reason it works is that `np.newaxis` is actually equal to `None`, so this is equivalent to the previous version.\n", "\n", @@ -2353,11 +1790,7 @@ { "cell_type": "code", "execution_count": 81, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "u_row.T" @@ -2365,10 +1798,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Great! We now have a nice **column vector**.\n", "\n", @@ -2378,11 +1808,7 @@ { "cell_type": "code", "execution_count": 82, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "u[:, np.newaxis]" @@ -2390,10 +1816,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Plotting a matrix\n", "We have already seen that vectors can be represented as points or arrows in N-dimensional space. Is there a good graphical representation of matrices? Well, you can simply see a matrix as a list of vectors, so plotting a matrix results in many points or arrows. For example, let's create a $2 \\times 4$ matrix `P` and plot it as points:" @@ -2402,11 +1825,7 @@ { "cell_type": "code", "execution_count": 83, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "P = np.array([\n", @@ -2421,10 +1840,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Of course we could also have stored the same 4 vectors as row vectors instead of column vectors, resulting in a $4 \\times 2$ matrix (the transpose of $P$, in fact). 
It is really an arbitrary choice.\n", "\n", @@ -2434,11 +1850,7 @@ { "cell_type": "code", "execution_count": 84, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "plt.plot(x_coords_P, y_coords_P, \"bo\")\n", @@ -2450,10 +1862,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Or you can represent it as a polygon: matplotlib's `Polygon` class expects an $n \\times 2$ NumPy array, not a $2 \\times n$ array, so we just need to give it $P^T$:" ] @@ -2461,11 +1870,7 @@ { "cell_type": "code", "execution_count": 85, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "from matplotlib.patches import Polygon\n", @@ -2477,10 +1882,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Geometric applications of matrix operations\n", "We saw earlier that vector addition results in a geometric translation, vector multiplication by a scalar results in rescaling (zooming in or out, centered on the origin), and vector dot product results in projecting a vector onto another vector, rescaling and measuring the resulting coordinate.\n", @@ -2490,10 +1892,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### Addition = multiple geometric translations\n", "First, adding two matrices together is equivalent to adding all their vectors together. 
For example, let's create a $2 \\times 4$ matrix $H$ and add it to $P$, and look at the result:" @@ -2502,11 +1901,7 @@ { "cell_type": "code", "execution_count": 86, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "H = np.array([\n", @@ -2534,10 +1929,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "If we add a matrix full of identical vectors, we get a simple geometric translation:" ] @@ -2545,11 +1937,7 @@ { "cell_type": "code", "execution_count": 87, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "H2 = np.array([\n", @@ -2570,10 +1958,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Although matrices can only be added together if they have the same size, NumPy allows adding a row vector or a column vector to a matrix: this is called *broadcasting* and is explained in further detail in the [NumPy tutorial](tools_numpy.ipynb). We could have obtained the same result as above with:" ] @@ -2581,11 +1966,7 @@ { "cell_type": "code", "execution_count": 88, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "P + [[-0.5], [0.4]] # same as P + H2, thanks to NumPy broadcasting" @@ -2593,10 +1974,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### Scalar multiplication\n", "Multiplying a matrix by a scalar results in all its vectors being multiplied by that scalar, so unsurprisingly, the geometric result is a rescaling of the entire figure. 
For example, let's rescale our polygon by a factor of 60% (zooming out, centered on the origin):" @@ -2605,11 +1983,7 @@ { "cell_type": "code", "execution_count": 89, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "def plot_transformation(P_before, P_after, text_before, text_after, axis = [0, 5, 0, 4], arrows=False):\n", @@ -2631,10 +2005,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### Matrix multiplication – Projection onto an axis\n", "Matrix multiplication is more complex to visualize, but it is also the most powerful tool in the box.\n", @@ -2646,9 +2017,7 @@ "cell_type": "code", "execution_count": 90, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -2657,10 +2026,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Now let's look at the dot product $U \\cdot P$:" ] @@ -2668,11 +2034,7 @@ { "cell_type": "code", "execution_count": 91, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "U.dot(P)" @@ -2680,10 +2042,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "These are the horizontal coordinates of the vectors in $P$. 
In other words, we just projected $P$ onto the horizontal axis:" ] @@ -2691,11 +2050,7 @@ { "cell_type": "code", "execution_count": 92, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "def plot_projection(U, P):\n", @@ -2719,10 +2074,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "We can actually project on any other axis by just replacing $U$ with any other unit vector. For example, let's project on the axis that is at a 30° angle above the horizontal axis:" ] @@ -2730,11 +2082,7 @@ { "cell_type": "code", "execution_count": 93, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "angle30 = 30 * np.pi / 180 # angle in radians\n", @@ -2745,20 +2093,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Good! Remember that the dot product of a unit vector and a matrix basically performs a projection on an axis and gives us the coordinates of the resulting points on that axis." 
] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### Matrix multiplication – Rotation\n", "Now let's create a $2 \\times 2$ matrix $V$ containing two unit vectors that make 30° and 120° angles with the horizontal axis:\n", @@ -2769,11 +2111,7 @@ { "cell_type": "code", "execution_count": 94, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "angle120 = 120 * np.pi / 180\n", @@ -2786,10 +2124,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Let's look at the product $VP$:" ] @@ -2797,11 +2132,7 @@ { "cell_type": "code", "execution_count": 95, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "V.dot(P)" @@ -2809,10 +2140,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "The first row is equal to $V_{1,*} P$, which is the coordinates of the projection of $P$ onto the 30° axis, as we have seen above. The second row is $V_{2,*} P$, which is the coordinates of the projection of $P$ onto the 120° axis. So basically we obtained the coordinates of $P$ after rotating the horizontal and vertical axes by 30° (or equivalently after rotating the polygon by -30° around the origin)! Let's plot $VP$ to see this:" ] @@ -2820,11 +2148,7 @@ { "cell_type": "code", "execution_count": 96, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "P_rotated = V.dot(P)\n", @@ -2834,20 +2158,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Matrix $V$ is called a **rotation matrix**." 
] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "### Matrix multiplication – Other linear transformations\n", "More generally, any linear transformation $f$ that maps n-dimensional vectors to m-dimensional vectors can be represented as an $m \\times n$ matrix. For example, say $\\textbf{u}$ is a 3-dimensional vector:\n", @@ -2883,9 +2201,6 @@ "cell_type": "code", "execution_count": 97, "metadata": { - "collapsed": false, - "deletable": true, - "editable": true, "scrolled": true }, "outputs": [], @@ -2901,10 +2216,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Let's look at how this transformation affects the **unit square**: " ] @@ -2912,11 +2224,7 @@ { "cell_type": "code", "execution_count": 98, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "Square = np.array([\n", @@ -2930,10 +2238,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Now let's look at a **squeeze mapping**:" ] @@ -2941,11 +2246,7 @@ { "cell_type": "code", "execution_count": 99, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "F_squeeze = np.array([\n", @@ -2959,10 +2260,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "The effect on the unit square is:" ] @@ -2970,11 +2268,7 @@ { "cell_type": "code", "execution_count": 100, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "plot_transformation(Square, F_squeeze.dot(Square), \"$Square$\", \"$F_{squeeze} Square$\",\n", @@ -2984,10 +2278,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - 
"editable": true - }, + "metadata": {}, "source": [ "Let's show a last one: reflection through the horizontal axis:" ] @@ -2995,11 +2286,7 @@ { "cell_type": "code", "execution_count": 101, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "F_reflect = np.array([\n", @@ -3013,10 +2300,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Matrix inverse\n", "Now that we understand that a matrix can represent any linear transformation, a natural question is: can we find a transformation matrix that reverses the effect of a given transformation matrix $F$? The answer is yes… sometimes! When it exists, such a matrix is called the **inverse** of $F$, and it is noted $F^{-1}$.\n", @@ -3027,11 +2311,7 @@ { "cell_type": "code", "execution_count": 102, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "F_inv_shear = np.array([\n", @@ -3048,10 +2328,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "We applied a shear mapping on $P$, just like we did before, but then we applied a second transformation to the result, and *lo and behold* this had the effect of coming back to the original $P$ (we plotted the original $P$'s outline to double check). The second transformation is the inverse of the first one.\n", "\n", @@ -3061,11 +2338,7 @@ { "cell_type": "code", "execution_count": 103, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "F_inv_shear = LA.inv(F_shear)\n", @@ -3074,10 +2347,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Only square matrices can be inversed. 
This makes sense when you think about it: if you have a transformation that reduces the number of dimensions, then some information is lost and there is no way that you can get it back. For example say you use a $2 \\times 3$ matrix to project a 3D object onto a plane. The result may look like this:" ] @@ -3085,11 +2355,7 @@ { "cell_type": "code", "execution_count": 104, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "plt.plot([0, 0, 1, 1, 0, 0.1, 0.1, 0, 0.1, 1.1, 1.0, 1.1, 1.1, 1.0, 1.1, 0.1],\n", @@ -3101,10 +2367,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Looking at this image, it is impossible to tell whether this is the projection of a cube or the projection of a narrow rectangular object. Some information has been lost in the projection.\n", "\n", @@ -3114,11 +2377,7 @@ { "cell_type": "code", "execution_count": 105, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "F_project = np.array([\n", @@ -3132,10 +2391,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "This transformation matrix performs a projection onto the horizontal axis. Our polygon gets entirely flattened out so some information is entirely lost and it is impossible to go back to the original polygon using a linear transformation. In other words, $F_{project}$ has no inverse. Such a square matrix that cannot be inversed is called a **singular matrix** (aka degenerate matrix). 
If we ask NumPy to calculate its inverse, it raises an exception:" ] @@ -3143,11 +2399,7 @@ { "cell_type": "code", "execution_count": 106, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "try:\n", @@ -3158,10 +2410,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Here is another example of a singular matrix. This one performs a projection onto the axis at a 30° angle above the horizontal axis:" ] @@ -3169,11 +2418,7 @@ { "cell_type": "code", "execution_count": 107, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "angle30 = 30 * np.pi / 180\n", @@ -3188,10 +2433,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "But this time, due to floating point rounding errors, NumPy manages to calculate an inverse (notice how large the elements are, though):" ] @@ -3199,11 +2441,7 @@ { "cell_type": "code", "execution_count": 108, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "LA.inv(F_project_30)" @@ -3211,10 +2449,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "As you might expect, the dot product of a matrix by its inverse results in the identity matrix:\n", "\n", @@ -3226,11 +2461,7 @@ { "cell_type": "code", "execution_count": 109, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "F_shear.dot(LA.inv(F_shear))" @@ -3238,10 +2469,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Another way to express this is that the inverse of the inverse of a matrix $M$ 
is $M$ itself:\n", "\n", @@ -3251,11 +2479,7 @@ { "cell_type": "code", "execution_count": 110, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "LA.inv(LA.inv(F_shear))" @@ -3263,10 +2487,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Also, the inverse of scaling by a factor of $\\lambda$ is of course scaling by a factor of $\\frac{1}{\\lambda}$:\n", "\n", @@ -3280,11 +2501,7 @@ { "cell_type": "code", "execution_count": 111, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "F_involution = np.array([\n", @@ -3298,10 +2515,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Finally, a square matrix $H$ whose inverse is its own transpose is an **orthogonal matrix**:\n", "\n", @@ -3317,11 +2531,7 @@ { "cell_type": "code", "execution_count": 112, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "F_reflect.dot(F_reflect.T)" @@ -3329,10 +2539,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Determinant\n", "The determinant of a square matrix $M$, noted $\\det(M)$ or $\\det M$ or $|M|$, is a value that can be calculated from its elements $(M_{i,j})$ using various equivalent methods. 
One of the simplest methods is this recursive approach:\n", @@ -3370,10 +2577,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "To get the determinant of a matrix, you can call NumPy's `det` function in the `numpy.linalg` module:" ] @@ -3381,11 +2585,7 @@ { "cell_type": "code", "execution_count": 113, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "M = np.array([\n", @@ -3398,10 +2598,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "One of the main uses of the determinant is to *determine* whether a square matrix can be inversed or not: if the determinant is equal to 0, then the matrix *cannot* be inversed (it is a singular matrix), and if the determinant is not 0, then it *can* be inversed.\n", "\n", @@ -3411,11 +2608,7 @@ { "cell_type": "code", "execution_count": 114, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "LA.det(F_project)" @@ -3423,10 +2616,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "That's right, $F_{project}$ is singular, as we saw earlier." ] @@ -3434,11 +2624,7 @@ { "cell_type": "code", "execution_count": 115, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "LA.det(F_project_30)" @@ -3446,10 +2632,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "This determinant is suspiciously close to 0: it really should be 0, but it's not due to tiny floating point errors. The matrix is actually singular." 
] @@ -3457,11 +2640,7 @@ { "cell_type": "code", "execution_count": 116, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "LA.det(F_shear)" @@ -3469,20 +2648,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Perfect! This matrix *can* be inversed as we saw earlier. Wow, math really works!" ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "The determinant can also be used to measure how much a linear transformation affects surface areas: for example, the projection matrices $F_{project}$ and $F_{project\\_30}$ completely flatten the polygon $P$, until its area is zero. This is why the determinant of these matrices is 0. The shear mapping modified the shape of the polygon, but it did not affect its surface area, which is why the determinant is 1. You can try computing the determinant of a rotation matrix, and you should also find 1. What about a scaling matrix? Let's see:" ] @@ -3490,11 +2663,7 @@ { "cell_type": "code", "execution_count": 117, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "F_scale = np.array([\n", @@ -3508,10 +2677,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "We rescaled the polygon by a factor of 1/2 on both vertical and horizontal axes so the surface area of the resulting polygon is 1/4$^{th}$ of the original polygon. 
Let's compute the determinant and check that:" ] @@ -3519,11 +2685,7 @@ { "cell_type": "code", "execution_count": 118, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "LA.det(F_scale)" @@ -3531,10 +2693,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Correct!\n", "\n", @@ -3544,11 +2703,7 @@ { "cell_type": "code", "execution_count": 119, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "LA.det(F_reflect)" @@ -3556,10 +2711,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Composing linear transformations\n", "Several linear transformations can be chained simply by performing multiple dot products in a row. For example, to perform a squeeze mapping followed by a shear mapping, just write:" @@ -3568,11 +2720,7 @@ { "cell_type": "code", "execution_count": 120, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "P_squeezed_then_sheared = F_shear.dot(F_squeeze.dot(P))" @@ -3580,10 +2728,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Since the dot product is associative, the following code is equivalent:" ] @@ -3591,11 +2736,7 @@ { "cell_type": "code", "execution_count": 121, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "P_squeezed_then_sheared = (F_shear.dot(F_squeeze)).dot(P)" @@ -3603,10 +2744,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Note that the order of the transformations is the reverse of the dot product order.\n", "\n", @@ -3617,9 
+2755,7 @@ "cell_type": "code", "execution_count": 122, "metadata": { - "collapsed": true, - "deletable": true, - "editable": true + "collapsed": true }, "outputs": [], "source": [ @@ -3629,20 +2765,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "From now on we can perform both transformations in just one dot product, which can lead to a very significant performance boost." ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "What if you want to perform the inverse of this double transformation? Well, if you squeezed and then you sheared, and you want to undo what you have done, it should be obvious that you should unshear first and then unsqueeze. In more mathematical terms, given two invertible (aka nonsingular) matrices $Q$ and $R$:\n", "\n", @@ -3654,11 +2784,7 @@ { "cell_type": "code", "execution_count": 123, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "LA.inv(F_shear.dot(F_squeeze)) == LA.inv(F_squeeze).dot(LA.inv(F_shear))" @@ -3666,10 +2792,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Singular Value Decomposition\n", "It turns out that any $m \\times n$ matrix $M$ can be decomposed into the dot product of three simple matrices:\n", @@ -3685,11 +2808,7 @@ { "cell_type": "code", "execution_count": 124, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "U, S_diag, V_T = LA.svd(F_shear) # note: in python 3 you can rename S_diag to Σ_diag\n", @@ -3699,11 +2818,7 @@ { "cell_type": "code", "execution_count": 125, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "S_diag" @@ -3711,10 +2826,7 @@ }, 
{ "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Note that this is just a 1D array containing the diagonal values of Σ. To get the actual matrix Σ, we can use NumPy's `diag` function:" ] @@ -3722,11 +2834,7 @@ { "cell_type": "code", "execution_count": 126, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "S = np.diag(S_diag)\n", @@ -3735,10 +2843,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Now let's check that $U \\cdot \\Sigma \\cdot V^T$ is indeed equal to `F_shear`:" ] @@ -3746,11 +2851,7 @@ { "cell_type": "code", "execution_count": 127, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "U.dot(np.diag(S_diag)).dot(V_T)" @@ -3759,11 +2860,7 @@ { "cell_type": "code", "execution_count": 128, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "F_shear" @@ -3771,10 +2868,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "It worked like a charm. Let's apply these transformations one by one (in reverse order) on the unit square to understand what's going on. 
First, let's apply the first rotation $V^T$:" ] @@ -3782,11 +2876,7 @@ { "cell_type": "code", "execution_count": 129, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "plot_transformation(Square, V_T.dot(Square), \"$Square$\", \"$V^T \\cdot Square$\",\n", @@ -3796,10 +2886,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Now let's rescale along the vertical and horizontal axes using $\\Sigma$:" ] @@ -3807,11 +2894,7 @@ { "cell_type": "code", "execution_count": 130, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "plot_transformation(V_T.dot(Square), S.dot(V_T).dot(Square), \"$V^T \\cdot Square$\", \"$\\Sigma \\cdot V^T \\cdot Square$\",\n", @@ -3821,10 +2904,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Finally, we apply the second rotation $U$:" ] @@ -3832,11 +2912,7 @@ { "cell_type": "code", "execution_count": 131, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "plot_transformation(S.dot(V_T).dot(Square), U.dot(S).dot(V_T).dot(Square),\"$\\Sigma \\cdot V^T \\cdot Square$\", \"$U \\cdot \\Sigma \\cdot V^T \\cdot Square$\",\n", @@ -3846,20 +2922,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "And we can see that the result is indeed a shear mapping of the original unit square." 
] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Eigenvectors and eigenvalues\n", "An **eigenvector** of a square matrix $M$ (also called a **characteristic vector**) is a non-zero vector that remains on the same line after transformation by the linear transformation associated with $M$. A more formal definition is any vector $v$ such that:\n", @@ -3880,11 +2950,7 @@ { "cell_type": "code", "execution_count": 132, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "eigenvalues, eigenvectors = LA.eig(F_squeeze)\n", @@ -3894,11 +2960,7 @@ { "cell_type": "code", "execution_count": 133, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "eigenvectors # [v0, v1, …]" @@ -3906,10 +2968,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Indeed the horizontal vectors are stretched by a factor of 1.4, and the vertical vectors are shrunk by a factor of 1/1.4=0.714…, so far so good. Let's look at the shear mapping matrix $F_{shear}$:" ] @@ -3917,11 +2976,7 @@ { "cell_type": "code", "execution_count": 134, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "eigenvalues2, eigenvectors2 = LA.eig(F_shear)\n", @@ -3931,11 +2986,7 @@ { "cell_type": "code", "execution_count": 135, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "eigenvectors2 # [v0, v1, …]" @@ -3943,20 +2994,14 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "Wait, what!? We expected just one unit eigenvector, not two. 
The second vector is almost equal to $\\begin{pmatrix}-1 \\\\ 0 \\end{pmatrix}$, which is on the same line as the first vector $\\begin{pmatrix}1 \\\\ 0 \\end{pmatrix}$. This is due to floating point errors. We can safely ignore vectors that are (almost) colinear (ie. on the same line)." ] }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "## Trace\n", "The trace of a square matrix $M$, noted $tr(M)$ is the sum of the values on its main diagonal. For example:" @@ -3965,11 +3010,7 @@ { "cell_type": "code", "execution_count": 136, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "D = np.array([\n", @@ -3982,10 +3023,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "The trace does not have a simple geometric interpretation (in general), but it has a number of properties that make it useful in many areas:\n", "* $tr(A + B) = tr(A) + tr(B)$\n", @@ -4000,11 +3038,7 @@ { "cell_type": "code", "execution_count": 137, - "metadata": { - "collapsed": false, - "deletable": true, - "editable": true - }, + "metadata": {}, "outputs": [], "source": [ "np.trace(F_project)" @@ -4012,10 +3046,7 @@ }, { "cell_type": "markdown", - "metadata": { - "deletable": true, - "editable": true - }, + "metadata": {}, "source": [ "# What next?\n", "This concludes this introduction to Linear Algebra. Although these basics cover most of what you will need to know for Machine Learning, if you wish to go deeper into this topic there are many options available: Linear Algebra [books](http://linear.axler.net/), [Khan Academy](https://www.khanacademy.org/math/linear-algebra) lessons, or just [Wikipedia](https://en.wikipedia.org/wiki/Linear_algebra) pages. 
" @@ -4047,7 +3078,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.2" }, "toc": { "toc_cell": false, @@ -4057,5 +3088,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 }