diff --git a/16_reinforcement_learning.ipynb b/16_reinforcement_learning.ipynb index 7061f4e..ddd4467 100644 --- a/16_reinforcement_learning.ipynb +++ b/16_reinforcement_learning.ipynb @@ -55,12 +55,14 @@ "\n", "# Common imports\n", "import numpy as np\n", - "import numpy.random as rnd\n", "import os\n", "import sys\n", "\n", "# to make this notebook's output stable across runs\n", - "rnd.seed(42)\n", + "def reset_graph(seed=42):\n", + " tf.reset_default_graph()\n", + " tf.set_random_seed(seed)\n", + " np.random.seed(seed)\n", "\n", "# To plot pretty figures and animations\n", "%matplotlib nbagg\n", @@ -83,6 +85,13 @@ " plt.savefig(path, format='png', dpi=300)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: there may be minor differences between the output of this notebook and the examples shown in the book. You can safely ignore these differences. They are mainly due to the fact that most of the environments provided by OpenAI gym have some randomness." + ] + }, { "cell_type": "markdown", "metadata": { @@ -840,6 +849,19 @@ "save_fig(\"cart_pole_plot\")" ] }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "img.shape" + ] + }, { "cell_type": "markdown", "metadata": { @@ -852,7 +874,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "metadata": { "collapsed": false, "deletable": true, @@ -869,7 +891,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "metadata": { "collapsed": false, "deletable": true, @@ -912,7 +934,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "metadata": { "collapsed": false, "deletable": true, @@ -944,7 +966,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "metadata": { "collapsed": false, "deletable": true, @@ -988,7 +1010,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "Note: instead of using the `fully_connected()` function from the `tensorflow.contrib.layers` module (as in the book), we now use the `dense()` function from the `tf.layers` module, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. As you will see, the code is mostly the same.\n", "\n", @@ -1000,7 +1025,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 35, "metadata": { "collapsed": false, "deletable": true, @@ -1062,7 +1087,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 36, "metadata": { "collapsed": false, "deletable": true, @@ -1099,7 +1124,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 37, "metadata": { "collapsed": false, "deletable": true, @@ -1123,7 +1148,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 38, "metadata": { "collapsed": false, "deletable": true, @@ -1133,7 +1158,7 @@ "source": [ "import tensorflow as tf\n", "\n", - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_inputs = 4\n", "n_hidden = 4\n", @@ -1172,7 +1197,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 39, "metadata": { "collapsed": false, "deletable": true, @@ -1202,7 +1227,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 40, "metadata": { "collapsed": false, "deletable": true, @@ -1229,7 +1254,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 41, "metadata": { "collapsed": false, "deletable": true, @@ -1276,7 +1301,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 42, "metadata": { "collapsed": false, "deletable": true, @@ -1286,7 +1311,7 @@ "source": [ "import tensorflow as tf\n", "\n", - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "n_inputs = 4\n", "n_hidden = 4\n", @@ -1323,7 +1348,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 43, "metadata": { "collapsed": true, "deletable": true, @@ -1349,7 +1374,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 44, "metadata": { "collapsed": false, "deletable": true, @@ -1362,7 +1387,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 45, "metadata": { "collapsed": false, "deletable": true, @@ -1375,7 +1400,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 46, "metadata": { "collapsed": false, "deletable": true, @@ -1425,7 +1450,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 47, "metadata": { "collapsed": true, "deletable": true, @@ -1438,7 +1463,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 48, "metadata": { "collapsed": false, "deletable": true, @@ -1463,7 +1488,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 49, "metadata": { "collapsed": false, "deletable": true, @@ -1508,7 +1533,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 50, "metadata": { "collapsed": false, "deletable": true, @@ -1603,7 +1628,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 51, "metadata": { "collapsed": false, "deletable": true, @@ -1632,7 +1657,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 52, "metadata": { "collapsed": false, "deletable": true, @@ -1646,7 +1671,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 53, "metadata": { "collapsed": false, "deletable": true, @@ -1659,7 +1684,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 54, "metadata": { "collapsed": false, "deletable": true, @@ -1686,7 +1711,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 55, "metadata": { "collapsed": false, "deletable": true, @@ -1700,7 +1725,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 56, "metadata": { "collapsed": false, "deletable": true, @@ -1713,7 +1738,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 57, "metadata": { "collapsed": false, "deletable": true, @@ -1746,7 +1771,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 58, "metadata": { "collapsed": false, "deletable": true, @@ -1768,7 +1793,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 59, "metadata": { "collapsed": false, "deletable": true, @@ -1801,7 +1826,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "Note: instead of using `tf.contrib.layers.convolution2d()` or `tf.contrib.layers.conv2d()` (as in the book), we now use the `tf.layers.conv2d()`, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. As you will see, the code is mostly the same, except that the parameter names have changed slightly:\n", "* the `num_outputs` parameter was renamed to `filters`,\n", @@ -1814,7 +1842,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 60, "metadata": { "collapsed": false, "deletable": true, @@ -1822,7 +1850,7 @@ }, "outputs": [], "source": [ - "tf.reset_default_graph()\n", + "reset_graph()\n", "\n", "input_height = 88\n", "input_width = 80\n", @@ -1877,7 +1905,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 61, "metadata": { "collapsed": false, "deletable": true, @@ -1890,7 +1918,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 62, "metadata": { "collapsed": false, "deletable": true, @@ -1916,7 +1944,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 63, "metadata": { "collapsed": true, "deletable": true, @@ -1939,7 +1967,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 64, "metadata": { "collapsed": false, "deletable": true,