From 749817ccfa6b172f9647a0298560a6c0a2234836 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Geron?=
Date: Thu, 18 Feb 2021 11:59:02 +1300
Subject: [PATCH] Update libraries to latest version, including TensorFlow
 2.4.1 and Scikit-Learn 0.24.1

---
 18_reinforcement_learning.ipynb | 49 +++++++++++++++++----------------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/18_reinforcement_learning.ipynb b/18_reinforcement_learning.ipynb
index b17da27..ed44866 100644
--- a/18_reinforcement_learning.ipynb
+++ b/18_reinforcement_learning.ipynb
@@ -639,7 +639,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1316,7 +1316,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We will also need a replay memory. It will contain the agent's experiences, in the form of tuples: `(obs, action, reward, next_obs, done)`. We can use the `deque` class for that:"
+    "We will also need a replay memory. It will contain the agent's experiences, in the form of tuples: `(obs, action, reward, next_obs, done)`. We can use the `deque` class for that (but make sure to check out DeepMind's excellent [Reverb library](https://github.com/deepmind/reverb) for a much more robust implementation of experience replay):"
    ]
   },
   {
@@ -1860,20 +1860,13 @@
     "env.reset()"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**Warning**: since TF Agents 0.4.0, there seems to be an issue with passing an integer to the `env.step()` method (it raises an `AttributeError`). You need to wrap it in a NumPy array, as done below. Please see [TF Agents Issue #520](https://github.com/tensorflow/agents/issues/520) for more details."
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 82,
    "metadata": {},
    "outputs": [],
    "source": [
-    "env.step(np.array(1)) # Fire"
+    "env.step(1) # Fire"
    ]
   },
   {
@@ -2081,9 +2074,9 @@
    "source": [
     "env.seed(42)\n",
     "env.reset()\n",
-    "time_step = env.step(np.array(1)) # FIRE\n",
+    "time_step = env.step(1) # FIRE\n",
     "for _ in range(4):\n",
-    "    time_step = env.step(np.array(3)) # LEFT"
+    "    time_step = env.step(3) # LEFT"
    ]
   },
   {
@@ -2194,13 +2187,9 @@
    "source": [
     "from tf_agents.agents.dqn.dqn_agent import DqnAgent\n",
     "\n",
-    "# see TF-agents issue #113\n",
-    "#optimizer = keras.optimizers.RMSprop(lr=2.5e-4, rho=0.95, momentum=0.0,\n",
-    "#                                     epsilon=0.00001, centered=True)\n",
-    "\n",
     "train_step = tf.Variable(0)\n",
     "update_period = 4 # run a training step every 4 collect steps\n",
-    "optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate=2.5e-4, decay=0.95, momentum=0.0,\n",
+    "optimizer = keras.optimizers.RMSprop(lr=2.5e-4, rho=0.95, momentum=0.0,\n",
     "                                     epsilon=0.00001, centered=True)\n",
     "epsilon_fn = keras.optimizers.schedules.PolynomialDecay(\n",
     "    initial_learning_rate=1.0, # initial ε\n",
@@ -2222,7 +2211,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Create the replay buffer (this may use a lot of RAM, so please reduce the buffer size if you get an out-of-memory error):"
+    "Create the replay buffer (this will use a lot of RAM, so please reduce the buffer size if you get an out-of-memory error):"
    ]
   },
   {
@@ -2236,7 +2225,7 @@
     "replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(\n",
     "    data_spec=agent.collect_data_spec,\n",
     "    batch_size=tf_env.batch_size,\n",
-    "    max_length=1000000)\n",
+    "    max_length=1000000) # reduce if OOM error\n",
     "\n",
     "replay_buffer_observer = replay_buffer.add_batch"
    ]
@@ -2363,16 +2352,28 @@
     "Let's sample 2 sub-episodes, with 3 time steps each and display them:"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Note**: `replay_buffer.get_next()` is deprecated. We must use `replay_buffer.as_dataset(..., single_deterministic_pass=False)` instead."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 109,
    "metadata": {},
    "outputs": [],
    "source": [
-    "tf.random.set_seed(888) # chosen to show an example of trajectory at the end of an episode\n",
+    "tf.random.set_seed(93) # chosen to show an example of trajectory at the end of an episode\n",
     "\n",
-    "trajectories, buffer_info = replay_buffer.get_next(\n",
-    "    sample_batch_size=2, num_steps=3)"
+    "#trajectories, buffer_info = replay_buffer.get_next(\n",
+    "#    sample_batch_size=2, num_steps=3)\n",
+    "\n",
+    "trajectories, buffer_info = next(iter(replay_buffer.as_dataset(\n",
+    "    sample_batch_size=2,\n",
+    "    num_steps=3,\n",
+    "    single_deterministic_pass=False)))"
    ]
   },
   {
@@ -2528,7 +2529,7 @@
     "    lives = tf_env.pyenv.envs[0].ale.lives()\n",
     "    if prev_lives != lives:\n",
     "        tf_env.reset()\n",
-    "        tf_env.pyenv.envs[0].step(np.array(1))\n",
+    "        tf_env.pyenv.envs[0].step(1)\n",
     "        prev_lives = lives\n",
     "\n",
     "watch_driver = DynamicStepDriver(\n",
@@ -2797,7 +2798,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.8"
+   "version": "3.7.9"
   }
  },
  "nbformat": 4,
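
Not part of the patch above, and purely illustrative: the main API change in the sampling cell is replacing the deprecated `replay_buffer.get_next()` call with `replay_buffer.as_dataset(..., single_deterministic_pass=False)`. The sketch below shows that pattern in isolation; the scalar data spec, buffer sizes, and dummy values are assumptions chosen so it runs without the notebook's Atari environment or trained agent.

import tensorflow as tf
from tf_agents.replay_buffers import tf_uniform_replay_buffer

# Toy spec: each stored item is a single float (the real notebook stores full trajectories).
data_spec = tf.TensorSpec(shape=(), dtype=tf.float32, name="obs")
replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    data_spec=data_spec,
    batch_size=1,       # one parallel environment
    max_length=100)     # reduce if OOM error

# Fill the buffer with a few dummy items (one batch of size 1 per call).
for step in range(10):
    replay_buffer.add_batch(tf.constant([float(step)]))

# New style: sample 2 sub-episodes of 3 consecutive steps each via a tf.data pipeline,
# instead of the deprecated replay_buffer.get_next(sample_batch_size=2, num_steps=3).
dataset = replay_buffer.as_dataset(
    sample_batch_size=2,
    num_steps=3,
    single_deterministic_pass=False)
trajectories, buffer_info = next(iter(dataset))
print(trajectories.shape)  # (2, 3)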