From 64f0e05a941897d3475c2f4cce7ca573f68daac4 Mon Sep 17 00:00:00 2001
From: B D <73541689+lebaste77@users.noreply.github.com>
Date: Sun, 28 Feb 2021 12:02:23 +0100
Subject: [PATCH] Minor change to greedy policy variable usage

Chap 18: why not use the 'n_outputs' variable defined earlier directly,
instead of the hardcoded '2'?
---
 18_reinforcement_learning.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/18_reinforcement_learning.ipynb b/18_reinforcement_learning.ipynb
index ed44866..b723f04 100644
--- a/18_reinforcement_learning.ipynb
+++ b/18_reinforcement_learning.ipynb
@@ -1306,7 +1306,7 @@
    "source": [
     "def epsilon_greedy_policy(state, epsilon=0):\n",
     "    if np.random.rand() < epsilon:\n",
-    "        return np.random.randint(2)\n",
+    "        return np.random.randint(n_outputs)\n",
     "    else:\n",
     "        Q_values = model.predict(state[np.newaxis])\n",
     "        return np.argmax(Q_values[0])"
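
Note: this patch assumes that 'n_outputs' is already defined in an earlier cell of the
notebook. A minimal sketch of that surrounding context is given below; the exact contents
of those earlier cells (the environment setup and the Q-network) are an assumption for
illustration, not part of this patch:

    import gym
    import numpy as np
    from tensorflow import keras

    env = gym.make("CartPole-v1")
    n_outputs = env.action_space.n  # 2 for CartPole: accelerate left or right

    # Assumed Q-network: one output (estimated Q-value) per possible action.
    model = keras.models.Sequential([
        keras.layers.Dense(32, activation="elu",
                           input_shape=env.observation_space.shape),
        keras.layers.Dense(32, activation="elu"),
        keras.layers.Dense(n_outputs)
    ])

    def epsilon_greedy_policy(state, epsilon=0):
        if np.random.rand() < epsilon:
            return np.random.randint(n_outputs)  # explore: random action
        else:
            Q_values = model.predict(state[np.newaxis])
            return np.argmax(Q_values[0])        # exploit: highest-Q action

With 'n_outputs' taken from the environment's action space rather than hardcoded as '2',
the same policy code works unchanged for environments with more than two discrete
actions, which is the point of the change.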