Merge pull request #396 from lebaste77/master
very minor change to greedy policy variable usage
main
commit
47538082f8
|
@@ -1306,7 +1306,7 @@
|
||||||
"source": [
|
"source": [
|
||||||
"def epsilon_greedy_policy(state, epsilon=0):\n",
|
"def epsilon_greedy_policy(state, epsilon=0):\n",
|
||||||
" if np.random.rand() < epsilon:\n",
|
" if np.random.rand() < epsilon:\n",
|
||||||
" return np.random.randint(2)\n",
|
" return np.random.randint(n_outputs)\n",
|
||||||
" else:\n",
|
" else:\n",
|
||||||
" Q_values = model.predict(state[np.newaxis])\n",
|
" Q_values = model.predict(state[np.newaxis])\n",
|
||||||
" return np.argmax(Q_values[0])"
|
" return np.argmax(Q_values[0])"
|
||||||
|
|
Loading…
Reference in New Issue