Use tf.layers instead of tf.contrib.layers
parent
14101abcf9
commit
326d32cae0
|
@ -584,7 +584,17 @@
|
||||||
"editable": true
|
"editable": true
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## Using `fully_connected` instead of `neuron_layer()`"
|
"## Using `dense()` instead of `neuron_layer()`"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function, except for a few minor differences:\n",
|
||||||
|
"* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n",
|
||||||
|
"* the default `activation` is now `None` rather than `tf.nn.relu`.\n",
|
||||||
|
"* a few more differences are presented in chapter 11."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -599,8 +609,6 @@
|
||||||
"source": [
|
"source": [
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from tensorflow.contrib.layers import fully_connected\n",
|
|
||||||
"\n",
|
|
||||||
"n_inputs = 28*28 # MNIST\n",
|
"n_inputs = 28*28 # MNIST\n",
|
||||||
"n_hidden1 = 300\n",
|
"n_hidden1 = 300\n",
|
||||||
"n_hidden2 = 100\n",
|
"n_hidden2 = 100\n",
|
||||||
|
@ -611,9 +619,9 @@
|
||||||
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
|
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.name_scope(\"dnn\"):\n",
|
"with tf.name_scope(\"dnn\"):\n",
|
||||||
" hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n",
|
" hidden1 = tf.layers.dense(X, n_hidden1, name=\"hidden1\", activation=tf.nn.relu)\n",
|
||||||
" hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n",
|
" hidden2 = tf.layers.dense(hidden1, n_hidden2, name=\"hidden2\", activation=tf.nn.relu)\n",
|
||||||
" logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n",
|
" logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.name_scope(\"loss\"):\n",
|
"with tf.name_scope(\"loss\"):\n",
|
||||||
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
||||||
|
@ -719,7 +727,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.5.2+"
|
"version": "3.5.3"
|
||||||
},
|
},
|
||||||
"nav_menu": {
|
"nav_menu": {
|
||||||
"height": "264px",
|
"height": "264px",
|
||||||
|
|
|
@ -297,6 +297,20 @@
|
||||||
" display(HTML(iframe))"
|
" display(HTML(iframe))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"deletable": true,
|
||||||
|
"editable": true
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function. The main differences relevant to this chapter are:\n",
|
||||||
|
"* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n",
|
||||||
|
"* the default `activation` is now `None` rather than `tf.nn.relu`.\n",
|
||||||
|
"* it does not support `tensorflow.contrib.framework.arg_scope()` (introduced later in chapter 11).\n",
|
||||||
|
"* it does not support regularizer params (introduced later in chapter 11)."
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": 12,
|
||||||
|
@ -307,8 +321,6 @@
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from tensorflow.contrib.layers import fully_connected\n",
|
|
||||||
"\n",
|
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"n_inputs = 28*28 # MNIST\n",
|
"n_inputs = 28*28 # MNIST\n",
|
||||||
|
@ -321,9 +333,9 @@
|
||||||
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
|
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.name_scope(\"dnn\"):\n",
|
"with tf.name_scope(\"dnn\"):\n",
|
||||||
" hidden1 = fully_connected(X, n_hidden1, activation_fn=leaky_relu, scope=\"hidden1\")\n",
|
" hidden1 = tf.layers.dense(X, n_hidden1, activation=leaky_relu, name=\"hidden1\")\n",
|
||||||
" hidden2 = fully_connected(hidden1, n_hidden2, activation_fn=leaky_relu, scope=\"hidden2\")\n",
|
" hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=leaky_relu, name=\"hidden2\")\n",
|
||||||
" logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n",
|
" logits = tf.layers.dense(hidden2, n_outputs, name=\"outputs\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.name_scope(\"loss\"):\n",
|
"with tf.name_scope(\"loss\"):\n",
|
||||||
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
||||||
|
@ -377,6 +389,24 @@
|
||||||
"# Batch Normalization"
|
"# Batch Normalization"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"deletable": true,
|
||||||
|
"editable": true
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Note: the book uses `tensorflow.contrib.layers.batch_norm()` rather than `tf.layers.batch_normalization()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.batch_normalization()`, because anything in the contrib module may change or be deleted without notice. Instead of using the `batch_norm()` function as a normalizer parameter (`normalizer_fn`) to the `fully_connected()` function, we now use `batch_normalization()` and we explicitly create a distinct layer. The parameters are a bit different, in particular:\n",
|
||||||
|
"* `decay` is renamed to `momentum`,\n",
|
||||||
|
"* `is_training` is renamed to `training`,\n",
|
||||||
|
"* `updates_collections` is removed: the update operations needed by batch normalization are added to the `UPDATE_OPS` collection and you need to explicitly run these operations during training (see the execution phase below),\n",
|
||||||
|
"* we don't need to specify `scale=True`, as that is the default.\n",
|
||||||
|
"\n",
|
||||||
|
"Also note that in order to run batch norm just _before_ each hidden layer's activation function, we apply the ELU activation function manually, right after the batch norm layer.\n",
|
||||||
|
"\n",
|
||||||
|
"Note: since the `tf.layers.dense()` function is incompatible with `tf.contrib.framework.arg_scope()` (which is used in the book), we now use Python's `functools.partial()` function instead. It makes it easy to create a `my_dense_layer()` function that just calls `tf.layers.dense()` with the desired parameters automatically set (unless they are overridden when calling `my_dense_layer()`). As you can see, the code remains very similar."
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 14,
|
"execution_count": 14,
|
||||||
|
@ -387,11 +417,10 @@
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from tensorflow.contrib.layers import fully_connected, batch_norm\n",
|
|
||||||
"from tensorflow.contrib.framework import arg_scope\n",
|
|
||||||
"\n",
|
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"from functools import partial\n",
|
||||||
|
"\n",
|
||||||
"n_inputs = 28 * 28 # MNIST\n",
|
"n_inputs = 28 * 28 # MNIST\n",
|
||||||
"n_hidden1 = 300\n",
|
"n_hidden1 = 300\n",
|
||||||
"n_hidden2 = 100\n",
|
"n_hidden2 = 100\n",
|
||||||
|
@ -405,22 +434,23 @@
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.name_scope(\"dnn\"):\n",
|
"with tf.name_scope(\"dnn\"):\n",
|
||||||
" he_init = tf.contrib.layers.variance_scaling_initializer()\n",
|
" he_init = tf.contrib.layers.variance_scaling_initializer()\n",
|
||||||
" batch_norm_params = {\n",
|
|
||||||
" 'is_training': is_training,\n",
|
|
||||||
" 'decay': 0.9,\n",
|
|
||||||
" 'updates_collections': None,\n",
|
|
||||||
" 'scale': True,\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" with arg_scope(\n",
|
" my_batch_norm_layer = partial(\n",
|
||||||
" [fully_connected],\n",
|
" tf.layers.batch_normalization,\n",
|
||||||
" activation_fn=tf.nn.elu,\n",
|
" training=is_training,\n",
|
||||||
" weights_initializer=he_init,\n",
|
" momentum=0.9)\n",
|
||||||
" normalizer_fn=batch_norm,\n",
|
"\n",
|
||||||
" normalizer_params=batch_norm_params):\n",
|
" my_dense_layer = partial(\n",
|
||||||
" hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n",
|
" tf.layers.dense,\n",
|
||||||
" hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n",
|
" kernel_initializer=he_init)\n",
|
||||||
" logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n",
|
"\n",
|
||||||
|
" hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n",
|
||||||
|
" bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))\n",
|
||||||
|
" hidden2 = my_dense_layer(bn1, n_hidden2, name=\"hidden2\")\n",
|
||||||
|
" bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))\n",
|
||||||
|
" logits_before_bn = my_dense_layer(bn2, n_outputs, activation=None, name=\"outputs\")\n",
|
||||||
|
" logits = my_batch_norm_layer(logits_before_bn)\n",
|
||||||
|
" extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.name_scope(\"loss\"):\n",
|
"with tf.name_scope(\"loss\"):\n",
|
||||||
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
||||||
|
@ -438,6 +468,16 @@
|
||||||
"saver = tf.train.Saver()"
|
"saver = tf.train.Saver()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"deletable": true,
|
||||||
|
"editable": true
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Note: since we are using `tf.layers.batch_normalization()` rather than `tf.contrib.layers.batch_norm()` (as in the book), we need to explicitly run the extra update operations needed by batch normalization (`sess.run([training_op, extra_update_ops],...`)."
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": 15,
|
||||||
|
@ -449,14 +489,14 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"n_epochs = 20\n",
|
"n_epochs = 20\n",
|
||||||
"batch_size = 50\n",
|
"batch_size = 200\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.Session() as sess:\n",
|
"with tf.Session() as sess:\n",
|
||||||
" init.run()\n",
|
" init.run()\n",
|
||||||
" for epoch in range(n_epochs):\n",
|
" for epoch in range(n_epochs):\n",
|
||||||
" for iteration in range(len(mnist.test.labels)//batch_size):\n",
|
" for iteration in range(len(mnist.test.labels)//batch_size):\n",
|
||||||
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
|
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
|
||||||
" sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n",
|
" sess.run([training_op, extra_update_ops], feed_dict={is_training: True, X: X_batch, y: y_batch})\n",
|
||||||
" acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n",
|
" acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n",
|
||||||
" acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n",
|
" acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n",
|
||||||
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
|
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
|
||||||
|
@ -464,11 +504,21 @@
|
||||||
" save_path = saver.save(sess, \"my_model_final.ckpt\")"
|
" save_path = saver.save(sess, \"my_model_final.ckpt\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"deletable": true,
|
||||||
|
"editable": true
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Now the same model with $\\ell_1$ regularization:"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 16,
|
"execution_count": 16,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": true,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
"editable": true
|
"editable": true
|
||||||
},
|
},
|
||||||
|
@ -476,29 +526,32 @@
|
||||||
"source": [
|
"source": [
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"from functools import partial\n",
|
||||||
|
"\n",
|
||||||
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
|
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
|
||||||
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
|
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
|
||||||
"is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n",
|
"is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.name_scope(\"dnn\"):\n",
|
"with tf.name_scope(\"dnn\"):\n",
|
||||||
" he_init = tf.contrib.layers.variance_scaling_initializer()\n",
|
" he_init = tf.contrib.layers.variance_scaling_initializer()\n",
|
||||||
" batch_norm_params = {\n",
|
|
||||||
" 'is_training': is_training,\n",
|
|
||||||
" 'decay': 0.9,\n",
|
|
||||||
" 'updates_collections': None,\n",
|
|
||||||
" 'scale': True,\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" with arg_scope(\n",
|
" my_batch_norm_layer = partial(\n",
|
||||||
" [fully_connected],\n",
|
" tf.layers.batch_normalization,\n",
|
||||||
" activation_fn=tf.nn.elu,\n",
|
" training=is_training,\n",
|
||||||
" weights_initializer=he_init,\n",
|
" momentum=0.9)\n",
|
||||||
" normalizer_fn=batch_norm,\n",
|
"\n",
|
||||||
" normalizer_params=batch_norm_params,\n",
|
" my_dense_layer = partial(\n",
|
||||||
" weights_regularizer=tf.contrib.layers.l1_regularizer(0.01)):\n",
|
" tf.layers.dense,\n",
|
||||||
" hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n",
|
" kernel_initializer=he_init,\n",
|
||||||
" hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n",
|
" kernel_regularizer=tf.contrib.layers.l1_regularizer(0.01))\n",
|
||||||
" logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n",
|
"\n",
|
||||||
|
" hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n",
|
||||||
|
" bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))\n",
|
||||||
|
" hidden2 = my_dense_layer(bn1, n_hidden2, name=\"hidden2\")\n",
|
||||||
|
" bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))\n",
|
||||||
|
" logits_before_bn = my_dense_layer(bn2, n_outputs, activation=None, name=\"outputs\")\n",
|
||||||
|
" logits = my_batch_norm_layer(logits_before_bn)\n",
|
||||||
|
" extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.name_scope(\"loss\"):\n",
|
"with tf.name_scope(\"loss\"):\n",
|
||||||
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
||||||
|
@ -513,7 +566,7 @@
|
||||||
"with tf.name_scope(\"eval\"):\n",
|
"with tf.name_scope(\"eval\"):\n",
|
||||||
" correct = tf.nn.in_top_k(logits, y, 1)\n",
|
" correct = tf.nn.in_top_k(logits, y, 1)\n",
|
||||||
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
|
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
|
||||||
" \n",
|
"\n",
|
||||||
"init = tf.global_variables_initializer()\n",
|
"init = tf.global_variables_initializer()\n",
|
||||||
"saver = tf.train.Saver()"
|
"saver = tf.train.Saver()"
|
||||||
]
|
]
|
||||||
|
@ -529,14 +582,14 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"n_epochs = 20\n",
|
"n_epochs = 20\n",
|
||||||
"batch_size = 50\n",
|
"batch_size = 200\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.Session() as sess:\n",
|
"with tf.Session() as sess:\n",
|
||||||
" init.run()\n",
|
" init.run()\n",
|
||||||
" for epoch in range(n_epochs):\n",
|
" for epoch in range(n_epochs):\n",
|
||||||
" for iteration in range(len(mnist.test.labels)//batch_size):\n",
|
" for iteration in range(len(mnist.test.labels)//batch_size):\n",
|
||||||
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
|
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
|
||||||
" sess.run(training_op, feed_dict={is_training: True, X: X_batch, y: y_batch})\n",
|
" sess.run([training_op, extra_update_ops], feed_dict={is_training: True, X: X_batch, y: y_batch})\n",
|
||||||
" acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n",
|
" acc_train = accuracy.eval(feed_dict={is_training: False, X: X_batch, y: y_batch})\n",
|
||||||
" acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n",
|
" acc_test = accuracy.eval(feed_dict={is_training: False, X: mnist.test.images, y: mnist.test.labels})\n",
|
||||||
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
|
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test)\n",
|
||||||
|
@ -557,6 +610,16 @@
|
||||||
"[v.name for v in tf.global_variables()]"
|
"[v.name for v in tf.global_variables()]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"deletable": true,
|
||||||
|
"editable": true
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Note: the weights variable created by the `tf.layers.dense()` function is called `\"kernel\"` (instead of `\"weights\"` when using the `tf.contrib.layers.fully_connected()` function, as in the book):"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 19,
|
"execution_count": 19,
|
||||||
|
@ -568,8 +631,8 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"with tf.variable_scope(\"\", default_name=\"\", reuse=True): # root scope\n",
|
"with tf.variable_scope(\"\", default_name=\"\", reuse=True): # root scope\n",
|
||||||
" weights1 = tf.get_variable(\"hidden1/weights\")\n",
|
" weights1 = tf.get_variable(\"hidden1/kernel\")\n",
|
||||||
" weights2 = tf.get_variable(\"hidden2/weights\")\n",
|
" weights2 = tf.get_variable(\"hidden2/kernel\")\n",
|
||||||
" "
|
" "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -689,6 +752,8 @@
|
||||||
"source": [
|
"source": [
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"from functools import partial\n",
|
||||||
|
"\n",
|
||||||
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
|
"X = tf.placeholder(tf.float32, shape=(None, n_inputs), name=\"X\")\n",
|
||||||
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
|
"y = tf.placeholder(tf.int64, shape=(None), name=\"y\")\n",
|
||||||
"is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n",
|
"is_training = tf.placeholder(tf.bool, shape=(), name='is_training')\n",
|
||||||
|
@ -701,12 +766,15 @@
|
||||||
" return max_norm\n",
|
" return max_norm\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.name_scope(\"dnn\"):\n",
|
"with tf.name_scope(\"dnn\"):\n",
|
||||||
" with arg_scope(\n",
|
" \n",
|
||||||
" [fully_connected],\n",
|
" my_dense_layer = partial(\n",
|
||||||
" weights_regularizer=max_norm_regularizer(1.5)):\n",
|
" tf.layers.dense,\n",
|
||||||
" hidden1 = fully_connected(X, n_hidden1, scope=\"hidden1\")\n",
|
" activation=tf.nn.relu,\n",
|
||||||
" hidden2 = fully_connected(hidden1, n_hidden2, scope=\"hidden2\")\n",
|
" kernel_regularizer=max_norm_regularizer(1.5))\n",
|
||||||
" logits = fully_connected(hidden2, n_outputs, activation_fn=None, scope=\"outputs\")\n",
|
"\n",
|
||||||
|
" hidden1 = my_dense_layer(X, n_hidden1, name=\"hidden1\")\n",
|
||||||
|
" hidden2 = my_dense_layer(hidden1, n_hidden2, name=\"hidden2\")\n",
|
||||||
|
" logits = my_dense_layer(hidden2, n_outputs, activation=None, name=\"outputs\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"clip_all_weights = tf.get_collection(\"max_norm\")\n",
|
"clip_all_weights = tf.get_collection(\"max_norm\")\n",
|
||||||
" \n",
|
" \n",
|
||||||
|
@ -770,6 +838,18 @@
|
||||||
"show_graph(tf.get_default_graph())"
|
"show_graph(tf.get_default_graph())"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"deletable": true,
|
||||||
|
"editable": true
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Note: the book uses `tf.contrib.layers.dropout()` rather than `tf.layers.dropout()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dropout()`, because anything in the contrib module may change or be deleted without notice. The `tf.layers.dropout()` function is almost identical to the `tf.contrib.layers.dropout()` function, except for a few minor differences. Most importantly:\n",
|
||||||
|
"* you must specify the dropout rate (`rate`) rather than the keep probability (`keep_prob`), where `rate` is simply equal to `1 - keep_prob`,\n",
|
||||||
|
"* the `is_training` parameter is renamed to `training`."
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 30,
|
"execution_count": 30,
|
||||||
|
@ -780,7 +860,7 @@
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from tensorflow.contrib.layers import dropout\n",
|
"from functools import partial\n",
|
||||||
"\n",
|
"\n",
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -795,20 +875,22 @@
|
||||||
"learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step,\n",
|
"learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step,\n",
|
||||||
" decay_steps, decay_rate)\n",
|
" decay_steps, decay_rate)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"keep_prob = 0.5\n",
|
"dropout_rate = 0.5\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.name_scope(\"dnn\"):\n",
|
"with tf.name_scope(\"dnn\"):\n",
|
||||||
" he_init = tf.contrib.layers.variance_scaling_initializer()\n",
|
" he_init = tf.contrib.layers.variance_scaling_initializer()\n",
|
||||||
" with arg_scope(\n",
|
"\n",
|
||||||
" [fully_connected],\n",
|
" my_dense_layer = partial(\n",
|
||||||
" activation_fn=tf.nn.elu,\n",
|
" tf.layers.dense,\n",
|
||||||
" weights_initializer=he_init):\n",
|
" activation=tf.nn.elu,\n",
|
||||||
" X_drop = dropout(X, keep_prob, is_training=is_training)\n",
|
" kernel_initializer=he_init)\n",
|
||||||
" hidden1 = fully_connected(X_drop, n_hidden1, scope=\"hidden1\")\n",
|
"\n",
|
||||||
" hidden1_drop = dropout(hidden1, keep_prob, is_training=is_training)\n",
|
" X_drop = tf.layers.dropout(X, dropout_rate, training=is_training)\n",
|
||||||
" hidden2 = fully_connected(hidden1_drop, n_hidden2, scope=\"hidden2\")\n",
|
" hidden1 = my_dense_layer(X_drop, n_hidden1, name=\"hidden1\")\n",
|
||||||
" hidden2_drop = dropout(hidden2, keep_prob, is_training=is_training)\n",
|
" hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training=is_training)\n",
|
||||||
" logits = fully_connected(hidden2_drop, n_outputs, activation_fn=None, scope=\"outputs\")\n",
|
" hidden2 = my_dense_layer(hidden1_drop, n_hidden2, name=\"hidden2\")\n",
|
||||||
|
" hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=is_training)\n",
|
||||||
|
" logits = my_dense_layer(hidden2_drop, n_outputs, activation=None, name=\"outputs\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.name_scope(\"loss\"):\n",
|
"with tf.name_scope(\"loss\"):\n",
|
||||||
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
||||||
|
@ -970,7 +1052,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.5.2+"
|
"version": "3.5.3"
|
||||||
},
|
},
|
||||||
"nav_menu": {
|
"nav_menu": {
|
||||||
"height": "360px",
|
"height": "360px",
|
||||||
|
|
|
@ -541,7 +541,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.5.2+"
|
"version": "3.5.3"
|
||||||
},
|
},
|
||||||
"nav_menu": {},
|
"nav_menu": {},
|
||||||
"toc": {
|
"toc": {
|
||||||
|
|
|
@ -402,50 +402,101 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "markdown",
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true
|
"deletable": true,
|
||||||
|
"editable": true
|
||||||
},
|
},
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
"source": [
|
||||||
"from six.moves import urllib\n",
|
"Note: instead of using the `fully_connected()`, `conv2d()` and `dropout()` functions from the `tensorflow.contrib.layers` module (as in the book), we now use the `dense()`, `conv2d()` and `dropout()` functions (respectively) from the `tf.layers` module, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. As you will see, the code is mostly the same.\n",
|
||||||
"from sklearn.datasets import fetch_mldata\n",
|
|
||||||
"try:\n",
|
|
||||||
" mnist = fetch_mldata('MNIST original')\n",
|
|
||||||
"except urllib.error.HTTPError as ex:\n",
|
|
||||||
" print(\"Could not download MNIST data from mldata.org, trying alternative...\")\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" # Alternative method to load MNIST, if mldata.org is down\n",
|
"For all these functions:\n",
|
||||||
" from scipy.io import loadmat\n",
|
"* the `scope` parameter was renamed to `name`, and the `_fn` suffix was removed in all the parameters that had it (for example the `activation_fn` parameter was renamed to `activation`).\n",
|
||||||
" mnist_alternative_url = \"https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat\"\n",
|
"\n",
|
||||||
" mnist_path = \"./mnist-original.mat\"\n",
|
"The other main differences in `tf.layers.dense()` are:\n",
|
||||||
" response = urllib.request.urlopen(mnist_alternative_url)\n",
|
"* the `weights` parameter was renamed to `kernel` (and the weights variable is now named `\"kernel\"` rather than `\"weights\"`),\n",
|
||||||
" with open(mnist_path, \"wb\") as f:\n",
|
"* the default activation is `None` instead of `tf.nn.relu`.\n",
|
||||||
" content = response.read()\n",
|
"\n",
|
||||||
" f.write(content)\n",
|
"The other main differences in `tf.layers.conv2d()` are:\n",
|
||||||
" mnist_raw = loadmat(mnist_path)\n",
|
"* the `num_outputs` parameter was renamed to `filters`,\n",
|
||||||
" mnist = {\n",
|
"* the `stride` parameter was renamed to `strides`,\n",
|
||||||
" \"data\": mnist_raw[\"data\"].T,\n",
|
"* the default `activation` is now `None` instead of `tf.nn.relu`.\n",
|
||||||
" \"target\": mnist_raw[\"label\"][0],\n",
|
"\n",
|
||||||
" \"COL_NAMES\": [\"label\", \"data\"],\n",
|
"The other main differences in `tf.layers.dropout()` are:\n",
|
||||||
" \"DESCR\": \"mldata.org dataset: mnist-original\",\n",
|
"* it takes the dropout rate (`rate`) rather than the keep probability (`keep_prob`). Of course, `rate == 1 - keep_prob`,\n",
|
||||||
" }\n",
|
"* the `is_training` parameter was renamed to `training`."
|
||||||
" print(\"Success!\")"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": 15,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
"editable": true
|
"editable": true
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"X_train, X_test = mnist[\"data\"][:60000].astype(np.float64), mnist[\"data\"][60000:].astype(np.float64)\n",
|
"height = 28\n",
|
||||||
"y_train, y_test = mnist[\"target\"][:60000].astype(np.int64), mnist[\"target\"][60000:].astype(np.int64)"
|
"width = 28\n",
|
||||||
|
"channels = 1\n",
|
||||||
|
"n_inputs = height * width\n",
|
||||||
|
"\n",
|
||||||
|
"conv1_fmaps = 32\n",
|
||||||
|
"conv1_ksize = 3\n",
|
||||||
|
"conv1_stride = 1\n",
|
||||||
|
"conv1_pad = \"SAME\"\n",
|
||||||
|
"\n",
|
||||||
|
"conv2_fmaps = 64\n",
|
||||||
|
"conv2_ksize = 3\n",
|
||||||
|
"conv2_stride = 1\n",
|
||||||
|
"conv2_pad = \"SAME\"\n",
|
||||||
|
"conv2_dropout_rate = 0.25\n",
|
||||||
|
"\n",
|
||||||
|
"pool3_fmaps = conv2_fmaps\n",
|
||||||
|
"\n",
|
||||||
|
"n_fc1 = 128\n",
|
||||||
|
"fc1_dropout_rate = 0.5\n",
|
||||||
|
"\n",
|
||||||
|
"n_outputs = 10\n",
|
||||||
|
"\n",
|
||||||
|
"graph = tf.Graph()\n",
|
||||||
|
"with graph.as_default():\n",
|
||||||
|
" with tf.name_scope(\"inputs\"):\n",
|
||||||
|
" X = tf.placeholder(tf.float32, shape=[None, n_inputs], name=\"X\")\n",
|
||||||
|
" X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])\n",
|
||||||
|
" y = tf.placeholder(tf.int32, shape=[None], name=\"y\")\n",
|
||||||
|
" is_training = tf.placeholder_with_default(False, shape=[], name='is_training')\n",
|
||||||
|
"\n",
|
||||||
|
" conv1 = tf.layers.conv2d(X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize, strides=conv1_stride, padding=conv1_pad, activation=tf.nn.relu, name=\"conv1\")\n",
|
||||||
|
" conv2 = tf.layers.conv2d(conv1, filters=conv2_fmaps, kernel_size=conv2_ksize, strides=conv2_stride, padding=conv2_pad, activation=tf.nn.relu, name=\"conv2\")\n",
|
||||||
|
"\n",
|
||||||
|
" with tf.name_scope(\"pool3\"):\n",
|
||||||
|
" pool3 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding=\"VALID\")\n",
|
||||||
|
" pool3_flat = tf.reshape(pool3, shape=[-1, pool3_fmaps * 14 * 14])\n",
|
||||||
|
" pool3_flat_drop = tf.layers.dropout(pool3_flat, conv2_dropout_rate, training=is_training)\n",
|
||||||
|
"\n",
|
||||||
|
" with tf.name_scope(\"fc1\"):\n",
|
||||||
|
" fc1 = tf.layers.dense(pool3_flat_drop, n_fc1, activation=tf.nn.relu, name=\"fc1\")\n",
|
||||||
|
" fc1_drop = tf.layers.dropout(fc1, fc1_dropout_rate, training=is_training)\n",
|
||||||
|
"\n",
|
||||||
|
" with tf.name_scope(\"output\"):\n",
|
||||||
|
" logits = tf.layers.dense(fc1, n_outputs, name=\"output\")\n",
|
||||||
|
" Y_proba = tf.nn.softmax(logits, name=\"Y_proba\")\n",
|
||||||
|
"\n",
|
||||||
|
" with tf.name_scope(\"train\"):\n",
|
||||||
|
" xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)\n",
|
||||||
|
" loss = tf.reduce_mean(xentropy)\n",
|
||||||
|
" optimizer = tf.train.AdamOptimizer()\n",
|
||||||
|
" training_op = optimizer.minimize(loss)\n",
|
||||||
|
"\n",
|
||||||
|
" with tf.name_scope(\"eval\"):\n",
|
||||||
|
" correct = tf.nn.in_top_k(logits, y, 1)\n",
|
||||||
|
" accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
|
||||||
|
"\n",
|
||||||
|
" with tf.name_scope(\"init_and_save\"):\n",
|
||||||
|
" init = tf.global_variables_initializer()\n",
|
||||||
|
" saver = tf.train.Saver()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -458,9 +509,78 @@
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"height, width = 28, 28\n",
|
"from tensorflow.examples.tutorials.mnist import input_data\n",
|
||||||
"images = X_test[5000].reshape(1, height, width, 1)\n",
|
"mnist = input_data.read_data_sets(\"/tmp/data/\")"
|
||||||
"plot_image(images[0, :, :, 0])"
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 17,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": true,
|
||||||
|
"deletable": true,
|
||||||
|
"editable": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def get_model_params():\n",
|
||||||
|
" gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)\n",
|
||||||
|
" return {gvar.op.name: value for gvar, value in zip(gvars, tf.get_default_session().run(gvars))}\n",
|
||||||
|
"\n",
|
||||||
|
"def restore_model_params(model_params):\n",
|
||||||
|
" gvar_names = list(model_params.keys())\n",
|
||||||
|
" assign_ops = {gvar_name: tf.get_default_graph().get_operation_by_name(gvar_name + \"/Assign\")\n",
|
||||||
|
" for gvar_name in gvar_names}\n",
|
||||||
|
" init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}\n",
|
||||||
|
" feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}\n",
|
||||||
|
" tf.get_default_session().run(assign_ops, feed_dict=feed_dict)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 18,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"deletable": true,
|
||||||
|
"editable": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"n_epochs = 1000\n",
|
||||||
|
"batch_size = 50\n",
|
||||||
|
"\n",
|
||||||
|
"best_acc_val = 0\n",
|
||||||
|
"check_interval = 100\n",
|
||||||
|
"checks_since_last_progress = 0\n",
|
||||||
|
"max_checks_without_progress = 100\n",
|
||||||
|
"best_model_params = None \n",
|
||||||
|
"\n",
|
||||||
|
"with tf.Session(graph=graph) as sess:\n",
|
||||||
|
" init.run()\n",
|
||||||
|
" for epoch in range(n_epochs):\n",
|
||||||
|
" for iteration in range(mnist.train.num_examples // batch_size):\n",
|
||||||
|
" X_batch, y_batch = mnist.train.next_batch(batch_size)\n",
|
||||||
|
" sess.run(training_op, feed_dict={X: X_batch, y: y_batch, is_training: True})\n",
|
||||||
|
" if iteration % check_interval == 0:\n",
|
||||||
|
" acc_val = accuracy.eval(feed_dict={X: mnist.test.images[:2000], y: mnist.test.labels[:2000]})\n",
|
||||||
|
" if acc_val > best_acc_val:\n",
|
||||||
|
" best_acc_val = acc_val\n",
|
||||||
|
" checks_since_last_progress = 0\n",
|
||||||
|
" best_model_params = get_model_params()\n",
|
||||||
|
" else:\n",
|
||||||
|
" checks_since_last_progress += 1\n",
|
||||||
|
" acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})\n",
|
||||||
|
" acc_test = accuracy.eval(feed_dict={X: mnist.test.images[2000:], y: mnist.test.labels[2000:]})\n",
|
||||||
|
" print(epoch, \"Train accuracy:\", acc_train, \"Test accuracy:\", acc_test, \"Best validation accuracy:\", best_acc_val)\n",
|
||||||
|
" if checks_since_last_progress > max_checks_without_progress:\n",
|
||||||
|
" print(\"Early stopping!\")\n",
|
||||||
|
" break\n",
|
||||||
|
"\n",
|
||||||
|
" if best_model_params:\n",
|
||||||
|
" restore_model_params(best_model_params)\n",
|
||||||
|
" acc_test = accuracy.eval(feed_dict={X: mnist.test.images[2000:], y: mnist.test.labels[2000:]})\n",
|
||||||
|
" print(\"Final accuracy on test set:\", acc_test)\n",
|
||||||
|
" save_path = saver.save(sess, \"./my_mnist_model\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -475,7 +595,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 17,
|
"execution_count": 21,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": true,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -511,7 +631,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 18,
|
"execution_count": 22,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -524,7 +644,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 19,
|
"execution_count": 23,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": true,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -544,7 +664,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 20,
|
"execution_count": 24,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -557,7 +677,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 21,
|
"execution_count": 25,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": true,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -572,7 +692,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 22,
|
"execution_count": 26,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -589,7 +709,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 23,
|
"execution_count": 27,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -611,7 +731,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 24,
|
"execution_count": 28,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -628,7 +748,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 25,
|
"execution_count": 29,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -641,7 +761,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 26,
|
"execution_count": 30,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -654,7 +774,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 27,
|
"execution_count": 31,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -717,7 +837,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.5.2+"
|
"version": "3.5.3"
|
||||||
},
|
},
|
||||||
"nav_menu": {},
|
"nav_menu": {},
|
||||||
"toc": {
|
"toc": {
|
||||||
|
|
|
@ -573,6 +573,18 @@
|
||||||
"## Training a sequence classifier"
|
"## Training a sequence classifier"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"deletable": true,
|
||||||
|
"editable": true
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Note: the book uses `tensorflow.contrib.layers.fully_connected()` rather than `tf.layers.dense()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dense()`, because anything in the contrib module may change or be deleted without notice. The `dense()` function is almost identical to the `fully_connected()` function. The main differences relevant to this chapter are:\n",
|
||||||
|
"* several parameters are renamed: `scope` becomes `name`, `activation_fn` becomes `activation` (and similarly the `_fn` suffix is removed from other parameters such as `normalizer_fn`), `weights_initializer` becomes `kernel_initializer`, etc.\n",
|
||||||
|
"* the default `activation` is now `None` rather than `tf.nn.relu`."
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 23,
|
"execution_count": 23,
|
||||||
|
@ -585,8 +597,6 @@
|
||||||
"source": [
|
"source": [
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from tensorflow.contrib.layers import fully_connected\n",
|
|
||||||
"\n",
|
|
||||||
"n_steps = 28\n",
|
"n_steps = 28\n",
|
||||||
"n_inputs = 28\n",
|
"n_inputs = 28\n",
|
||||||
"n_neurons = 150\n",
|
"n_neurons = 150\n",
|
||||||
|
@ -601,7 +611,7 @@
|
||||||
" basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
|
" basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)\n",
|
||||||
" outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
|
" outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"logits = fully_connected(states, n_outputs, activation_fn=None)\n",
|
"logits = tf.layers.dense(states, n_outputs)\n",
|
||||||
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
||||||
"loss = tf.reduce_mean(xentropy)\n",
|
"loss = tf.reduce_mean(xentropy)\n",
|
||||||
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
|
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
|
||||||
|
@ -675,8 +685,6 @@
|
||||||
"source": [
|
"source": [
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from tensorflow.contrib.layers import fully_connected\n",
|
|
||||||
"\n",
|
|
||||||
"n_steps = 28\n",
|
"n_steps = 28\n",
|
||||||
"n_inputs = 28\n",
|
"n_inputs = 28\n",
|
||||||
"n_neurons1 = 150\n",
|
"n_neurons1 = 150\n",
|
||||||
|
@ -693,7 +701,7 @@
|
||||||
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell([hidden1, hidden2])\n",
|
"multi_layer_cell = tf.contrib.rnn.MultiRNNCell([hidden1, hidden2])\n",
|
||||||
"outputs, states_tuple = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
|
"outputs, states_tuple = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
|
||||||
"states = tf.concat(axis=1, values=states_tuple)\n",
|
"states = tf.concat(axis=1, values=states_tuple)\n",
|
||||||
"logits = fully_connected(states, n_outputs, activation_fn=None)\n",
|
"logits = tf.layers.dense(states, n_outputs)\n",
|
||||||
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
||||||
"loss = tf.reduce_mean(xentropy)\n",
|
"loss = tf.reduce_mean(xentropy)\n",
|
||||||
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
|
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
|
||||||
|
@ -847,8 +855,6 @@
|
||||||
"source": [
|
"source": [
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from tensorflow.contrib.layers import fully_connected\n",
|
|
||||||
"\n",
|
|
||||||
"n_steps = 20\n",
|
"n_steps = 20\n",
|
||||||
"n_inputs = 1\n",
|
"n_inputs = 1\n",
|
||||||
"n_neurons = 100\n",
|
"n_neurons = 100\n",
|
||||||
|
@ -942,8 +948,6 @@
|
||||||
"source": [
|
"source": [
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from tensorflow.contrib.layers import fully_connected\n",
|
|
||||||
"\n",
|
|
||||||
"n_steps = 20\n",
|
"n_steps = 20\n",
|
||||||
"n_inputs = 1\n",
|
"n_inputs = 1\n",
|
||||||
"n_neurons = 100\n",
|
"n_neurons = 100\n",
|
||||||
|
@ -958,7 +962,7 @@
|
||||||
"learning_rate = 0.001\n",
|
"learning_rate = 0.001\n",
|
||||||
"\n",
|
"\n",
|
||||||
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
|
"stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
|
||||||
"stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n",
|
"stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
|
||||||
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
|
"outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"loss = tf.reduce_sum(tf.square(outputs - y))\n",
|
"loss = tf.reduce_sum(tf.square(outputs - y))\n",
|
||||||
|
@ -1181,7 +1185,6 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"from tensorflow.contrib.layers import fully_connected\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"n_inputs = 1\n",
|
"n_inputs = 1\n",
|
||||||
"n_neurons = 100\n",
|
"n_neurons = 100\n",
|
||||||
|
@ -1202,7 +1205,7 @@
|
||||||
" rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
|
" rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
|
" stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])\n",
|
||||||
" stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)\n",
|
" stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)\n",
|
||||||
" outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
|
" outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])\n",
|
||||||
"\n",
|
"\n",
|
||||||
" loss = tf.reduce_sum(tf.square(outputs - y))\n",
|
" loss = tf.reduce_sum(tf.square(outputs - y))\n",
|
||||||
|
@ -1277,8 +1280,6 @@
|
||||||
"source": [
|
"source": [
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from tensorflow.contrib.layers import fully_connected\n",
|
|
||||||
"\n",
|
|
||||||
"n_steps = 28\n",
|
"n_steps = 28\n",
|
||||||
"n_inputs = 28\n",
|
"n_inputs = 28\n",
|
||||||
"n_neurons = 150\n",
|
"n_neurons = 150\n",
|
||||||
|
@ -1293,7 +1294,7 @@
|
||||||
"multi_cell = tf.contrib.rnn.MultiRNNCell([lstm_cell]*3)\n",
|
"multi_cell = tf.contrib.rnn.MultiRNNCell([lstm_cell]*3)\n",
|
||||||
"outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n",
|
"outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n",
|
||||||
"top_layer_h_state = states[-1][1]\n",
|
"top_layer_h_state = states[-1][1]\n",
|
||||||
"logits = fully_connected(top_layer_h_state, n_outputs, activation_fn=None, scope=\"softmax\")\n",
|
"logits = tf.layers.dense(top_layer_h_state, n_outputs, name=\"softmax\")\n",
|
||||||
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
"xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)\n",
|
||||||
"loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
|
"loss = tf.reduce_mean(xentropy, name=\"loss\")\n",
|
||||||
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
|
"optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
|
||||||
|
@ -1336,7 +1337,8 @@
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
"editable": true
|
"editable": true,
|
||||||
|
"scrolled": true
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -1466,7 +1468,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 52,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": true,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1509,7 +1511,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 8,
|
"execution_count": 53,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1522,7 +1524,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 9,
|
"execution_count": 54,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1545,7 +1547,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": 55,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1565,7 +1567,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 11,
|
"execution_count": 56,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1578,7 +1580,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": 57,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1591,7 +1593,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 13,
|
"execution_count": 58,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1614,7 +1616,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 14,
|
"execution_count": 59,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": true,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1652,7 +1654,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": 60,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1666,7 +1668,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 16,
|
"execution_count": 61,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1679,7 +1681,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 17,
|
"execution_count": 62,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1702,7 +1704,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 18,
|
"execution_count": 63,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": true,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1728,7 +1730,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 19,
|
"execution_count": 64,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1787,7 +1789,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 20,
|
"execution_count": 65,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1795,7 +1797,7 @@
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"num_steps = 100001\n",
|
"num_steps = 10001\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.Session() as session:\n",
|
"with tf.Session() as session:\n",
|
||||||
" init.run()\n",
|
" init.run()\n",
|
||||||
|
@ -1846,7 +1848,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 21,
|
"execution_count": 66,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1869,7 +1871,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 22,
|
"execution_count": 67,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": true,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1893,7 +1895,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 23,
|
"execution_count": 68,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1932,7 +1934,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 24,
|
"execution_count": 69,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1976,7 +1978,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 25,
|
"execution_count": 70,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -2033,7 +2035,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.5.2+"
|
"version": "3.5.3"
|
||||||
},
|
},
|
||||||
"nav_menu": {},
|
"nav_menu": {},
|
||||||
"toc": {
|
"toc": {
|
||||||
|
|
|
@ -225,7 +225,23 @@
|
||||||
"editable": true
|
"editable": true
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"Now let's build the Autoencoder:"
|
"Now let's build the Autoencoder..."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"deletable": true,
|
||||||
|
"editable": true
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Note: instead of using the `fully_connected()` function from the `tensorflow.contrib.layers` module (as in the book), we now use the `dense()` function from the `tf.layers` module, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. As you will see, the code is mostly the same.\n",
|
||||||
|
"\n",
|
||||||
|
"The main differences relevant to this chapter are:\n",
|
||||||
|
"* the `scope` parameter was renamed to `name`, and the `_fn` suffix was removed in all the parameters that had it (for example the `activation_fn` parameter was renamed to `activation`).\n",
|
||||||
|
"* parameters whose names contained `weights` now use `kernel` instead (for example `weights_initializer` was renamed to `kernel_initializer`), and the weights variable is now named `\"kernel\"` rather than `\"weights\"`,\n",
|
||||||
|
"* the bias variable is now named `\"bias\"` rather than `\"biases\"`,\n",
|
||||||
|
"* the default activation is `None` instead of `tf.nn.relu`."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -240,8 +256,6 @@
|
||||||
"source": [
|
"source": [
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from tensorflow.contrib.layers import fully_connected\n",
|
|
||||||
"\n",
|
|
||||||
"n_inputs = 3\n",
|
"n_inputs = 3\n",
|
||||||
"n_hidden = 2 # codings\n",
|
"n_hidden = 2 # codings\n",
|
||||||
"n_outputs = n_inputs\n",
|
"n_outputs = n_inputs\n",
|
||||||
|
@ -249,8 +263,8 @@
|
||||||
"learning_rate = 0.01\n",
|
"learning_rate = 0.01\n",
|
||||||
"\n",
|
"\n",
|
||||||
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
|
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
|
||||||
"hidden = fully_connected(X, n_hidden, activation_fn=None)\n",
|
"hidden = tf.layers.dense(X, n_hidden)\n",
|
||||||
"outputs = fully_connected(hidden, n_outputs, activation_fn=None)\n",
|
"outputs = tf.layers.dense(hidden, n_outputs)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"mse = tf.reduce_mean(tf.square(outputs - X))\n",
|
"mse = tf.reduce_mean(tf.square(outputs - X))\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -352,6 +366,16 @@
|
||||||
"Let's build a stacked Autoencoder with 3 hidden layers and 1 output layer (ie. 2 stacked Autoencoders). We will use ELU activation, He initialization and L2 regularization."
|
"Let's build a stacked Autoencoder with 3 hidden layers and 1 output layer (ie. 2 stacked Autoencoders). We will use ELU activation, He initialization and L2 regularization."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"deletable": true,
|
||||||
|
"editable": true
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Note: since the `tf.layers.dense()` function is incompatible with `tf.contrib.layers.arg_scope()` (which is used in the book), we now use Python's `functools.partial()` function instead. It makes it easy to create a `my_dense_layer()` function that just calls `tf.layers.dense()` with the desired parameters automatically set (unless they are overridden when calling `my_dense_layer()`)."
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 11,
|
"execution_count": 11,
|
||||||
|
@ -364,7 +388,7 @@
|
||||||
"source": [
|
"source": [
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from tensorflow.contrib.layers import fully_connected\n",
|
"from functools import partial\n",
|
||||||
"\n",
|
"\n",
|
||||||
"n_inputs = 28*28\n",
|
"n_inputs = 28*28\n",
|
||||||
"n_hidden1 = 300\n",
|
"n_hidden1 = 300\n",
|
||||||
|
@ -380,15 +404,17 @@
|
||||||
"#initializer = lambda shape, dtype=tf.float32: tf.truncated_normal(shape, 0., stddev=np.sqrt(2/shape[0]))\n",
|
"#initializer = lambda shape, dtype=tf.float32: tf.truncated_normal(shape, 0., stddev=np.sqrt(2/shape[0]))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
|
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
|
||||||
"with tf.contrib.framework.arg_scope(\n",
|
"\n",
|
||||||
" [fully_connected],\n",
|
"my_dense_layer = partial(\n",
|
||||||
" activation_fn=tf.nn.elu,\n",
|
" tf.layers.dense,\n",
|
||||||
" weights_initializer=initializer,\n",
|
" activation=tf.nn.elu,\n",
|
||||||
" weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):\n",
|
" kernel_initializer=initializer,\n",
|
||||||
" hidden1 = fully_connected(X, n_hidden1)\n",
|
" kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))\n",
|
||||||
" hidden2 = fully_connected(hidden1, n_hidden2)\n",
|
"\n",
|
||||||
" hidden3 = fully_connected(hidden2, n_hidden3)\n",
|
"hidden1 = my_dense_layer(X, n_hidden1)\n",
|
||||||
" outputs = fully_connected(hidden3, n_outputs, activation_fn=None)\n",
|
"hidden2 = my_dense_layer(hidden1, n_hidden2)\n",
|
||||||
|
"hidden3 = my_dense_layer(hidden2, n_hidden3)\n",
|
||||||
|
"outputs = my_dense_layer(hidden3, n_outputs, activation=None)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"mse = tf.reduce_mean(tf.square(outputs - X))\n",
|
"mse = tf.reduce_mean(tf.square(outputs - X))\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -528,19 +554,23 @@
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def train_autoencoder(X_train, n_neurons, n_epochs, batch_size, learning_rate = 0.01, l2_reg = 0.0005, activation_fn=tf.nn.elu):\n",
|
"from functools import partial\n",
|
||||||
|
"\n",
|
||||||
|
"def train_autoencoder(X_train, n_neurons, n_epochs, batch_size, learning_rate = 0.01, l2_reg = 0.0005, activation=tf.nn.elu):\n",
|
||||||
" graph = tf.Graph()\n",
|
" graph = tf.Graph()\n",
|
||||||
" with graph.as_default():\n",
|
" with graph.as_default():\n",
|
||||||
" n_inputs = X_train.shape[1]\n",
|
" n_inputs = X_train.shape[1]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
|
" X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
|
||||||
" with tf.contrib.framework.arg_scope(\n",
|
" \n",
|
||||||
" [fully_connected],\n",
|
" my_dense_layer = partial(\n",
|
||||||
" activation_fn=activation_fn,\n",
|
" tf.layers.dense,\n",
|
||||||
" weights_initializer=tf.contrib.layers.variance_scaling_initializer(),\n",
|
" activation=activation,\n",
|
||||||
" weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):\n",
|
" kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),\n",
|
||||||
" hidden = fully_connected(X, n_neurons, scope=\"hidden\")\n",
|
" kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))\n",
|
||||||
" outputs = fully_connected(hidden, n_inputs, activation_fn=None, scope=\"outputs\")\n",
|
"\n",
|
||||||
|
" hidden = my_dense_layer(X, n_neurons, name=\"hidden\")\n",
|
||||||
|
" outputs = my_dense_layer(hidden, n_inputs, activation=None, name=\"outputs\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
" mse = tf.reduce_mean(tf.square(outputs - X))\n",
|
" mse = tf.reduce_mean(tf.square(outputs - X))\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -566,7 +596,7 @@
|
||||||
" print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n",
|
" print(\"\\r{}\".format(epoch), \"Train MSE:\", mse_train)\n",
|
||||||
" params = dict([(var.name, var.eval()) for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])\n",
|
" params = dict([(var.name, var.eval()) for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])\n",
|
||||||
" hidden_val = hidden.eval(feed_dict={X: X_train})\n",
|
" hidden_val = hidden.eval(feed_dict={X: X_train})\n",
|
||||||
" return hidden_val, params[\"hidden/weights:0\"], params[\"hidden/biases:0\"], params[\"outputs/weights:0\"], params[\"outputs/biases:0\"]"
|
" return hidden_val, params[\"hidden/kernel:0\"], params[\"hidden/bias:0\"], params[\"outputs/kernel:0\"], params[\"outputs/bias:0\"]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -853,7 +883,7 @@
|
||||||
"editable": true
|
"editable": true
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). Unfortunately this makes it impossible (or very tricky) to use the `fully_connected()` function, so we need to build the Autoencoder manually:"
|
"It is common to tie the weights of the encoder and the decoder (`weights_decoder = tf.transpose(weights_encoder)`). Unfortunately this makes it impossible (or very tricky) to use the `tf.layers.dense()` function, so we need to build the Autoencoder manually:"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1114,11 +1144,20 @@
|
||||||
"# Stacked denoising Autoencoder"
|
"# Stacked denoising Autoencoder"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Note: the book uses `tf.contrib.layers.dropout()` rather than `tf.layers.dropout()` (which did not exist when this chapter was written). It is now preferable to use `tf.layers.dropout()`, because anything in the contrib module may change or be deleted without notice. The `tf.layers.dropout()` function is almost identical to the `tf.contrib.layers.dropout()` function, except for a few minor differences. Most importantly:\n",
|
||||||
|
"* you must specify the dropout rate (`rate`) rather than the keep probability (`keep_prob`), where `rate` is simply equal to `1 - keep_prob`,\n",
|
||||||
|
"* the `is_training` parameter is renamed to `training`."
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 30,
|
"execution_count": 31,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
"editable": true
|
"editable": true
|
||||||
},
|
},
|
||||||
|
@ -1126,8 +1165,6 @@
|
||||||
"source": [
|
"source": [
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from tensorflow.contrib.layers import dropout\n",
|
|
||||||
"\n",
|
|
||||||
"n_inputs = 28 * 28\n",
|
"n_inputs = 28 * 28\n",
|
||||||
"n_hidden1 = 300\n",
|
"n_hidden1 = 300\n",
|
||||||
"n_hidden2 = 150 # codings\n",
|
"n_hidden2 = 150 # codings\n",
|
||||||
|
@ -1136,7 +1173,7 @@
|
||||||
"\n",
|
"\n",
|
||||||
"learning_rate = 0.01\n",
|
"learning_rate = 0.01\n",
|
||||||
"l2_reg = 0.00001\n",
|
"l2_reg = 0.00001\n",
|
||||||
"keep_prob = 0.7\n",
|
"dropout_rate = 0.3\n",
|
||||||
"\n",
|
"\n",
|
||||||
"activation = tf.nn.elu\n",
|
"activation = tf.nn.elu\n",
|
||||||
"regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n",
|
"regularizer = tf.contrib.layers.l2_regularizer(l2_reg)\n",
|
||||||
|
@ -1145,7 +1182,7 @@
|
||||||
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
|
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
|
||||||
"is_training = tf.placeholder_with_default(False, shape=(), name='is_training')\n",
|
"is_training = tf.placeholder_with_default(False, shape=(), name='is_training')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"X_drop = dropout(X, keep_prob, is_training=is_training)\n",
|
"X_drop = tf.layers.dropout(X, dropout_rate, training=is_training)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"weights1_init = initializer([n_inputs, n_hidden1])\n",
|
"weights1_init = initializer([n_inputs, n_hidden1])\n",
|
||||||
"weights2_init = initializer([n_hidden1, n_hidden2])\n",
|
"weights2_init = initializer([n_hidden1, n_hidden2])\n",
|
||||||
|
@ -1177,7 +1214,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 31,
|
"execution_count": 32,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1204,7 +1241,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 32,
|
"execution_count": 33,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1227,7 +1264,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 33,
|
"execution_count": 34,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1242,7 +1279,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 34,
|
"execution_count": 35,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1270,7 +1307,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 35,
|
"execution_count": 36,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1295,7 +1332,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 36,
|
"execution_count": 37,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": true,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1310,7 +1347,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 37,
|
"execution_count": 38,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1360,7 +1397,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 38,
|
"execution_count": 39,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1387,7 +1424,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 39,
|
"execution_count": 40,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1410,7 +1447,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 40,
|
"execution_count": 43,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1478,7 +1515,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 41,
|
"execution_count": 44,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1488,6 +1525,8 @@
|
||||||
"source": [
|
"source": [
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"from functools import partial\n",
|
||||||
|
"\n",
|
||||||
"n_inputs = 28*28\n",
|
"n_inputs = 28*28\n",
|
||||||
"n_hidden1 = 500\n",
|
"n_hidden1 = 500\n",
|
||||||
"n_hidden2 = 500\n",
|
"n_hidden2 = 500\n",
|
||||||
|
@ -1500,20 +1539,22 @@
|
||||||
"\n",
|
"\n",
|
||||||
"initializer = tf.contrib.layers.variance_scaling_initializer()\n",
|
"initializer = tf.contrib.layers.variance_scaling_initializer()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with tf.contrib.framework.arg_scope([fully_connected],\n",
|
"my_dense_layer = partial(\n",
|
||||||
" activation_fn=tf.nn.elu,\n",
|
" tf.layers.dense,\n",
|
||||||
" weights_initializer=initializer):\n",
|
" activation=tf.nn.elu,\n",
|
||||||
" X = tf.placeholder(tf.float32, [None, n_inputs])\n",
|
" kernel_initializer=initializer)\n",
|
||||||
" hidden1 = fully_connected(X, n_hidden1)\n",
|
"\n",
|
||||||
" hidden2 = fully_connected(hidden1, n_hidden2)\n",
|
"X = tf.placeholder(tf.float32, [None, n_inputs])\n",
|
||||||
" hidden3_mean = fully_connected(hidden2, n_hidden3, activation_fn=None)\n",
|
"hidden1 = my_dense_layer(X, n_hidden1)\n",
|
||||||
" hidden3_gamma = fully_connected(hidden2, n_hidden3, activation_fn=None)\n",
|
"hidden2 = my_dense_layer(hidden1, n_hidden2)\n",
|
||||||
" noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n",
|
"hidden3_mean = my_dense_layer(hidden2, n_hidden3, activation=None)\n",
|
||||||
" hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n",
|
"hidden3_gamma = my_dense_layer(hidden2, n_hidden3, activation=None)\n",
|
||||||
" hidden4 = fully_connected(hidden3, n_hidden4)\n",
|
"noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)\n",
|
||||||
" hidden5 = fully_connected(hidden4, n_hidden5)\n",
|
"hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise\n",
|
||||||
" logits = fully_connected(hidden5, n_outputs, activation_fn=None)\n",
|
"hidden4 = my_dense_layer(hidden3, n_hidden4)\n",
|
||||||
" outputs = tf.sigmoid(logits)\n",
|
"hidden5 = my_dense_layer(hidden4, n_hidden5)\n",
|
||||||
|
"logits = my_dense_layer(hidden5, n_outputs, activation=None)\n",
|
||||||
|
"outputs = tf.sigmoid(logits)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits))\n",
|
"reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits))\n",
|
||||||
"latent_loss = 0.5 * tf.reduce_sum(tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)\n",
|
"latent_loss = 0.5 * tf.reduce_sum(tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)\n",
|
||||||
|
@ -1528,7 +1569,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 42,
|
"execution_count": 45,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1565,7 +1606,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 43,
|
"execution_count": 46,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1594,7 +1635,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 44,
|
"execution_count": 47,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1619,7 +1660,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 45,
|
"execution_count": 48,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1647,7 +1688,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 46,
|
"execution_count": 49,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1667,7 +1708,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 47,
|
"execution_count": 50,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1682,7 +1723,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 48,
|
"execution_count": 51,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": true,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1712,7 +1753,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 49,
|
"execution_count": 52,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1787,7 +1828,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.5.2+"
|
"version": "3.5.3"
|
||||||
},
|
},
|
||||||
"nav_menu": {
|
"nav_menu": {
|
||||||
"height": "381px",
|
"height": "381px",
|
||||||
|
|
|
@ -986,6 +986,18 @@
|
||||||
"Let's create a neural network that will take observations as inputs, and output the action to take for each observation. To choose an action, the network will first estimate a probability for each action, then select an action randomly according to the estimated probabilities. In the case of the Cart-Pole environment, there are just two possible actions (left or right), so we only need one output neuron: it will output the probability `p` of the action 0 (left), and of course the probability of action 1 (right) will be `1 - p`."
|
"Let's create a neural network that will take observations as inputs, and output the action to take for each observation. To choose an action, the network will first estimate a probability for each action, then select an action randomly according to the estimated probabilities. In the case of the Cart-Pole environment, there are just two possible actions (left or right), so we only need one output neuron: it will output the probability `p` of the action 0 (left), and of course the probability of action 1 (right) will be `1 - p`."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Note: instead of using the `fully_connected()` function from the `tensorflow.contrib.layers` module (as in the book), we now use the `dense()` function from the `tf.layers` module, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. As you will see, the code is mostly the same.\n",
|
||||||
|
"\n",
|
||||||
|
"The main differences relevant to this chapter are:\n",
|
||||||
|
"* the `_fn` suffix was removed in all the parameters that had it (for example the `activation_fn` parameter was renamed to `activation`).\n",
|
||||||
|
"* the `weights` parameter was renamed to `kernel`,\n",
|
||||||
|
"* the default activation is `None` instead of `tf.nn.relu`"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 34,
|
"execution_count": 34,
|
||||||
|
@ -997,7 +1009,6 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import tensorflow as tf\n",
|
"import tensorflow as tf\n",
|
||||||
"from tensorflow.contrib.layers import fully_connected\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"# 1. Specify the network architecture\n",
|
"# 1. Specify the network architecture\n",
|
||||||
"n_inputs = 4 # == env.observation_space.shape[0]\n",
|
"n_inputs = 4 # == env.observation_space.shape[0]\n",
|
||||||
|
@ -1007,10 +1018,10 @@
|
||||||
"\n",
|
"\n",
|
||||||
"# 2. Build the neural network\n",
|
"# 2. Build the neural network\n",
|
||||||
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
|
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
|
||||||
"hidden = fully_connected(X, n_hidden, activation_fn=tf.nn.elu,\n",
|
"hidden = tf.layers.dense(X, n_hidden, activation=tf.nn.elu,\n",
|
||||||
" weights_initializer=initializer)\n",
|
" kernel_initializer=initializer)\n",
|
||||||
"outputs = fully_connected(hidden, n_outputs, activation_fn=tf.nn.sigmoid,\n",
|
"outputs = tf.layers.dense(hidden, n_outputs, activation=tf.nn.sigmoid,\n",
|
||||||
" weights_initializer=initializer)\n",
|
" kernel_initializer=initializer)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# 3. Select a random action based on the estimated probabilities\n",
|
"# 3. Select a random action based on the estimated probabilities\n",
|
||||||
"p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])\n",
|
"p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])\n",
|
||||||
|
@ -1121,7 +1132,6 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import tensorflow as tf\n",
|
"import tensorflow as tf\n",
|
||||||
"from tensorflow.contrib.layers import fully_connected\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -1136,8 +1146,8 @@
|
||||||
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
|
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
|
||||||
"y = tf.placeholder(tf.float32, shape=[None, n_outputs])\n",
|
"y = tf.placeholder(tf.float32, shape=[None, n_outputs])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"hidden = fully_connected(X, n_hidden, activation_fn=tf.nn.elu, weights_initializer=initializer)\n",
|
"hidden = tf.layers.dense(X, n_hidden, activation=tf.nn.elu, kernel_initializer=initializer)\n",
|
||||||
"logits = fully_connected(hidden, n_outputs, activation_fn=None)\n",
|
"logits = tf.layers.dense(hidden, n_outputs)\n",
|
||||||
"outputs = tf.nn.sigmoid(logits) # probability of action 0 (left)\n",
|
"outputs = tf.nn.sigmoid(logits) # probability of action 0 (left)\n",
|
||||||
"p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])\n",
|
"p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])\n",
|
||||||
"action = tf.multinomial(tf.log(p_left_and_right), num_samples=1)\n",
|
"action = tf.multinomial(tf.log(p_left_and_right), num_samples=1)\n",
|
||||||
|
@ -1275,7 +1285,6 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import tensorflow as tf\n",
|
"import tensorflow as tf\n",
|
||||||
"from tensorflow.contrib.layers import fully_connected\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -1289,8 +1298,8 @@
|
||||||
"\n",
|
"\n",
|
||||||
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
|
"X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"hidden = fully_connected(X, n_hidden, activation_fn=tf.nn.elu, weights_initializer=initializer)\n",
|
"hidden = tf.layers.dense(X, n_hidden, activation=tf.nn.elu, kernel_initializer=initializer)\n",
|
||||||
"logits = fully_connected(hidden, n_outputs, activation_fn=None)\n",
|
"logits = tf.layers.dense(hidden, n_outputs)\n",
|
||||||
"outputs = tf.nn.sigmoid(logits) # probability of action 0 (left)\n",
|
"outputs = tf.nn.sigmoid(logits) # probability of action 0 (left)\n",
|
||||||
"p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])\n",
|
"p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])\n",
|
||||||
"action = tf.multinomial(tf.log(p_left_and_right), num_samples=1)\n",
|
"action = tf.multinomial(tf.log(p_left_and_right), num_samples=1)\n",
|
||||||
|
@ -1366,7 +1375,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 48,
|
"execution_count": 45,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1416,7 +1425,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 49,
|
"execution_count": 46,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": true,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1429,7 +1438,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 50,
|
"execution_count": 47,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1454,7 +1463,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 51,
|
"execution_count": 48,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1499,7 +1508,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 52,
|
"execution_count": 49,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1594,7 +1603,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 53,
|
"execution_count": 50,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1623,7 +1632,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 54,
|
"execution_count": 51,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1637,7 +1646,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 55,
|
"execution_count": 52,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1650,7 +1659,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 56,
|
"execution_count": 53,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1677,7 +1686,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 57,
|
"execution_count": 54,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1691,7 +1700,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 58,
|
"execution_count": 55,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1704,7 +1713,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 59,
|
"execution_count": 56,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1737,7 +1746,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 60,
|
"execution_count": 57,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1759,7 +1768,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 61,
|
"execution_count": 58,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1790,9 +1799,22 @@
|
||||||
"## Build DQN"
|
"## Build DQN"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Note: instead of using `tf.contrib.layers.convolution2d()` or `tf.contrib.layers.conv2d()` (as in the book), we now use the `tf.layers.conv2d()`, which did not exist when this chapter was written. This is preferable because anything in contrib may change or be deleted without notice, while `tf.layers` is part of the official API. As you will see, the code is mostly the same, except that the parameter names have changed slightly:\n",
|
||||||
|
"* the `num_outputs` parameter was renamed to `filters`,\n",
|
||||||
|
"* the `stride` parameter was renamed to `strides`,\n",
|
||||||
|
"* the `_fn` suffix was removed from parameter names that had it (e.g., `activation_fn` was renamed to `activation`),\n",
|
||||||
|
"* the `weights_initializer` parameter was renamed to `kernel_initializer`,\n",
|
||||||
|
"* the weights variable was renamed to `\"kernel\"` (instead of `\"weights\"`), and the biases variable was renamed from `\"biases\"` to `\"bias\"`,\n",
|
||||||
|
"* and the default `activation` is now `None` instead of `tf.nn.relu`."
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 62,
|
"execution_count": 59,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1802,8 +1824,6 @@
|
||||||
"source": [
|
"source": [
|
||||||
"tf.reset_default_graph()\n",
|
"tf.reset_default_graph()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from tensorflow.contrib.layers import convolution2d, fully_connected\n",
|
|
||||||
"\n",
|
|
||||||
"input_height = 88\n",
|
"input_height = 88\n",
|
||||||
"input_width = 80\n",
|
"input_width = 80\n",
|
||||||
"input_channels = 1\n",
|
"input_channels = 1\n",
|
||||||
|
@ -1824,12 +1844,12 @@
|
||||||
" prev_layer = X_state\n",
|
" prev_layer = X_state\n",
|
||||||
" conv_layers = []\n",
|
" conv_layers = []\n",
|
||||||
" with tf.variable_scope(scope) as scope:\n",
|
" with tf.variable_scope(scope) as scope:\n",
|
||||||
" for n_maps, kernel_size, stride, padding, activation in zip(conv_n_maps, conv_kernel_sizes, conv_strides, conv_paddings, conv_activation):\n",
|
" for n_maps, kernel_size, strides, padding, activation in zip(conv_n_maps, conv_kernel_sizes, conv_strides, conv_paddings, conv_activation):\n",
|
||||||
" prev_layer = convolution2d(prev_layer, num_outputs=n_maps, kernel_size=kernel_size, stride=stride, padding=padding, activation_fn=activation, weights_initializer=initializer)\n",
|
" prev_layer = tf.layers.conv2d(prev_layer, filters=n_maps, kernel_size=kernel_size, strides=strides, padding=padding, activation=activation, kernel_initializer=initializer)\n",
|
||||||
" conv_layers.append(prev_layer)\n",
|
" conv_layers.append(prev_layer)\n",
|
||||||
" last_conv_layer_flat = tf.reshape(prev_layer, shape=[-1, n_hidden_inputs])\n",
|
" last_conv_layer_flat = tf.reshape(prev_layer, shape=[-1, n_hidden_inputs])\n",
|
||||||
" hidden = fully_connected(last_conv_layer_flat, n_hidden, activation_fn=hidden_activation, weights_initializer=initializer)\n",
|
" hidden = tf.layers.dense(last_conv_layer_flat, n_hidden, activation=hidden_activation, kernel_initializer=initializer)\n",
|
||||||
" outputs = fully_connected(hidden, n_outputs, activation_fn=None)\n",
|
" outputs = tf.layers.dense(hidden, n_outputs)\n",
|
||||||
" trainable_vars = {var.name[len(scope.name):]: var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)}\n",
|
" trainable_vars = {var.name[len(scope.name):]: var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)}\n",
|
||||||
" return outputs, trainable_vars\n",
|
" return outputs, trainable_vars\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -1857,7 +1877,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 63,
|
"execution_count": 60,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1870,7 +1890,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 64,
|
"execution_count": 61,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1896,7 +1916,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 65,
|
"execution_count": 62,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": true,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -1919,7 +1939,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 66,
|
"execution_count": 63,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"deletable": true,
|
"deletable": true,
|
||||||
|
@ -2023,7 +2043,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.5.2+"
|
"version": "3.5.3"
|
||||||
},
|
},
|
||||||
"nav_menu": {},
|
"nav_menu": {},
|
||||||
"toc": {
|
"toc": {
|
||||||
|
|
Loading…
Reference in New Issue