Make notebook 14 reproducible (except for the dropout section, that one is tough), fixes #274

main
Aurélien Geron 2018-08-06 16:27:32 +01:00
parent 56be8647f9
commit 2688da0477
1 changed file with 76 additions and 123 deletions


@@ -1427,9 +1427,7 @@
 {
 "cell_type": "code",
 "execution_count": 85,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "reset_graph()\n",
@@ -1440,9 +1438,7 @@
 {
 "cell_type": "code",
 "execution_count": 86,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "n_steps = 28\n",
@@ -1516,9 +1512,7 @@
 {
 "cell_type": "code",
 "execution_count": 90,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "lstm_cell = tf.contrib.rnn.LSTMCell(num_units=n_neurons, use_peepholes=True)"
@@ -1527,9 +1521,7 @@
 {
 "cell_type": "code",
 "execution_count": 91,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "gru_cell = tf.contrib.rnn.GRUCell(num_units=n_neurons)"
@@ -1559,9 +1551,7 @@
 {
 "cell_type": "code",
 "execution_count": 92,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "from six.moves import urllib\n",
@@ -1600,9 +1590,7 @@
 {
 "cell_type": "code",
 "execution_count": 93,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "words = fetch_words_data()"
@@ -1627,9 +1615,7 @@
 {
 "cell_type": "code",
 "execution_count": 95,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "from collections import Counter\n",
@@ -1679,20 +1665,17 @@
 {
 "cell_type": "code",
 "execution_count": 99,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
-"import random\n",
 "from collections import deque\n",
 "\n",
 "def generate_batch(batch_size, num_skips, skip_window):\n",
 "    global data_index\n",
 "    assert batch_size % num_skips == 0\n",
 "    assert num_skips <= 2 * skip_window\n",
-"    batch = np.ndarray(shape=(batch_size), dtype=np.int32)\n",
-"    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)\n",
+"    batch = np.ndarray(shape=[batch_size], dtype=np.int32)\n",
+"    labels = np.ndarray(shape=[batch_size, 1], dtype=np.int32)\n",
 "    span = 2 * skip_window + 1 # [ skip_window target skip_window ]\n",
 "    buffer = deque(maxlen=span)\n",
 "    for _ in range(span):\n",
@@ -1703,7 +1686,7 @@
 "        targets_to_avoid = [ skip_window ]\n",
 "        for j in range(num_skips):\n",
 "            while target in targets_to_avoid:\n",
-"                target = random.randint(0, span - 1)\n",
+"                target = np.random.randint(0, span)\n",
 "            targets_to_avoid.append(target)\n",
 "            batch[i * num_skips + j] = buffer[skip_window]\n",
 "            labels[i * num_skips + j, 0] = buffer[target]\n",
@@ -1715,13 +1698,10 @@
 {
 "cell_type": "code",
 "execution_count": 100,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
-"data_index=0\n",
-"batch, labels = generate_batch(8, 2, 1)"
+"np.random.seed(42)"
 ]
 },
 {
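This cell seeds NumPy's global RNG; combined with the data_index reset in the next hunk, batch generation becomes deterministic. A quick sanity check one could run in the notebook (not part of the diff; it relies on the notebook's generate_batch and data_index):

import numpy as np

np.random.seed(42)
data_index = 0
batch1, labels1 = generate_batch(8, 2, 1)

np.random.seed(42)
data_index = 0
batch2, labels2 = generate_batch(8, 2, 1)

assert (batch1 == batch2).all() and (labels1 == labels2).all()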
@@ -1730,7 +1710,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"batch, [vocabulary[word] for word in batch]"
+"data_index = 0\n",
+"batch, labels = generate_batch(8, 2, 1)"
 ]
 },
 {
@@ -1738,6 +1719,15 @@
 "execution_count": 102,
 "metadata": {},
 "outputs": [],
+"source": [
+"batch, [vocabulary[word] for word in batch]"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 103,
+"metadata": {},
+"outputs": [],
 "source": [
 "labels, [vocabulary[word] for word in labels[:, 0]]"
 ]
@@ -1751,10 +1741,8 @@
 },
 {
 "cell_type": "code",
-"execution_count": 103,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 104,
+"metadata": {},
 "outputs": [],
 "source": [
 "batch_size = 128\n",
@@ -1775,10 +1763,8 @@
 },
 {
 "cell_type": "code",
-"execution_count": 104,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 105,
+"metadata": {},
 "outputs": [],
 "source": [
 "reset_graph()\n",
@@ -1790,10 +1776,8 @@
 },
 {
 "cell_type": "code",
-"execution_count": 105,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 106,
+"metadata": {},
 "outputs": [],
 "source": [
 "vocabulary_size = 50000\n",
@@ -1806,10 +1790,8 @@
 },
 {
 "cell_type": "code",
-"execution_count": 106,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 107,
+"metadata": {},
 "outputs": [],
 "source": [
 "train_inputs = tf.placeholder(tf.int32, shape=[None])\n",
@@ -1818,10 +1800,8 @@
 },
 {
 "cell_type": "code",
-"execution_count": 107,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 108,
+"metadata": {},
 "outputs": [],
 "source": [
 "# Construct the variables for the NCE loss\n",
@@ -1842,7 +1822,7 @@
 "training_op = optimizer.minimize(loss)\n",
 "\n",
 "# Compute the cosine similarity between minibatch examples and all embeddings.\n",
-"norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), axis=1, keep_dims=True))\n",
+"norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), axis=1, keepdims=True))\n",
 "normalized_embeddings = embeddings / norm\n",
 "valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)\n",
 "similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True)\n",
@@ -1860,7 +1840,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 108,
+"execution_count": 109,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1912,10 +1892,8 @@
 },
 {
 "cell_type": "code",
-"execution_count": 109,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 110,
+"metadata": {},
 "outputs": [],
 "source": [
 "np.save(\"./my_final_embeddings.npy\", final_embeddings)"
@@ -1930,10 +1908,8 @@
 },
 {
 "cell_type": "code",
-"execution_count": 110,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 111,
+"metadata": {},
 "outputs": [],
 "source": [
 "def plot_with_labels(low_dim_embs, labels):\n",
@@ -1952,7 +1928,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 111,
+"execution_count": 112,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1981,10 +1957,8 @@
 },
 {
 "cell_type": "code",
-"execution_count": 112,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 113,
+"metadata": {},
 "outputs": [],
 "source": [
 "import tensorflow as tf\n",
@@ -2024,10 +1998,8 @@
 },
 {
 "cell_type": "code",
-"execution_count": 113,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 114,
+"metadata": {},
 "outputs": [],
 "source": [
 "logits_flat = tf.reshape(logits, [-1, num_decoder_symbols])\n",
@@ -2080,16 +2052,10 @@
 },
 {
 "cell_type": "code",
-"execution_count": 114,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 115,
+"metadata": {},
 "outputs": [],
 "source": [
-"from random import choice, seed\n",
-"\n",
-"# to make this notebook's output stable across runs\n",
-"seed(42)\n",
 "np.random.seed(42)\n",
 "\n",
 "default_reber_grammar = [\n",
@@ -2114,7 +2080,8 @@
 "    state = 0\n",
 "    output = []\n",
 "    while state is not None:\n",
-"        production, state = choice(grammar[state])\n",
+"        index = np.random.randint(len(grammar[state]))\n",
+"        production, state = grammar[state][index]\n",
 "        if isinstance(production, list):\n",
 "            production = generate_string(grammar=production)\n",
 "        output.append(production)\n",
@@ -2130,7 +2097,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 115,
+"execution_count": 116,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -2147,7 +2114,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 116,
+"execution_count": 117,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -2164,17 +2131,15 @@
 },
 {
 "cell_type": "code",
-"execution_count": 117,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 118,
+"metadata": {},
 "outputs": [],
 "source": [
 "def generate_corrupted_string(grammar, chars=\"BEPSTVX\"):\n",
 "    good_string = generate_string(grammar)\n",
 "    index = np.random.randint(len(good_string))\n",
 "    good_char = good_string[index]\n",
-"    bad_char = choice(list(set(chars) - set(good_char)))\n",
+"    bad_char = np.random.choice(sorted(set(chars) - set(good_char)))\n",
 "    return good_string[:index] + bad_char + good_string[index + 1:]"
 ]
 },
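The sorted(...) here is the subtle part: Python randomizes string hashing per process (PYTHONHASHSEED), so list(set(chars) - set(good_char)) can enumerate the candidate characters in a different order on each run, which would defeat the seeded RNG. Sorting pins the candidate order before the draw. A small demonstration (illustrative, not part of the diff):

import numpy as np

candidates = set("BEPSTVX") - set("T")
# list(candidates) may vary from one interpreter process to the next;
# sorted(candidates) is always ['B', 'E', 'P', 'S', 'V', 'X'].
np.random.seed(42)
bad_char = np.random.choice(sorted(candidates))  # reproducible draw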
@@ -2187,7 +2152,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 118,
+"execution_count": 119,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -2204,10 +2169,8 @@
 },
 {
 "cell_type": "code",
-"execution_count": 119,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 120,
+"metadata": {},
 "outputs": [],
 "source": [
 "def string_to_one_hot_vectors(string, n_steps, chars=\"BEPSTVX\"):\n",
@@ -2220,7 +2183,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 120,
+"execution_count": 121,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -2236,10 +2199,8 @@
 },
 {
 "cell_type": "code",
-"execution_count": 121,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 122,
+"metadata": {},
 "outputs": [],
 "source": [
 "def generate_dataset(size):\n",
@@ -2260,10 +2221,8 @@
 },
 {
 "cell_type": "code",
-"execution_count": 122,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 123,
+"metadata": {},
 "outputs": [],
 "source": [
 "X_train, l_train, y_train = generate_dataset(10000)"
@@ -2278,7 +2237,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 123,
+"execution_count": 124,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -2294,7 +2253,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 124,
+"execution_count": 125,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -2310,7 +2269,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 125,
+"execution_count": 126,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -2328,10 +2287,8 @@
 },
 {
 "cell_type": "code",
-"execution_count": 126,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 127,
+"metadata": {},
 "outputs": [],
 "source": [
 "reset_graph()\n",
@@ -2379,10 +2336,8 @@
 },
 {
 "cell_type": "code",
-"execution_count": 127,
-"metadata": {
-"collapsed": true
-},
+"execution_count": 128,
+"metadata": {},
 "outputs": [],
 "source": [
 "X_val, l_val, y_val = generate_dataset(5000)"
@@ -2390,7 +2345,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 128,
+"execution_count": 129,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -2423,13 +2378,13 @@
 },
 {
 "cell_type": "code",
-"execution_count": 129,
+"execution_count": 130,
 "metadata": {},
 "outputs": [],
 "source": [
 "test_strings = [\n",
-"    \"BPBTSSSSSSSSSSSSXXTTTTTVPXTTVPXTTTTTTTVPXVPXVPXTTTVVETE\",\n",
-"    \"BPBTSSSSSSSSSSSSXXTTTTTVPXTTVPXTTTTTTTVPXVPXVPXTTTVVEPE\"]\n",
+"    \"BPBTSSSSSSSXXTTVPXVPXTTTTTVVETE\",\n",
+"    \"BPBTSSSSSSSXXTTVPXVPXTTTTTVVEPE\"]\n",
 "l_test = np.array([len(s) for s in test_strings])\n",
 "max_length = l_test.max()\n",
 "X_test = [string_to_one_hot_vectors(s, n_steps=max_length)\n",
@@ -2449,7 +2404,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Ta-da! It worked fine. The RNN found the correct answers with absolute confidence. :)"
+"Ta-da! It worked fine. The RNN found the correct answers with high confidence. :)"
 ]
 },
 {
@@ -2469,9 +2424,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": []
 }
@@ -2492,7 +2445,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.5"
+"version": "3.6.6"
 },
 "nav_menu": {},
 "toc": {