Make notebook 14 reproducible (except for the dropout section, that one is tough), fixes #274

main
Aurélien Geron 2018-08-06 16:27:32 +01:00
parent 56be8647f9
commit 2688da0477
1 changed file with 76 additions and 123 deletions

View File

@ -1427,9 +1427,7 @@
{
"cell_type": "code",
"execution_count": 85,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@ -1440,9 +1438,7 @@
{
"cell_type": "code",
"execution_count": 86,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"n_steps = 28\n",
@ -1516,9 +1512,7 @@
{
"cell_type": "code",
"execution_count": 90,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"lstm_cell = tf.contrib.rnn.LSTMCell(num_units=n_neurons, use_peepholes=True)"
@ -1527,9 +1521,7 @@
{
"cell_type": "code",
"execution_count": 91,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"gru_cell = tf.contrib.rnn.GRUCell(num_units=n_neurons)"
@ -1559,9 +1551,7 @@
{
"cell_type": "code",
"execution_count": 92,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"from six.moves import urllib\n",
@ -1600,9 +1590,7 @@
{
"cell_type": "code",
"execution_count": 93,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"words = fetch_words_data()"
@ -1627,9 +1615,7 @@
{
"cell_type": "code",
"execution_count": 95,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"from collections import Counter\n",
@ -1679,20 +1665,17 @@
{
"cell_type": "code",
"execution_count": 99,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"from collections import deque\n",
"\n",
"def generate_batch(batch_size, num_skips, skip_window):\n",
" global data_index\n",
" assert batch_size % num_skips == 0\n",
" assert num_skips <= 2 * skip_window\n",
" batch = np.ndarray(shape=(batch_size), dtype=np.int32)\n",
" labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)\n",
" batch = np.ndarray(shape=[batch_size], dtype=np.int32)\n",
" labels = np.ndarray(shape=[batch_size, 1], dtype=np.int32)\n",
" span = 2 * skip_window + 1 # [ skip_window target skip_window ]\n",
" buffer = deque(maxlen=span)\n",
" for _ in range(span):\n",
@ -1703,7 +1686,7 @@
" targets_to_avoid = [ skip_window ]\n",
" for j in range(num_skips):\n",
" while target in targets_to_avoid:\n",
" target = random.randint(0, span - 1)\n",
" target = np.random.randint(0, span)\n",
" targets_to_avoid.append(target)\n",
" batch[i * num_skips + j] = buffer[skip_window]\n",
" labels[i * num_skips + j, 0] = buffer[target]\n",
@ -1715,13 +1698,10 @@
{
"cell_type": "code",
"execution_count": 100,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"data_index=0\n",
"batch, labels = generate_batch(8, 2, 1)"
"np.random.seed(42)"
]
},
{
@ -1730,7 +1710,8 @@
"metadata": {},
"outputs": [],
"source": [
"batch, [vocabulary[word] for word in batch]"
"data_index = 0\n",
"batch, labels = generate_batch(8, 2, 1)"
]
},
{
@ -1738,6 +1719,15 @@
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"batch, [vocabulary[word] for word in batch]"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"labels, [vocabulary[word] for word in labels[:, 0]]"
]
@ -1751,10 +1741,8 @@
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {
"collapsed": true
},
"execution_count": 104,
"metadata": {},
"outputs": [],
"source": [
"batch_size = 128\n",
@ -1775,10 +1763,8 @@
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {
"collapsed": true
},
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@ -1790,10 +1776,8 @@
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {
"collapsed": true
},
"execution_count": 106,
"metadata": {},
"outputs": [],
"source": [
"vocabulary_size = 50000\n",
@ -1806,10 +1790,8 @@
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {
"collapsed": true
},
"execution_count": 107,
"metadata": {},
"outputs": [],
"source": [
"train_inputs = tf.placeholder(tf.int32, shape=[None])\n",
@ -1818,10 +1800,8 @@
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {
"collapsed": true
},
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"# Construct the variables for the NCE loss\n",
@ -1842,7 +1822,7 @@
"training_op = optimizer.minimize(loss)\n",
"\n",
"# Compute the cosine similarity between minibatch examples and all embeddings.\n",
"norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), axis=1, keep_dims=True))\n",
"norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), axis=1, keepdims=True))\n",
"normalized_embeddings = embeddings / norm\n",
"valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)\n",
"similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True)\n",
@ -1860,7 +1840,7 @@
},
{
"cell_type": "code",
"execution_count": 108,
"execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
@ -1912,10 +1892,8 @@
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {
"collapsed": true
},
"execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
"np.save(\"./my_final_embeddings.npy\", final_embeddings)"
@ -1930,10 +1908,8 @@
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {
"collapsed": true
},
"execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
"def plot_with_labels(low_dim_embs, labels):\n",
@ -1952,7 +1928,7 @@
},
{
"cell_type": "code",
"execution_count": 111,
"execution_count": 112,
"metadata": {},
"outputs": [],
"source": [
@ -1981,10 +1957,8 @@
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {
"collapsed": true
},
"execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
@ -2024,10 +1998,8 @@
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {
"collapsed": true
},
"execution_count": 114,
"metadata": {},
"outputs": [],
"source": [
"logits_flat = tf.reshape(logits, [-1, num_decoder_symbols])\n",
@ -2080,16 +2052,10 @@
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {
"collapsed": true
},
"execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
"from random import choice, seed\n",
"\n",
"# to make this notebook's output stable across runs\n",
"seed(42)\n",
"np.random.seed(42)\n",
"\n",
"default_reber_grammar = [\n",
@ -2114,7 +2080,8 @@
" state = 0\n",
" output = []\n",
" while state is not None:\n",
" production, state = choice(grammar[state])\n",
" index = np.random.randint(len(grammar[state]))\n",
" production, state = grammar[state][index]\n",
" if isinstance(production, list):\n",
" production = generate_string(grammar=production)\n",
" output.append(production)\n",
@ -2130,7 +2097,7 @@
},
{
"cell_type": "code",
"execution_count": 115,
"execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
@ -2147,7 +2114,7 @@
},
{
"cell_type": "code",
"execution_count": 116,
"execution_count": 117,
"metadata": {},
"outputs": [],
"source": [
@ -2164,17 +2131,15 @@
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {
"collapsed": true
},
"execution_count": 118,
"metadata": {},
"outputs": [],
"source": [
"def generate_corrupted_string(grammar, chars=\"BEPSTVX\"):\n",
" good_string = generate_string(grammar)\n",
" index = np.random.randint(len(good_string))\n",
" good_char = good_string[index]\n",
" bad_char = choice(list(set(chars) - set(good_char)))\n",
" bad_char = np.random.choice(sorted(set(chars) - set(good_char)))\n",
" return good_string[:index] + bad_char + good_string[index + 1:]"
]
},
@ -2187,7 +2152,7 @@
},
{
"cell_type": "code",
"execution_count": 118,
"execution_count": 119,
"metadata": {},
"outputs": [],
"source": [
@ -2204,10 +2169,8 @@
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {
"collapsed": true
},
"execution_count": 120,
"metadata": {},
"outputs": [],
"source": [
"def string_to_one_hot_vectors(string, n_steps, chars=\"BEPSTVX\"):\n",
@ -2220,7 +2183,7 @@
},
{
"cell_type": "code",
"execution_count": 120,
"execution_count": 121,
"metadata": {},
"outputs": [],
"source": [
@ -2236,10 +2199,8 @@
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {
"collapsed": true
},
"execution_count": 122,
"metadata": {},
"outputs": [],
"source": [
"def generate_dataset(size):\n",
@ -2260,10 +2221,8 @@
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {
"collapsed": true
},
"execution_count": 123,
"metadata": {},
"outputs": [],
"source": [
"X_train, l_train, y_train = generate_dataset(10000)"
@ -2278,7 +2237,7 @@
},
{
"cell_type": "code",
"execution_count": 123,
"execution_count": 124,
"metadata": {},
"outputs": [],
"source": [
@ -2294,7 +2253,7 @@
},
{
"cell_type": "code",
"execution_count": 124,
"execution_count": 125,
"metadata": {},
"outputs": [],
"source": [
@ -2310,7 +2269,7 @@
},
{
"cell_type": "code",
"execution_count": 125,
"execution_count": 126,
"metadata": {},
"outputs": [],
"source": [
@ -2328,10 +2287,8 @@
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {
"collapsed": true
},
"execution_count": 127,
"metadata": {},
"outputs": [],
"source": [
"reset_graph()\n",
@ -2379,10 +2336,8 @@
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {
"collapsed": true
},
"execution_count": 128,
"metadata": {},
"outputs": [],
"source": [
"X_val, l_val, y_val = generate_dataset(5000)"
@ -2390,7 +2345,7 @@
},
{
"cell_type": "code",
"execution_count": 128,
"execution_count": 129,
"metadata": {},
"outputs": [],
"source": [
@ -2423,13 +2378,13 @@
},
{
"cell_type": "code",
"execution_count": 129,
"execution_count": 130,
"metadata": {},
"outputs": [],
"source": [
"test_strings = [\n",
" \"BPBTSSSSSSSSSSSSXXTTTTTVPXTTVPXTTTTTTTVPXVPXVPXTTTVVETE\",\n",
" \"BPBTSSSSSSSSSSSSXXTTTTTVPXTTVPXTTTTTTTVPXVPXVPXTTTVVEPE\"]\n",
" \"BPBTSSSSSSSXXTTVPXVPXTTTTTVVETE\",\n",
" \"BPBTSSSSSSSXXTTVPXVPXTTTTTVVEPE\"]\n",
"l_test = np.array([len(s) for s in test_strings])\n",
"max_length = l_test.max()\n",
"X_test = [string_to_one_hot_vectors(s, n_steps=max_length)\n",
@ -2449,7 +2404,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Ta-da! It worked fine. The RNN found the correct answers with absolute confidence. :)"
"Ta-da! It worked fine. The RNN found the correct answers with high confidence. :)"
]
},
{
@ -2469,9 +2424,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": []
}
@ -2492,7 +2445,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.6.6"
},
"nav_menu": {},
"toc": {