Update autodiff notebook to TF 2.x

2020-04-12 18:43:31 +12:00 · 2020-04-12 18:43:31 +12:00 · 1451060165
parent 15b5ba5e10
commit 1451060165
1 changed files with 61 additions and 72 deletions
--- a/work_in_progress/extra_autodiff.ipynb
+++ b/work_in_progress/extra_autodiff.ipynb
@ -37,7 +37,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
@ -58,7 +58,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
@ -75,7 +75,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@ -106,7 +106,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
@ -116,7 +116,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
@ -146,7 +146,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
@ -164,7 +164,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@ -174,7 +174,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@ -192,12 +192,12 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "The good news is that it is pretty easy to compute the Hessians. First let's create functions that compute the first order derivatives (also called Jacobians):"
+    "The good news is that it is pretty easy to compute the Hessians. First let's create functions that compute the first-order partial derivatives (also called Jacobians):"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
@ -219,7 +219,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
@ -229,7 +229,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
@ -259,7 +259,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
@ -307,7 +307,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
@ -325,7 +325,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
@ -391,7 +391,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
@ -433,7 +433,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
@ -452,7 +452,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
@ -468,7 +468,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
@ -480,7 +480,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
@ -548,7 +548,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
@ -590,7 +590,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
@ -606,7 +606,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
@ -622,7 +622,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
@ -641,7 +641,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
@ -658,7 +658,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
@ -667,7 +667,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
@ -698,7 +698,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
@ -752,7 +752,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
@ -766,7 +766,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
@ -775,7 +775,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
@ -784,7 +784,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
@ -793,7 +793,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
@ -816,7 +816,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
@ -825,57 +825,53 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
-    "tf.reset_default_graph()\n",
+    "x = tf.Variable(3.)\n",
+    "y = tf.Variable(4.)\n",
    "\n",
-    "x = tf.Variable(3., name=\"x\")\n",
-    "y = tf.Variable(4., name=\"y\")\n",
+    "with tf.GradientTape() as tape:\n",
    "    f = x*x*y + y + 2\n",
    "\n",
-    "jacobians = tf.gradients(f, [x, y])\n",
-    "\n",
-    "init = tf.global_variables_initializer()\n",
-    "\n",
-    "with tf.Session() as sess:\n",
-    "    init.run()\n",
-    "    f_val, jacobians_val = sess.run([f, jacobians])\n",
-    "\n",
-    "f_val, jacobians_val"
+    "jacobians = tape.gradient(f, [x, y])\n",
+    "jacobians"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Since everything is symbolic, we can compute second order derivatives, and beyond. However, when we compute the derivative of a tensor with regards to a variable that it does not depend on, instead of returning 0.0, the `gradients()` function returns None, which cannot be evaluated by `sess.run()`. So beware of `None` values. Here we just replace them with zero tensors."
+    "Since the tape also records the operations performed when gradients are computed inside its context, we can compute second-order derivatives, and beyond:"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
-    "hessians_x = tf.gradients(jacobians[0], [x, y])\n",
-    "hessians_y = tf.gradients(jacobians[1], [x, y])\n",
+    "x = tf.Variable(3.)\n",
+    "y = tf.Variable(4.)\n",
    "\n",
-    "def replace_none_with_zero(tensors):\n",
-    "    return [tensor if tensor is not None else tf.constant(0.)\n",
-    "            for tensor in tensors]\n",
+    "with tf.GradientTape(persistent=True) as tape:\n",
+    "    f = x*x*y + y + 2\n",
+    "    df_dx, df_dy = tape.gradient(f, [x, y])\n",
    "\n",
-    "hessians_x = replace_none_with_zero(hessians_x)\n",
-    "hessians_y = replace_none_with_zero(hessians_y)\n",
+    "d2f_d2x, d2f_dydx = tape.gradient(df_dx, [x, y])\n",
+    "d2f_dxdy, d2f_d2y = tape.gradient(df_dy, [x, y])\n",
+    "del tape\n",
    "\n",
-    "init = tf.global_variables_initializer()\n",
-    "\n",
-    "with tf.Session() as sess:\n",
-    "    init.run()\n",
-    "    hessians_x_val, hessians_y_val = sess.run([hessians_x, hessians_y])\n",
-    "\n",
-    "hessians_x_val, hessians_y_val"
+    "hessians = [[d2f_d2x, d2f_dydx], [d2f_dxdy, d2f_d2y]]\n",
+    "hessians"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that when we compute the derivative of a tensor with regard to a variable that it does not depend on, the tape's `gradient()` method returns `None` instead of 0.0. For example, `d2f_d2y` above is `None`, because `df_dy` (which equals `x*x + 1`) does not depend on `y`."
   ]
  },
  {
@ -884,13 +880,6 @@
   "source": [
    "And that's all folks! Hope you enjoyed this notebook."
   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
  }
 ],
 "metadata": {
@ -909,7 +898,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.5.2"
+   "version": "3.7.6"
  },
  "nav_menu": {
   "height": "603px",