From c50b5d9b3b5754f8bb84a5b290a22b3688c7d30a Mon Sep 17 00:00:00 2001 From: Stefan Schweter Date: Thu, 30 Nov 2017 17:36:19 +0100 Subject: [PATCH 1/2] capsnet: fix margin loss formula --- extra_capsnets.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra_capsnets.ipynb b/extra_capsnets.ipynb index 67e67bd..89e343e 100644 --- a/extra_capsnets.ipynb +++ b/extra_capsnets.ipynb @@ -1191,7 +1191,7 @@ "source": [ "The paper uses a special margin loss to make it possible to detect two or more different digits in each image:\n", "\n", - "$ L_k = T_k \\max(0, m^{+} - \\|\\mathbf{v}_k\\|)^2 - \\lambda (1 - T_k) \\max(0, \\|\\mathbf{v}_k\\| - m^{-})^2$\n", + "$ L_k = T_k \\max(0, m^{+} - \\|\\mathbf{v}_k\\|)^2 + \\lambda (1 - T_k) \\max(0, \\|\\mathbf{v}_k\\| - m^{-})^2$\n", "\n", "* $T_k$ is equal to 1 if the digit of class $k$ is present, or 0 otherwise.\n", "* In the paper, $m^{+} = 0.9$, $m^{-} = 0.1$ and $\\lambda = 0.5$.\n", From 63c1523528dae7fa10878b0af3f6bc71765ff11b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Thu, 7 Dec 2017 18:57:30 -0800 Subject: [PATCH 2/2] Replace n_inputs with n_outputs, fixes #125 --- 04_training_linear_models.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/04_training_linear_models.ipynb b/04_training_linear_models.ipynb index c0bea14..a32fdea 100644 --- a/04_training_linear_models.ipynb +++ b/04_training_linear_models.ipynb @@ -1909,7 +1909,7 @@ " error = Y_proba - Y_train_one_hot\n", " if iteration % 500 == 0:\n", " print(iteration, loss)\n", - " gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_inputs]), alpha * Theta[1:]]\n", + " gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_outputs]), alpha * Theta[1:]]\n", " Theta = Theta - eta * gradients" ] }, @@ -1987,7 +1987,7 @@ " l2_loss = 1/2 * np.sum(np.square(Theta[1:]))\n", " loss = xentropy_loss + alpha * l2_loss\n", " error = Y_proba - Y_train_one_hot\n", - " gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_inputs]), alpha * Theta[1:]]\n", + " gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_outputs]), alpha * Theta[1:]]\n", " Theta = Theta - eta * gradients\n", "\n", " logits = X_valid.dot(Theta)\n",