Merge branch 'master' into ziembla-mod

main
ziembla 2017-12-10 18:53:34 +01:00
commit 2c9b0dab8a
2 changed files with 3 additions and 3 deletions

View File

@@ -1909,7 +1909,7 @@
" error = Y_proba - Y_train_one_hot\n",
" if iteration % 500 == 0:\n",
" print(iteration, loss)\n",
" gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_inputs]), alpha * Theta[1:]]\n",
" gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_outputs]), alpha * Theta[1:]]\n",
" Theta = Theta - eta * gradients"
]
},
@@ -1987,7 +1987,7 @@
" l2_loss = 1/2 * np.sum(np.square(Theta[1:]))\n",
" loss = xentropy_loss + alpha * l2_loss\n",
" error = Y_proba - Y_train_one_hot\n",
" gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_inputs]), alpha * Theta[1:]]\n",
" gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_outputs]), alpha * Theta[1:]]\n",
" Theta = Theta - eta * gradients\n",
"\n",
" logits = X_valid.dot(Theta)\n",

View File

@@ -1203,7 +1203,7 @@
"source": [
"The paper uses a special margin loss to make it possible to detect two or more different digits in each image:\n",
"\n",
"$ L_k = T_k \\max(0, m^{+} - \\|\\mathbf{v}_k\\|)^2 - \\lambda (1 - T_k) \\max(0, \\|\\mathbf{v}_k\\| - m^{-})^2$\n",
"$ L_k = T_k \\max(0, m^{+} - \\|\\mathbf{v}_k\\|)^2 + \\lambda (1 - T_k) \\max(0, \\|\\mathbf{v}_k\\| - m^{-})^2$\n",
"\n",
"* $T_k$ is equal to 1 if the digit of class $k$ is present, or 0 otherwise.\n",
"* In the paper, $m^{+} = 0.9$, $m^{-} = 0.1$ and $\\lambda = 0.5$.\n",