Merge branch 'master' into ziembla-mod
commit
2c9b0dab8a
|
@ -1909,7 +1909,7 @@
|
||||||
" error = Y_proba - Y_train_one_hot\n",
|
" error = Y_proba - Y_train_one_hot\n",
|
||||||
" if iteration % 500 == 0:\n",
|
" if iteration % 500 == 0:\n",
|
||||||
" print(iteration, loss)\n",
|
" print(iteration, loss)\n",
|
||||||
" gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_inputs]), alpha * Theta[1:]]\n",
|
" gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_outputs]), alpha * Theta[1:]]\n",
|
||||||
" Theta = Theta - eta * gradients"
|
" Theta = Theta - eta * gradients"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -1987,7 +1987,7 @@
|
||||||
" l2_loss = 1/2 * np.sum(np.square(Theta[1:]))\n",
|
" l2_loss = 1/2 * np.sum(np.square(Theta[1:]))\n",
|
||||||
" loss = xentropy_loss + alpha * l2_loss\n",
|
" loss = xentropy_loss + alpha * l2_loss\n",
|
||||||
" error = Y_proba - Y_train_one_hot\n",
|
" error = Y_proba - Y_train_one_hot\n",
|
||||||
" gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_inputs]), alpha * Theta[1:]]\n",
|
" gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_outputs]), alpha * Theta[1:]]\n",
|
||||||
" Theta = Theta - eta * gradients\n",
|
" Theta = Theta - eta * gradients\n",
|
||||||
"\n",
|
"\n",
|
||||||
" logits = X_valid.dot(Theta)\n",
|
" logits = X_valid.dot(Theta)\n",
|
||||||
|
|
|
@ -1203,7 +1203,7 @@
|
||||||
"source": [
|
"source": [
|
||||||
"The paper uses a special margin loss to make it possible to detect two or more different digits in each image:\n",
|
"The paper uses a special margin loss to make it possible to detect two or more different digits in each image:\n",
|
||||||
"\n",
|
"\n",
|
||||||
"$ L_k = T_k \\max(0, m^{+} - \\|\\mathbf{v}_k\\|)^2 - \\lambda (1 - T_k) \\max(0, \\|\\mathbf{v}_k\\| - m^{-})^2$\n",
|
"$ L_k = T_k \\max(0, m^{+} - \\|\\mathbf{v}_k\\|)^2 + \\lambda (1 - T_k) \\max(0, \\|\\mathbf{v}_k\\| - m^{-})^2$\n",
|
||||||
"\n",
|
"\n",
|
||||||
"* $T_k$ is equal to 1 if the digit of class $k$ is present, or 0 otherwise.\n",
|
"* $T_k$ is equal to 1 if the digit of class $k$ is present, or 0 otherwise.\n",
|
||||||
"* In the paper, $m^{+} = 0.9$, $m^{-} = 0.1$ and $\\lambda = 0.5$.\n",
|
"* In the paper, $m^{+} = 0.9$, $m^{-} = 0.1$ and $\\lambda = 0.5$.\n",
|
||||||
|
|
Loading…
Reference in New Issue