Compute the losses only when needed
parent
6311ef8184
commit
935f26de1c
|
@ -1521,10 +1521,10 @@
|
||||||
"for iteration in range(n_iterations):\n",
|
"for iteration in range(n_iterations):\n",
|
||||||
" logits = X_train.dot(Theta)\n",
|
" logits = X_train.dot(Theta)\n",
|
||||||
" Y_proba = softmax(logits)\n",
|
" Y_proba = softmax(logits)\n",
|
||||||
" loss = -np.mean(np.sum(Y_train_one_hot * np.log(Y_proba + epsilon), axis=1))\n",
|
|
||||||
" error = Y_proba - Y_train_one_hot\n",
|
|
||||||
" if iteration % 500 == 0:\n",
|
" if iteration % 500 == 0:\n",
|
||||||
|
" loss = -np.mean(np.sum(Y_train_one_hot * np.log(Y_proba + epsilon), axis=1))\n",
|
||||||
" print(iteration, loss)\n",
|
" print(iteration, loss)\n",
|
||||||
|
" error = Y_proba - Y_train_one_hot\n",
|
||||||
" gradients = 1/m * X_train.T.dot(error)\n",
|
" gradients = 1/m * X_train.T.dot(error)\n",
|
||||||
" Theta = Theta - eta * gradients"
|
" Theta = Theta - eta * gradients"
|
||||||
]
|
]
|
||||||
|
@ -1590,12 +1590,12 @@
|
||||||
"for iteration in range(n_iterations):\n",
|
"for iteration in range(n_iterations):\n",
|
||||||
" logits = X_train.dot(Theta)\n",
|
" logits = X_train.dot(Theta)\n",
|
||||||
" Y_proba = softmax(logits)\n",
|
" Y_proba = softmax(logits)\n",
|
||||||
" xentropy_loss = -np.mean(np.sum(Y_train_one_hot * np.log(Y_proba + epsilon), axis=1))\n",
|
|
||||||
" l2_loss = 1/2 * np.sum(np.square(Theta[1:]))\n",
|
|
||||||
" loss = xentropy_loss + alpha * l2_loss\n",
|
|
||||||
" error = Y_proba - Y_train_one_hot\n",
|
|
||||||
" if iteration % 500 == 0:\n",
|
" if iteration % 500 == 0:\n",
|
||||||
|
" xentropy_loss = -np.mean(np.sum(Y_train_one_hot * np.log(Y_proba + epsilon), axis=1))\n",
|
||||||
|
" l2_loss = 1/2 * np.sum(np.square(Theta[1:]))\n",
|
||||||
|
" loss = xentropy_loss + alpha * l2_loss\n",
|
||||||
" print(iteration, loss)\n",
|
" print(iteration, loss)\n",
|
||||||
|
" error = Y_proba - Y_train_one_hot\n",
|
||||||
" gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_outputs]), alpha * Theta[1:]]\n",
|
" gradients = 1/m * X_train.T.dot(error) + np.r_[np.zeros([1, n_outputs]), alpha * Theta[1:]]\n",
|
||||||
" Theta = Theta - eta * gradients"
|
" Theta = Theta - eta * gradients"
|
||||||
]
|
]
|
||||||
|
@ -1793,7 +1793,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.7.8"
|
"version": "3.7.9"
|
||||||
},
|
},
|
||||||
"nav_menu": {},
|
"nav_menu": {},
|
||||||
"toc": {
|
"toc": {
|
||||||
|
|
Loading…
Reference in New Issue