
Don't round logged running_loss_avg
joyce-chen-uni committed Aug 28, 2024
1 parent bf32572 commit 0467865
Showing 2 changed files with 3 additions and 3 deletions.
llmfoundry/callbacks/kill_loss_spike_callback.py (2 changes: 1 addition & 1 deletion)
@@ -135,7 +135,7 @@ def _handle_loss_spike(
         else:
             raise LossSpikeError(
                 outlier_multiplier=self.outlier_multiplier,
-                running_loss_avg=round(running_loss_avg),
+                running_loss_avg=running_loss_avg,
                 outlier_counter=self.outlier_counter,
                 loss_window=list(self.loss_window),
             )
llmfoundry/utils/exceptions.py (4 changes: 2 additions & 2 deletions)
@@ -395,12 +395,12 @@ class LossSpikeError(UserError):
     def __init__(
         self,
         outlier_multiplier: float,
-        running_loss_avg: int,
+        running_loss_avg: float,
         outlier_counter: int,
         loss_window: list[int],
     ) -> None:
         message = f'Training stopped due to a loss spike. The training loss was more than {outlier_multiplier} times greater than \
-            the running average loss (approx. {running_loss_avg}) over {outlier_counter} consecutive training steps. \
+            the running average loss (approx. {round(running_loss_avg, 1)}) over {outlier_counter} consecutive training steps. \
             Please try submitting the run again with a lower learning rate.'
 
         super().__init__(
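
For reference, Python's round(x) with no ndigits argument returns an int, while round(x, 1) keeps one decimal place, so this commit trades a coarse integer for a one-decimal approximation in the error message. A minimal sketch of the difference (the loss value below is made up for illustration):

    running_loss_avg = 3.478  # hypothetical running average loss

    # Old behavior: the callback rounded before raising, so the
    # exception only ever saw an integer.
    print(round(running_loss_avg))     # 3

    # New behavior: the raw float is passed through, and the exception
    # message rounds to one decimal place for display.
    print(round(running_loss_avg, 1))  # 3.5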
