Skip to content

Commit

Permalink
Add Mlflow 403 PL UserError (#1623)
Browse files Browse the repository at this point in the history
Co-authored-by: Daniel King <[email protected]>
  • Loading branch information
mattyding and dakinggg committed Nov 1, 2024
1 parent b02578b commit 7f03bcb
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
12 changes: 11 additions & 1 deletion llmfoundry/callbacks/hf_checkpointer.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@

from llmfoundry.models.mpt import MPTConfig, MPTForCausalLM
from llmfoundry.models.utils import init_empty_weights
from llmfoundry.utils.exceptions import StoragePermissionError
from llmfoundry.utils.huggingface_hub_utils import \
edit_files_for_hf_compatibility

Expand Down Expand Up @@ -297,7 +298,16 @@ def run_event(self, event: Event, state: State, logger: Logger) -> None:
+ f'Got {type(state.model)} instead.',
)
if self.remote_ud is not None:
self.remote_ud.init(state, logger)
try:
self.remote_ud.init(state, logger)
except PermissionError as e:
if 'Client Error' in str(
e,
): # thrown from composer.utils._wrap_mlflow_exceptions
raise StoragePermissionError(
'Error when write to save_folder.',
) from e
raise e
state.callbacks.append(self.remote_ud)

if self.mlflow_registered_model_name is not None:
Expand Down
16 changes: 16 additions & 0 deletions llmfoundry/utils/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
'MisconfiguredHfDatasetError',
'DatasetTooSmallError',
'RunTimeoutError',
'StoragePermissionError',
'UCNotEnabledError',
'DeltaTableNotFoundError',
]
Expand Down Expand Up @@ -528,6 +529,21 @@ def __str__(self):
return self.message


class StoragePermissionError(UserError):
    """Raised when blob storage access is denied for lack of permissions.

    The message passed at construction is kept on the instance so it can
    be returned by ``str()`` and replayed when the error is unpickled.
    """

    def __init__(self, message: str) -> None:
        """Store ``message`` and forward it to the base error.

        Args:
            message: Human-readable description of the permission failure.
        """
        self.message = message
        super().__init__(message)

    def __reduce__(self):
        """Describe how to rebuild this error when it is pickled.

        Returns:
            A ``(callable, args)`` pair that re-creates the error from its
            message alone.
        """
        ctor_args = (self.message,)
        return (StoragePermissionError, ctor_args)

    def __str__(self) -> str:
        """Return the message exactly as it was given to the constructor."""
        return self.message


class UCNotEnabledError(UserError):
"""Error thrown when user does not have UC enabled on their cluster."""

Expand Down

0 comments on commit 7f03bcb

Please sign in to comment.