-
Notifications
You must be signed in to change notification settings - Fork 908
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enforce 1 session = 1 run
#1329
Changes from 9 commits
7a6e6fa
a4b2148
697ee3f
1af6cce
fce25c6
55fb343
b30faa3
4952dfb
4142059
f71a697
ae9db52
e42f9ac
86bddc9
7b87918
00168c8
7abd4cd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -290,7 +290,6 @@ def _get_catalog( | |
feed_dict=feed_dict, | ||
save_version=save_version, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At the moment they will always be the same, but this goes back to the question whether we should allow users to have a custom |
||
load_versions=load_versions, | ||
run_id=self.run_id or save_version, | ||
) | ||
return catalog | ||
|
||
|
@@ -335,22 +334,6 @@ def _get_config_credentials(self) -> Dict[str, Any]: | |
conf_creds = {} | ||
return conf_creds | ||
|
||
@property | ||
def run_id(self) -> Union[None, str]: | ||
"""Unique identifier for a run, defaults to None. | ||
If `run_id` is None, `save_version` will be used instead. | ||
""" | ||
return self._get_run_id() | ||
|
||
def _get_run_id( # pylint: disable=no-self-use | ||
self, *args, **kwargs # pylint: disable=unused-argument | ||
) -> Union[None, str]: | ||
"""A hook for generating a unique identifier for a | ||
run, defaults to None. | ||
If None, `save_version` will be used instead. | ||
""" | ||
return None | ||
|
||
|
||
class KedroContextError(Exception): | ||
"""Error occurred when loading project and running context pipeline.""" |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -75,6 +75,14 @@ def _jsonify_cli_context(ctx: click.core.Context) -> Dict[str, Any]: | |
} | ||
|
||
|
||
class KedroSessionError(Exception): | ||
"""``KedroSessionError`` raised by ``KedroSession`` | ||
in case of run failure as part of a session. | ||
merelcht marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
|
||
pass | ||
|
||
|
||
class KedroSession: | ||
"""``KedroSession`` is the object that is responsible for managing the lifecycle | ||
of a Kedro run. | ||
|
@@ -94,18 +102,20 @@ class KedroSession: | |
>>> | ||
""" | ||
|
||
def __init__( | ||
def __init__( # pylint: disable=too-many-arguments | ||
self, | ||
session_id: str, | ||
package_name: str = None, | ||
project_path: Union[Path, str] = None, | ||
save_on_close: bool = False, | ||
run_called: bool = False, | ||
merelcht marked this conversation as resolved.
Show resolved
Hide resolved
|
||
): | ||
self._project_path = Path(project_path or Path.cwd()).resolve() | ||
self.session_id = session_id | ||
self.save_on_close = save_on_close | ||
self._package_name = package_name | ||
self._store = self._init_store() | ||
self._run_called = run_called | ||
|
||
hook_manager = _create_hook_manager() | ||
_register_hooks(hook_manager, settings.HOOKS) | ||
|
@@ -318,6 +328,8 @@ def run( # pylint: disable=too-many-arguments,too-many-locals | |
defined by `register_pipelines`. | ||
Exception: Any uncaught exception during the run will be re-raised | ||
after being passed to ``on_pipeline_error`` hook. | ||
KedroSessionError: If more than one run is attempted to be executed during | ||
a single session. | ||
Returns: | ||
Any node outputs that cannot be processed by the ``DataCatalog``. | ||
These are returned in a dictionary, where the keys are defined | ||
|
@@ -327,7 +339,14 @@ def run( # pylint: disable=too-many-arguments,too-many-locals | |
# Report project name | ||
self._logger.info("** Kedro project %s", self._project_path.name) | ||
|
||
save_version = run_id = self.store["session_id"] | ||
if self._run_called: | ||
raise KedroSessionError( | ||
"A run has already been completed as part of the" | ||
" active KedroSession. KedroSession has a 1-1 mapping with" | ||
" runs, and thus only one run should be executed per session." | ||
) | ||
|
||
save_version = self.store["session_id"] | ||
extra_params = self.store.get("extra_params") or {} | ||
context = self.load_context() | ||
|
||
|
@@ -352,7 +371,7 @@ def run( # pylint: disable=too-many-arguments,too-many-locals | |
) | ||
|
||
record_data = { | ||
"run_id": run_id, | ||
"session_id": self.store["session_id"], | ||
"project_path": self._project_path.as_posix(), | ||
"env": context.env, | ||
"kedro_version": kedro_version, | ||
|
@@ -379,7 +398,8 @@ def run( # pylint: disable=too-many-arguments,too-many-locals | |
) | ||
|
||
try: | ||
run_result = runner.run(filtered_pipeline, catalog, hook_manager, run_id) | ||
run_result = runner.run(filtered_pipeline, catalog, hook_manager) | ||
self._run_called = True | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this is just in case the run is successful. If there's a problem with the pipeline halfway through, we can call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, so my thinking was that if you're in an interactive workflow to experiment/debug your pipeline it would be overhead if you need to recreate a new |
||
except Exception as error: | ||
hook_manager.hook.on_pipeline_error( | ||
error=error, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was wondering whether GE would still work without the
run_id
argument here. It appears to be an optional argument but I think it might be quite useful for users to keep track of validations done over multiple runs.