-
Notifications
You must be signed in to change notification settings - Fork 909
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enforce 1 session = 1 run
#1329
Changes from 14 commits
7a6e6fa
a4b2148
697ee3f
1af6cce
fce25c6
55fb343
b30faa3
4952dfb
4142059
f71a697
ae9db52
e42f9ac
86bddc9
7b87918
00168c8
7abd4cd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -75,6 +75,14 @@ def _jsonify_cli_context(ctx: click.core.Context) -> Dict[str, Any]: | |
} | ||
|
||
|
||
class KedroSessionError(Exception): | ||
"""``KedroSessionError`` raised by ``KedroSession`` | ||
in the case that multiple runs are attempted in one session. | ||
""" | ||
|
||
pass | ||
|
||
|
||
class KedroSession: | ||
"""``KedroSession`` is the object that is responsible for managing the lifecycle | ||
of a Kedro run. | ||
|
@@ -106,6 +114,7 @@ def __init__( | |
self.save_on_close = save_on_close | ||
self._package_name = package_name | ||
self._store = self._init_store() | ||
self._run_called = False | ||
|
||
hook_manager = _create_hook_manager() | ||
_register_hooks(hook_manager, settings.HOOKS) | ||
|
@@ -318,6 +327,8 @@ def run( # pylint: disable=too-many-arguments,too-many-locals | |
defined by `register_pipelines`. | ||
Exception: Any uncaught exception during the run will be re-raised | ||
after being passed to ``on_pipeline_error`` hook. | ||
KedroSessionError: If more than one run is attempted to be executed during | ||
a single session. | ||
Returns: | ||
Any node outputs that cannot be processed by the ``DataCatalog``. | ||
These are returned in a dictionary, where the keys are defined | ||
|
@@ -327,7 +338,15 @@ def run( # pylint: disable=too-many-arguments,too-many-locals | |
# Report project name | ||
self._logger.info("** Kedro project %s", self._project_path.name) | ||
|
||
save_version = run_id = self.store["session_id"] | ||
if self._run_called: | ||
raise KedroSessionError( | ||
"A run has already been completed as part of the" | ||
" active KedroSession. KedroSession has a 1-1 mapping with" | ||
" runs, and thus only one run should be executed per session." | ||
) | ||
|
||
session_id = self.store["session_id"] | ||
save_version = session_id | ||
extra_params = self.store.get("extra_params") or {} | ||
context = self.load_context() | ||
|
||
|
@@ -352,7 +371,7 @@ def run( # pylint: disable=too-many-arguments,too-many-locals | |
) | ||
|
||
record_data = { | ||
"run_id": run_id, | ||
"session_id": session_id, | ||
"project_path": self._project_path.as_posix(), | ||
"env": context.env, | ||
"kedro_version": kedro_version, | ||
|
@@ -368,7 +387,8 @@ def run( # pylint: disable=too-many-arguments,too-many-locals | |
} | ||
|
||
catalog = context._get_catalog( | ||
save_version=save_version, load_versions=load_versions | ||
save_version=save_version, | ||
load_versions=load_versions, | ||
) | ||
|
||
# Run the runner | ||
|
@@ -379,7 +399,10 @@ def run( # pylint: disable=too-many-arguments,too-many-locals | |
) | ||
|
||
try: | ||
run_result = runner.run(filtered_pipeline, catalog, hook_manager, run_id) | ||
run_result = runner.run( | ||
filtered_pipeline, catalog, hook_manager, session_id | ||
) | ||
self._run_called = True | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this is just in case the run is successful. If there's a problem with the pipeline halfway through, we can call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, so my thinking was that if you're in an interactive workflow to experiment/debug your pipeline it would be overhead if you need to recreate a new |
||
except Exception as error: | ||
hook_manager.hook.on_pipeline_error( | ||
error=error, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can
save_version
ever be different fromsession_id
now actually? 🤔 On seconds thoughts maybe we don't need to keep this in the hook spec after all if it's just a duplicate ofsave_version
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At the moment they will always be the same, but this goes back to the question whether we should allow users to have a custom
save_version
, which requires user research. With the mindset of "don't add things that could potentially be used in the future", I'll remove it for now.