From b07768dbc0370557a1a25eec56eedbabb5f93821 Mon Sep 17 00:00:00 2001
From: Florian
Date: Fri, 19 May 2023 10:57:21 +0200
Subject: [PATCH] hook implementation

---
 kedro_azureml/hooks.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 pyproject.toml         |  3 +++
 2 files changed, 45 insertions(+)
 create mode 100644 kedro_azureml/hooks.py

diff --git a/kedro_azureml/hooks.py b/kedro_azureml/hooks.py
new file mode 100644
index 0000000..950e2d4
--- /dev/null
+++ b/kedro_azureml/hooks.py
@@ -0,0 +1,42 @@
+from pathlib import Path
+
+from kedro.extras.datasets.pickle import PickleDataSet
+from kedro.framework.hooks import hook_impl
+
+
+class AzureMLLocalRunHook:
+    """Hook class that allows local runs using AML datasets.
+
+    Before a pipeline run, every catalog entry that uses the ``azureml``
+    protocol and is not listed in ``pipeline.inputs()`` is replaced by a
+    ``PickleDataSet`` stored under ``<project_path>/data/local_run/``.
+    Datasets listed in ``pipeline.inputs()`` are left untouched.
+    """
+
+    @hook_impl
+    def before_pipeline_run(self, run_params, pipeline, catalog):
+        """Hook implementation to change dataset types to PickleDataSet
+        for local runs.
+
+        Args:
+            run_params: The parameters that are passed to the run command.
+                ``run_params["project_path"]`` is read when a dataset is replaced.
+            pipeline: The ``Pipeline`` object representing the pipeline to be run.
+            catalog: The ``DataCatalog`` from which to fetch data. Matching
+                entries of ``catalog._data_sets`` are replaced in place.
+        """
+        # ``pipeline.inputs()`` does not change during the loop: call it once.
+        pipeline_inputs = pipeline.inputs()
+
+        # Iterate over a snapshot because entries are reassigned in the loop.
+        for dataset_name, dataset in list(catalog._data_sets.items()):
+            if getattr(dataset, "_protocol", None) != "azureml":
+                continue
+            if dataset_name in pipeline_inputs:
+                continue
+            project_path = Path(run_params["project_path"])
+            new_filepath = project_path / "data" / "local_run" / dataset._filepath.name
+            catalog._data_sets[dataset_name] = PickleDataSet(str(new_filepath))
+
+
+azureml_local_run_hook = AzureMLLocalRunHook()
diff --git a/pyproject.toml b/pyproject.toml
index 0d9517d..3883281 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,3 +61,6 @@ pandas = "^1.4.3"
 
 [tool.poetry.plugins."kedro.project_commands"]
 "azureml" = "kedro_azureml.cli:commands"
+
+[tool.poetry.plugins."kedro.hooks"]
+"azure_local_run_hook" = "kedro_azureml.hooks:azureml_local_run_hook"
\ No newline at end of file