Add kedro catalog factory list CLI command #2796

1 change: 1 addition & 0 deletions RELEASE.md
@@ -13,6 +13,7 @@
## Major features and improvements
* Added dataset factories feature which uses pattern matching to reduce the number of catalog entries.
* Activated all built-in resolvers by default for `OmegaConfigLoader` except for `oc.env`.
* Added `kedro catalog factory list` CLI command.

## Bug fixes and other changes
* Updated `kedro catalog list` to work with dataset factories.
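The dataset factories entry above means that a single `{placeholder}` pattern can stand in for many near-identical catalog entries. A minimal sketch of the idea, using made-up dataset names, types and paths that are not taken from this PR:

```python
# Three explicit catalog entries that differ only in the dataset name ...
explicit_entries = {
    "companies_csv": {"type": "pandas.CSVDataSet", "filepath": "data/01_raw/companies.csv"},
    "reviews_csv": {"type": "pandas.CSVDataSet", "filepath": "data/01_raw/reviews.csv"},
    "shuttles_csv": {"type": "pandas.CSVDataSet", "filepath": "data/01_raw/shuttles.csv"},
}

# ... collapsed into one factory pattern: "{name}" is filled in from the dataset
# name requested by the pipeline (e.g. "companies_csv" gives name="companies").
factory_entry = {
    "{name}_csv": {"type": "pandas.CSVDataSet", "filepath": "data/01_raw/{name}.csv"},
}
```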
20 changes: 20 additions & 0 deletions kedro/framework/cli/catalog.py
@@ -179,6 +179,26 @@ def create_catalog(metadata: ProjectMetadata, pipeline_name, env):
click.echo("All datasets are already configured.")


@catalog.group()
def factory():
    """Commands for working with catalog dataset factories."""


@factory.command("list")
@env_option
@click.pass_obj
def list_factories(metadata: ProjectMetadata, env):
    """Show all dataset factories in the catalog, ranked by the priority with which they are matched."""
    session = _create_session(metadata.package_name, env=env)
    context = session.load_context()

    catalog_factories = context.catalog._dataset_patterns
    if catalog_factories:
        click.echo(yaml.dump(list(catalog_factories.keys())))
    else:
        click.echo("There are no dataset factories in the catalog.")


def _add_missing_datasets_to_catalog(missing_ds, catalog_path):
    if catalog_path.is_file():
        catalog_config = yaml.safe_load(catalog_path.read_text()) or {}
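For a sense of what the new `kedro catalog factory list` command prints: `list_factories` above dumps the keys of the catalog's pattern dictionary as YAML. A self-contained sketch of that output, assuming `_dataset_patterns` is a plain dict keyed by pattern string (it is a private `DataCatalog` attribute, so its exact shape is an implementation detail) and using made-up patterns:

```python
import yaml

# Stand-in for context.catalog._dataset_patterns; the keys are the factory
# patterns, the values their configuration (unused by the listing command).
dataset_patterns = {
    "{name}_csv": {"type": "pandas.CSVDataSet", "filepath": "data/{name}.csv"},
    "{name}_parquet": {"type": "pandas.ParquetDataSet", "filepath": "data/{name}.pq"},
}

# Same call as in list_factories: the command echoes a YAML list of the keys.
print(yaml.dump(list(dataset_patterns.keys())))
# - '{name}_csv'
# - '{name}_parquet'
```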
84 changes: 84 additions & 0 deletions tests/framework/cli/test_catalog.py
@@ -42,6 +42,33 @@ def fake_catalog_config():
    return config


@pytest.fixture
def fake_catalog_with_overlapping_factories():
    config = {
        "an_example_dataset": {
            "type": "pandas.CSVDataSet",
            "filepath": "dummy_filepath",
        },
        "an_example_{placeholder}": {
            "type": "dummy_type",
            "filepath": "dummy_filepath",
        },
        "an_example_{place}_{holder}": {
            "type": "dummy_type",
            "filepath": "dummy_filepath",
        },
        "on_{example_placeholder}": {
            "type": "dummy_type",
            "filepath": "dummy_filepath",
        },
        "an_{example_placeholder}": {
            "type": "dummy_type",
            "filepath": "dummy_filepath",
        },
    }
    return config


@pytest.mark.usefixtures(
    "chdir_to_dummy_project", "fake_load_context", "mock_pipelines"
)
@@ -360,3 +387,60 @@ def test_bad_env(self, fake_project_cli, fake_metadata):

        assert result.exit_code
        assert "Unable to instantiate Kedro session" in result.output


@pytest.mark.usefixtures(
    "chdir_to_dummy_project", "fake_load_context", "mock_pipelines"
)
def test_list_catalog_factories(
    fake_project_cli,
    fake_metadata,
    mocker,
    fake_load_context,
    fake_catalog_with_overlapping_factories,
):
    yaml_dump_mock = mocker.patch("yaml.dump", return_value="Result YAML")
    mocked_context = fake_load_context.return_value
    mocked_context.catalog = DataCatalog.from_config(
        fake_catalog_with_overlapping_factories
    )

    result = CliRunner().invoke(
        fake_project_cli, ["catalog", "factory", "list"], obj=fake_metadata
    )
    assert not result.exit_code

    expected_patterns_sorted = [
        "an_example_{place}_{holder}",
        "an_example_{placeholder}",
        "an_{example_placeholder}",
        "on_{example_placeholder}",
    ]

    assert yaml_dump_mock.call_count == 1
    assert yaml_dump_mock.call_args[0][0] == expected_patterns_sorted
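The expected ordering above lists the more specific patterns first. One plausible ranking rule that reproduces it is to count the literal (non-placeholder) characters in each pattern and break ties alphabetically; the sketch below illustrates that idea only, not Kedro's actual implementation, which lives inside `DataCatalog` and may use additional tie-breakers:

```python
import re

def specificity(pattern: str) -> int:
    # Number of characters outside the {placeholder} brackets.
    return len(re.sub(r"\{.*?\}", "", pattern))

patterns = [
    "an_example_{placeholder}",
    "an_example_{place}_{holder}",
    "on_{example_placeholder}",
    "an_{example_placeholder}",
]
ranked = sorted(patterns, key=lambda p: (-specificity(p), p))
# ['an_example_{place}_{holder}', 'an_example_{placeholder}',
#  'an_{example_placeholder}', 'on_{example_placeholder}']
```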


@pytest.mark.usefixtures(
    "chdir_to_dummy_project",
    "fake_load_context",
)
def test_list_factories_with_no_factories(
    fake_project_cli, fake_metadata, fake_load_context
):
    mocked_context = fake_load_context.return_value

    catalog_data_sets = {
        "iris_data": CSVDataSet("test.csv"),
        "intermediate": MemoryDataset(),
        "not_used": CSVDataSet("test2.csv"),
    }
    mocked_context.catalog = DataCatalog(data_sets=catalog_data_sets)

    result = CliRunner().invoke(
        fake_project_cli, ["catalog", "factory", "list"], obj=fake_metadata
    )

    assert not result.exit_code
    expected_output = "There are no dataset factories in the catalog."
    assert expected_output in result.output
21 changes: 11 additions & 10 deletions tests/tools/test_cli.py
@@ -139,17 +139,18 @@ def test_get_cli_structure_help(self, mocker, fake_metadata):
        assert isinstance(help_cli_structure, dict)
        assert isinstance(help_cli_structure["kedro"], dict)

        for k, v in help_cli_structure["kedro"].items():
            assert isinstance(k, str)
            if isinstance(v, dict):
                for sub_key in v:
                    assert isinstance(help_cli_structure["kedro"][k][sub_key], str)
                    assert help_cli_structure["kedro"][k][sub_key].startswith(
                        "Usage: [OPTIONS]"
                    )
            elif isinstance(v, str):
                assert v.startswith("Usage: [OPTIONS]")
        self.recursively_check_cli_structure(help_cli_structure["kedro"])

        assert sorted(list(help_cli_structure["kedro"])) == sorted(
            DEFAULT_KEDRO_COMMANDS
        )

    def recursively_check_cli_structure(self, structure):
        for k, v in structure.items():
            assert isinstance(k, str)
            if isinstance(v, str):
                assert v.startswith("Usage: [OPTIONS]")
            elif isinstance(v, dict):
                self.recursively_check_cli_structure(v)
            else:  # Should never be reached
                pytest.fail()
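The refactor above swaps a single-level loop for a recursive helper, presumably because `kedro catalog factory list` nests a command group one level deeper than the previous check could handle. A hedged sketch of the kind of structure the helper now has to walk; the exact keys and help strings below are assumptions, not taken from the test fixtures:

```python
# Illustrative shape of the CLI help structure with the new nested group.
help_cli_structure = {
    "kedro": {
        "run": "Usage: [OPTIONS] ...",
        "catalog": {
            "list": "Usage: [OPTIONS] ...",
            "factory": {  # the group added in this PR, nested one level deeper
                "list": "Usage: [OPTIONS] ...",
            },
        },
    },
}
```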