From 67117d2849364c4ae104c0e35a146180a10dd1bb Mon Sep 17 00:00:00 2001 From: Murdo Moyse Date: Wed, 4 Dec 2024 17:11:37 +0000 Subject: [PATCH 01/10] feature: integrate type mapping objects --- home/forms/search.py | 4 +- home/service/details.py | 15 +- home/service/search.py | 8 +- home/views.py | 27 +-- .../client/datahub_client.py | 15 +- .../client/search/search_client.py | 72 ++++---- .../data_platform_catalogue/entities.py | 155 +++++++----------- .../data_platform_catalogue/search_types.py | 4 +- .../tests/client/search/test_search_client.py | 31 ++-- .../test_integration_with_datahub_server.py | 12 +- templates/partial/search_result.html | 4 +- tests/conftest.py | 28 ++-- tests/home/service/test_glossary.py | 8 +- .../test_interact_with_search_results.py | 7 +- .../test_search_result_metadata.py | 5 +- 15 files changed, 186 insertions(+), 209 deletions(-) diff --git a/home/forms/search.py b/home/forms/search.py index 51001651..d0565f34 100644 --- a/home/forms/search.py +++ b/home/forms/search.py @@ -1,8 +1,8 @@ from copy import deepcopy from urllib.parse import urlencode +from data_platform_catalogue.entities import FindMoJDataEntityType from data_platform_catalogue.search_types import DomainOption -from data_platform_catalogue.entities import EntityTypes from django import forms from django.utils.translation import gettext as _ @@ -38,7 +38,7 @@ def get_entity_types(): return sorted( [ (entity.name, entity.value) - for entity in EntityTypes + for entity in FindMoJDataEntityType if entity.name != "GLOSSARY_TERM" ] ) diff --git a/home/service/details.py b/home/service/details.py index 0eda852c..e9526b42 100644 --- a/home/service/details.py +++ b/home/service/details.py @@ -1,7 +1,10 @@ import os from urllib.parse import urlsplit -from data_platform_catalogue.entities import EntityRef, EntityTypes, RelationshipType +from data_platform_catalogue.entities import ( + EntityRef, RelationshipType, DatabaseEntityMapper, + DashboardEntityMapper, PublicationCollectionEntityMapper, PublicationDatasetEntityMapper +) from django.core.exceptions import ObjectDoesNotExist, ValidationError from django.core.validators import URLValidator from django.utils.translation import gettext as _ @@ -114,7 +117,7 @@ def _get_context(self): "entity": self.table_metadata, "entity_type": "Table", "parent_entity": self.parent_entity, - "parent_type": EntityTypes.DATABASE.name.lower(), + "parent_type": DatabaseEntityMapper.datahub_type.lower(), "h1_value": self.table_metadata.name, "has_lineage": self.has_lineage(), "lineage_url": f"{split_datahub_url.scheme}://{split_datahub_url.netloc}/dataset/{self.table_metadata.urn}/Lineage?is_lineage_mode=true&", # noqa: E501 @@ -161,7 +164,7 @@ def _get_context(self): self.chart_metadata.platform.display_name ), "parent_entity": self.parent_entity, - "parent_type": EntityTypes.DASHBOARD.name.lower(), + "parent_type": DashboardEntityMapper.datahub_type.lower(), "h1_value": self.chart_metadata.name, "is_access_requirements_a_url": is_access_requirements_a_url( self.chart_metadata.custom_properties.access_information.dc_access_requirements @@ -219,7 +222,7 @@ def __init__(self, urn: str): def _get_context(self): context = { "entity": self.publication_collection_metadata, - "entity_type": EntityTypes.PUBLICATION_COLLECTION.value, + "entity_type": PublicationCollectionEntityMapper.find_moj_data_type, "platform_name": friendly_platform_name( self.publication_collection_metadata.platform.display_name ), @@ -258,12 +261,12 @@ def _get_context(self): return { "entity": self.publication_dataset_metadata, - "entity_type": EntityTypes.PUBLICATION_DATASET.value, + "entity_type": PublicationDatasetEntityMapper.find_moj_data_type, "parent_entity": self.parent_entity, "platform_name": friendly_platform_name( self.publication_dataset_metadata.platform.display_name ), - "parent_type": EntityTypes.PUBLICATION_COLLECTION.name.lower(), + "parent_type": DatabaseEntityMapper.datahub_type.lower(), "h1_value": self.publication_dataset_metadata.name, # noqa: E501 "is_access_requirements_a_url": is_access_requirements_a_url( diff --git a/home/service/search.py b/home/service/search.py index 0d109efe..310b504f 100644 --- a/home/service/search.py +++ b/home/service/search.py @@ -2,7 +2,7 @@ from copy import deepcopy from typing import Any -from data_platform_catalogue.entities import EntityTypes +from data_platform_catalogue.entities import FindMoJDataEntityType from data_platform_catalogue.search_types import ( DomainOption, MultiSelectFilter, @@ -45,15 +45,15 @@ def _build_custom_property_filter( ) -> list[str]: return [f"{filter_param}{filter_value}" for filter_value in filter_value_list] - def _build_entity_types(self, entity_types: list[str]) -> tuple[EntityTypes, ...]: + def _build_entity_types(self, entity_types: list[str]) -> tuple[FindMoJDataEntityType, ...]: default_entities = tuple( entity - for entity in EntityTypes + for entity in FindMoJDataEntityType if entity.name != "GLOSSARY_TERM" ) chosen_entities = ( tuple( - EntityTypes[entity] + FindMoJDataEntityType[entity] for entity in entity_types ) if entity_types diff --git a/home/views.py b/home/views.py index ab3970d7..ef0ec1f7 100644 --- a/home/views.py +++ b/home/views.py @@ -3,6 +3,14 @@ from urllib.parse import urlparse from data_platform_catalogue.client.exceptions import EntityDoesNotExist +from data_platform_catalogue.entities import ( + ChartEntityMapper, + DashboardEntityMapper, + DatabaseEntityMapper, + PublicationCollectionEntityMapper, + PublicationDatasetEntityMapper, + TableEntityMapper, +) from data_platform_catalogue.search_types import DomainOption from django.conf import settings from django.http import Http404, HttpResponse, HttpResponseBadRequest @@ -10,7 +18,6 @@ from django.utils.translation import gettext as _ from django.views.decorators.cache import cache_control -from data_platform_catalogue.entities import EntityTypes from home.forms.search import SearchForm from home.service.details import ( ChartDetailsService, @@ -31,12 +38,12 @@ from home.service.search import SearchService type_details_map = { - EntityTypes.TABLE.url_formatted: DatasetDetailsService, - EntityTypes.DATABASE.url_formatted: DatabaseDetailsService, - EntityTypes.CHART.url_formatted: ChartDetailsService, - EntityTypes.DASHBOARD.url_formatted: DashboardDetailsService, - EntityTypes.PUBLICATION_COLLECTION.url_formatted: PublicationCollectionDetailsService, - EntityTypes.PUBLICATION_DATASET.url_formatted: PublicationDatasetDetailsService + TableEntityMapper.url_formatted: DatasetDetailsService, + DatabaseEntityMapper.url_formatted: DatabaseDetailsService, + ChartEntityMapper.url_formatted: ChartDetailsService, + DashboardEntityMapper.url_formatted: DashboardDetailsService, + PublicationCollectionEntityMapper.url_formatted: PublicationCollectionDetailsService, + PublicationDatasetEntityMapper.url_formatted: PublicationDatasetDetailsService } @@ -66,11 +73,11 @@ def details_view(request, result_type, urn): @cache_control(max_age=300, private=True) def details_view_csv(request, result_type, urn) -> HttpResponse: match result_type: - case EntityTypes.TABLE.url_formatted: + case TableEntityMapper.url_formatted: csv_formatter = DatasetDetailsCsvFormatter(DatasetDetailsService(urn)) - case EntityTypes.DATABASE.url_formatted: + case DatabaseEntityMapper.url_formatted: csv_formatter = DatabaseDetailsCsvFormatter(DatabaseDetailsService(urn)) - case EntityTypes.DASHBOARD.url_formatted: + case DashboardEntityMapper.url_formatted: csv_formatter = DashboardDetailsCsvFormatter(DashboardDetailsService(urn)) case _: logging.error("Invalid result type for csv details view %s", result_type) diff --git a/lib/datahub-client/data_platform_catalogue/client/datahub_client.py b/lib/datahub-client/data_platform_catalogue/client/datahub_client.py index 28c92e2c..a8f8118c 100644 --- a/lib/datahub-client/data_platform_catalogue/client/datahub_client.py +++ b/lib/datahub-client/data_platform_catalogue/client/datahub_client.py @@ -63,7 +63,10 @@ Database, EntityRef, EntitySummary, - EntityTypes, + FindMoJDataEntityMapper, + TableEntityMapper, + ChartEntityMapper, + DatabaseEntityMapper, Governance, PublicationCollection, PublicationDataset, @@ -203,10 +206,10 @@ def search( query: str = "*", count: int = 20, page: str | None = None, - result_types: Sequence[EntityTypes] = ( - EntityTypes.TABLE, - EntityTypes.CHART, - EntityTypes.DATABASE, + result_types: Sequence[FindMoJDataEntityMapper] = ( + TableEntityMapper, + ChartEntityMapper, + DatabaseEntityMapper, ), filters: Sequence[MultiSelectFilter] | None = None, sort: SortOption | None = None, @@ -395,7 +398,7 @@ def get_publication_collection_details(self, urn: str) -> PublicationCollection: child_relations = parse_relations( relationship_type=RelationshipType.CHILD, relations_list=[response["relationships"]], - entity_type_of_relations=EntityTypes.PUBLICATION_DATASET.url_formatted, + entity_type_of_relations=PublicationDatasetEntityMapper.url_formatted, ) relations_to_display = self.list_relations_to_display(child_relations) diff --git a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py index 00223a34..8456f8af 100644 --- a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py +++ b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py @@ -21,8 +21,15 @@ from data_platform_catalogue.entities import ( DatahubEntityType, DatahubSubtype, - EntityRef, - EntityTypes, + FindMoJDataEntityMapper, + TableEntityMapper, + ChartEntityMapper, + DatabaseEntityMapper, + DashboardEntityMapper, + PublicationDatasetEntityMapper, + PublicationCollectionEntityMapper, + GlossaryTermEntityMapper, + EntityRef ) from data_platform_catalogue.search_types import ( DomainOption, @@ -37,9 +44,6 @@ logger = logging.getLogger(__name__) -EntityTypeParsingFuncMap = namedtuple("EntityTypes", ["result_type", "parse_function"]) - - class SearchClient: def __init__(self, graph: DataHubGraph): self.graph = graph @@ -49,21 +53,21 @@ def __init__(self, graph: DataHubGraph): self.get_glossary_terms_query = get_graphql_query("getGlossaryTerms") self.get_tags_query = get_graphql_query("getTags") self.fmd_type_to_datahub_types_mapping = { - EntityTypes.TABLE.value: ( + TableEntityMapper.find_moj_data_type: ( DatahubEntityType.DATASET.value, ["Model", "Table", "Seed", "Source"], ), - EntityTypes.CHART.value: (DatahubEntityType.CHART.value, []), - EntityTypes.DATABASE.value: ( + ChartEntityMapper.find_moj_data_type: (DatahubEntityType.CHART.value, []), + DatabaseEntityMapper.find_moj_data_type: ( DatahubEntityType.CONTAINER.value, ["Database"], ), - EntityTypes.DASHBOARD.value: (DatahubEntityType.DASHBOARD.value, []), - EntityTypes.PUBLICATION_DATASET.value: ( + DashboardEntityMapper.find_moj_data_type: (DatahubEntityType.DASHBOARD.value, []), + PublicationDatasetEntityMapper.find_moj_data_type: ( DatahubEntityType.DATASET.value, ["Publication dataset"], ), - EntityTypes.PUBLICATION_COLLECTION.value: ( + PublicationCollectionEntityMapper.find_moj_data_type: ( DatahubEntityType.CONTAINER.value, ["Publication collection"], ), @@ -74,53 +78,53 @@ def __init__(self, graph: DataHubGraph): DatahubSubtype.PUBLICATION_DATASET.value, ): ( self._parse_dataset, - EntityTypes.PUBLICATION_DATASET, + PublicationDatasetEntityMapper, ), (DatahubEntityType.DATASET.value, DatahubSubtype.METRIC.value): ( self._parse_dataset, - EntityTypes.TABLE, + TableEntityMapper, ), (DatahubEntityType.DATASET.value, DatahubSubtype.TABLE.value): ( self._parse_dataset, - EntityTypes.TABLE, + TableEntityMapper, ), (DatahubEntityType.DATASET.value, DatahubSubtype.MODEL.value): ( self._parse_dataset, - EntityTypes.TABLE, + TableEntityMapper, ), (DatahubEntityType.DATASET.value, DatahubSubtype.SEED.value): ( self._parse_dataset, - EntityTypes.TABLE, + TableEntityMapper, ), (DatahubEntityType.DATASET.value, DatahubSubtype.SOURCE.value): ( self._parse_dataset, - EntityTypes.TABLE, + TableEntityMapper, ), (DatahubEntityType.CONTAINER.value, DatahubSubtype.DATABASE.value): ( self._parse_container, - EntityTypes.DATABASE, + DatabaseEntityMapper, ), ( DatahubEntityType.CONTAINER.value, DatahubSubtype.PUBLICATION_COLLECTION.value, ): ( self._parse_container, - EntityTypes.PUBLICATION_COLLECTION, + PublicationCollectionEntityMapper, ), ( DatahubEntityType.DATASET.value, DatahubSubtype.PUBLICATION_DATASET.value, ): ( self._parse_container, - EntityTypes.PUBLICATION_DATASET, + PublicationDatasetEntityMapper, ), (DatahubEntityType.CHART.value, None): ( self._parse_dataset, - EntityTypes.CHART, + ChartEntityMapper, ), (DatahubEntityType.DASHBOARD.value, None): ( self._parse_container, - EntityTypes.DASHBOARD, + DashboardEntityMapper, ), } @@ -129,10 +133,10 @@ def search( query: str = "*", count: int = 20, page: str | None = None, - result_types: Sequence[EntityTypes] = ( - EntityTypes.TABLE, - EntityTypes.CHART, - EntityTypes.DATABASE, + result_types: Sequence[FindMoJDataEntityMapper] = ( + TableEntityMapper, + ChartEntityMapper, + DatabaseEntityMapper, ), filters: Sequence[MultiSelectFilter] | None = None, sort: SortOption | None = None, @@ -146,7 +150,7 @@ def search( start = 0 if page is None else int(page) * count - fmd_entity_types = [result_type.value for result_type in result_types] + fmd_entity_types = [result_type.find_moj_data_type for result_type in result_types] entity_type_filters = [ ( MultiSelectFilter( @@ -275,7 +279,7 @@ def _get_data_collection_page_results(self, response, key_for_results: str): matched_fields: dict = {} if entity_type == "DATASET": page_results.append( - self._parse_dataset(entity, matched_fields, EntityTypes.TABLE) + self._parse_dataset(entity, matched_fields, TableEntityMapper) ) else: raise ValueError(f"Unexpected entity type: {entity_type}") @@ -283,7 +287,7 @@ def _get_data_collection_page_results(self, response, key_for_results: str): def _map_result_types( self, - result_types: Sequence[EntityTypes], + result_types: Sequence[FindMoJDataEntityMapper], ) -> list[str]: """ Map result types to Datahub EntityTypes @@ -310,7 +314,7 @@ def _parse_list_domains( return list_domain_options def _parse_dataset( - self, entity: dict[str, Any], matches, result_type: EntityTypes + self, entity: dict[str, Any], matches, result_type: FindMoJDataEntityMapper ) -> SearchResult: """ Map a dataset entity to a SearchResult @@ -333,7 +337,7 @@ def _parse_dataset( "total_parents": entity.get("relationships", {}).get("total", 0), "domain_name": domain.display_name, "domain_id": domain.urn, - "entity_types": self._parse_types_and_sub_types(entity, result_type.value), + "entity_types": self._parse_types_and_sub_types(entity, result_type.find_moj_data_type), } logger.debug(f"{metadata=}") @@ -392,7 +396,7 @@ def _parse_glossary_term(self, entity) -> SearchResult: return SearchResult( urn=entity["urn"], - result_type=EntityTypes.GLOSSARY_TERM, + result_type=GlossaryTermEntityMapper, matches={}, name=name, display_name=display_name, @@ -455,7 +459,7 @@ def _parse_global_tags(self, tag_query_results) -> list[tuple[str, str]]: return tags_list def _parse_container( - self, entity: dict[str, Any], matches, subtype: EntityTypes + self, entity: dict[str, Any], matches, subtype: FindMoJDataEntityMapper ) -> SearchResult: """ Map a Container entity to a SearchResult @@ -473,7 +477,7 @@ def _parse_container( "owner_email": owner.email, "domain_name": domain.display_name, "domain_id": domain.urn, - "entity_types": self._parse_types_and_sub_types(entity, subtype.value), + "entity_types": self._parse_types_and_sub_types(entity, subtype.find_moj_data_type), } metadata.update(custom_properties) diff --git a/lib/datahub-client/data_platform_catalogue/entities.py b/lib/datahub-client/data_platform_catalogue/entities.py index 4cd3e1fa..cbcb6f09 100644 --- a/lib/datahub-client/data_platform_catalogue/entities.py +++ b/lib/datahub-client/data_platform_catalogue/entities.py @@ -34,6 +34,16 @@ class DatahubSubtype(Enum): DATABASE = "Database" +class FindMoJDataEntityType(Enum): + TABLE = "Table" + GLOSSARY_TERM = "Glossary term" + CHART = "Chart" + DATABASE = "Database" + DASHBOARD = "Dashboard" + PUBLICATION_DATASET = "Publication dataset" + PUBLICATION_COLLECTION = "Publication collection" + + @dataclass class FindMoJDataEntityMapper: find_moj_data_type: str @@ -42,92 +52,54 @@ class FindMoJDataEntityMapper: url_formatted: str -class TableEntityMapper(FindMoJDataEntityMapper): - def __init__(self): - super().__init__( - "Table", - DatahubEntityType.DATASET.value, - ["Model", "Table", "Seed", "Source"], - "table", - ) - - -class ChartEntityMapper(FindMoJDataEntityMapper): - def __init__(self): - super().__init__("Chart", DatahubEntityType.CHART.value, [], "chart") - - -class DatabaseEntityMapper(FindMoJDataEntityMapper): - def __init__(self): - super().__init__( - "Database", DatahubEntityType.CONTAINER.value, ["Database"], "database" - ) - - -class DashboardEntityMapper(FindMoJDataEntityMapper): - def __init__(self): - super().__init__( - "Dashboard", DatahubEntityType.DASHBOARD.value, [], "dashboard" - ) - - -class PublicationDatasetEntityMapper(FindMoJDataEntityMapper): - def __init__(self): - super().__init__( - "Publication dataset", - DatahubEntityType.DATASET.value, - ["Publication dataset"], - "publication_dataset", - ) - - -class PublicationCollectionEntityMapper(FindMoJDataEntityMapper): - def __init__(self): - super().__init__( - "Publication collection", - DatahubEntityType.CONTAINER.value, - ["Publication collection"], - "publication_collection", - ) - - -class EntityTypes(Enum): - """Maps between Find MoJ data's entity type, Datahub entity types and the url_formatted representation - - Each entity has 3 properties: - - value: Human-Readable description of the entity - - datahub_entity_type: Datahub's description of the entity - - url_formatted: URL formatted representation of the entity - - ex: EntityTypes.TABLE.value returns `Table` - EntityTypes.GLOSSARY_TERM.datahub_entity_type returns `GLOSSARY_TERM`""" - - TABLE = ("Table", DatahubEntityType.DATASET.value, "table") - GLOSSARY_TERM = ( - "Glossary term", - DatahubEntityType.GLOSSARY_TERM.value, - "glossary_term", - ) - CHART = ("Chart", DatahubEntityType.CHART.value, "chart") - DATABASE = ("Database", DatahubEntityType.CONTAINER.value, "database") - DASHBOARD = ("Dashboard", DatahubEntityType.DASHBOARD.value, "dashboard") - PUBLICATION_DATASET = ( - "Publication dataset", - DatahubEntityType.DATASET.value, - "publication_dataset", - ) - PUBLICATION_COLLECTION = ( - "Publication collection", - DatahubEntityType.CONTAINER.value, - "publication_collection", - ) - - def __new__(cls, value, datahub_entity_type, url_formatted): - obj = object.__new__(cls) - obj._value_ = value - obj.datahub_entity_type = datahub_entity_type - obj.url_formatted = url_formatted - return obj +TableEntityMapper = FindMoJDataEntityMapper( + FindMoJDataEntityType.TABLE.value, + DatahubEntityType.DATASET.value, + ["Model", "Table", "Seed", "Source"], + "table" +) + +ChartEntityMapper = FindMoJDataEntityMapper( + FindMoJDataEntityType.CHART.value, + DatahubEntityType.CHART.value, + [], + "chart" +) + +GlossaryTermEntityMapper = FindMoJDataEntityMapper( + FindMoJDataEntityType.GLOSSARY_TERM.value, + DatahubEntityType.GLOSSARY_TERM.value, + [], + "glossary_term" +) + +DatabaseEntityMapper = FindMoJDataEntityMapper( + FindMoJDataEntityType.DATABASE.value, + DatahubEntityType.CONTAINER.value, + ["Database"], + "database" +) + +DashboardEntityMapper = FindMoJDataEntityMapper( + FindMoJDataEntityType.DASHBOARD.value, + DatahubEntityType.DASHBOARD.value, + [], + "dashboard" +) + +PublicationDatasetEntityMapper = FindMoJDataEntityMapper( + FindMoJDataEntityType.PUBLICATION_DATASET.value, + DatahubEntityType.DATASET.value, + ["Publication dataset"], + "publication_dataset" +) + +PublicationCollectionEntityMapper = FindMoJDataEntityMapper( + FindMoJDataEntityType.PUBLICATION_COLLECTION.value, + DatahubEntityType.CONTAINER.value, + ["Publication collection"], + "publication_collection" +) class Audience(Enum): @@ -610,25 +582,20 @@ class Database(Entity): ) # tables: list = Field(description="list of tables in the database") - class PublicationCollection(Entity): - """Collections of datasets that are periodically published to GOV.UK""" - + """For source system publication collections""" urn: str | None = Field( description="Unique identifier for the entity. Relates to Datahub's urn", examples=["urn:li:container:criminal_justice_stats"], ) external_url: str = Field( description="URL to view the collection", - examples=[ - "https://www.gov.uk/government/collections/civil-justice-statistics-quarterly" - ], + examples=["https://data.justice.gov.uk/prisons/criminal-jsutice/publications"], ) class PublicationDataset(Entity): - """A dataset published to GOV.UK""" - + """For source system publication collections""" urn: str | None = Field( description="Unique identifier for the entity. Relates to Datahub's urn", examples=["urn:li:dataset:(urn:li:dataPlatform:gov.uk,statistics2011,DEV)"], diff --git a/lib/datahub-client/data_platform_catalogue/search_types.py b/lib/datahub-client/data_platform_catalogue/search_types.py index 2f8f6ce0..a4d1f59b 100644 --- a/lib/datahub-client/data_platform_catalogue/search_types.py +++ b/lib/datahub-client/data_platform_catalogue/search_types.py @@ -6,8 +6,8 @@ from data_platform_catalogue.entities import ( EntityRef, - EntityTypes, GlossaryTermRef, + FindMoJDataEntityMapper, TagRef, ) @@ -64,7 +64,7 @@ class DomainOption: @dataclass class SearchResult: urn: str - result_type: EntityTypes + result_type: FindMoJDataEntityMapper name: str display_name: str = "" fully_qualified_name: str = "" diff --git a/lib/datahub-client/tests/client/search/test_search_client.py b/lib/datahub-client/tests/client/search/test_search_client.py index 6a7740b6..4436413b 100644 --- a/lib/datahub-client/tests/client/search/test_search_client.py +++ b/lib/datahub-client/tests/client/search/test_search_client.py @@ -7,7 +7,10 @@ AccessInformation, DataSummary, EntityRef, - EntityTypes, + TableEntityMapper, + DatabaseEntityMapper, + ChartEntityMapper, + GlossaryTermEntityMapper, FurtherInformation, TagRef, UsageRestrictions, @@ -122,7 +125,7 @@ def test_one_search_result(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers,PROD)", - result_type=EntityTypes.TABLE, + result_type=TableEntityMapper, name="customers", display_name="customers", fully_qualified_name="jaffle_shop.customers", @@ -213,7 +216,7 @@ def test_dataset_result(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers,PROD)", - result_type=EntityTypes.TABLE, + result_type=TableEntityMapper, name="customers", display_name="customers", fully_qualified_name="jaffle_shop.customers", @@ -363,7 +366,7 @@ def test_2_dataset_results_with_one_malformed_result(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers,PROD)", - result_type=EntityTypes.TABLE, + result_type=TableEntityMapper, name="customers", display_name="customers", fully_qualified_name="jaffle_shop.customers", @@ -460,7 +463,7 @@ def test_full_page(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers,PROD)", - result_type=EntityTypes.TABLE, + result_type=TableEntityMapper, name="customers", fully_qualified_name="jaffle_shop.customers", display_name="customers", @@ -491,7 +494,7 @@ def test_full_page(mock_graph, searcher): ), SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers2,PROD)", - result_type=EntityTypes.TABLE, + result_type=TableEntityMapper, name="customers2", fully_qualified_name="calm-pagoda-323403.jaffle_shop.customers2", display_name="customers2", @@ -522,7 +525,7 @@ def test_full_page(mock_graph, searcher): ), SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers3,PROD)", - result_type=EntityTypes.TABLE, + result_type=TableEntityMapper, name="customers3", fully_qualified_name="calm-pagoda-323403.jaffle_shop.customers3", display_name="customers3", @@ -600,7 +603,7 @@ def test_query_match(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers,PROD)", - result_type=EntityTypes.TABLE, + result_type=TableEntityMapper, name="customers", display_name="customers", fully_qualified_name="calm-pagoda-323403.jaffle_shop.customers", @@ -685,7 +688,7 @@ def test_result_with_owner(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers,PROD)", - result_type=EntityTypes.TABLE, + result_type=TableEntityMapper, name="customers", display_name="customers", fully_qualified_name="calm-pagoda-323403.jaffle_shop.customers", @@ -913,7 +916,7 @@ def test_get_glossary_terms(mock_graph, searcher): } ] }, - result_type=EntityTypes.GLOSSARY_TERM, + result_type=GlossaryTermEntityMapper, ), SearchResult( urn="urn:li:glossaryTerm:0eb7af28-62b4-4149-a6fa-72a8f1fea1e6", @@ -922,7 +925,7 @@ def test_get_glossary_terms(mock_graph, searcher): fully_qualified_name="Security classification", description="Only data that is 'official'", metadata={"parentNodes": []}, - result_type=EntityTypes.GLOSSARY_TERM, + result_type=GlossaryTermEntityMapper, ), ], ) @@ -975,7 +978,7 @@ def test_search_for_charts(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:chart:(justice-data,absconds)", - result_type=EntityTypes.CHART, + result_type=ChartEntityMapper, name="Absconds", display_name="Absconds", fully_qualified_name="Absconds", @@ -1097,7 +1100,7 @@ def test_search_for_container(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:container:test_db", - result_type=EntityTypes.DATABASE, + result_type=DatabaseEntityMapper, name="test_db", display_name="test_db", fully_qualified_name="test_db", @@ -1151,7 +1154,7 @@ def test_search_for_container(mock_graph, searcher): def test_tag_to_display(tags, result): test_search_result = SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:athena,test_db.test_table,PROD)", - result_type=EntityTypes.TABLE, + result_type=TableEntityMapper, name="test_table", display_name="test_table", fully_qualified_name="test_db.test_table", diff --git a/lib/datahub-client/tests/test_integration_with_datahub_server.py b/lib/datahub-client/tests/test_integration_with_datahub_server.py index 89cbcd3c..1a75c22c 100644 --- a/lib/datahub-client/tests/test_integration_with_datahub_server.py +++ b/lib/datahub-client/tests/test_integration_with_datahub_server.py @@ -9,25 +9,20 @@ import os import time -from datetime import datetime, timezone +from datetime import datetime import pytest from data_platform_catalogue.client.datahub_client import DataHubCatalogueClient from data_platform_catalogue.entities import ( AccessInformation, - Column, - ColumnRef, CustomEntityProperties, Database, - DataSummary, DomainRef, EntityRef, - FurtherInformation, + TableEntityMapper, Governance, OwnerRef, - RelationshipType, - Table, TagRef, UsageRestrictions, ) @@ -35,7 +30,6 @@ DomainOption, MultiSelectFilter, ) -from data_platform_catalogue.entities import EntityTypes jwt_token = os.environ.get("CATALOGUE_TOKEN") api_url = os.environ.get("CATALOGUE_URL", "") @@ -66,7 +60,7 @@ def test_search_by_domain(): response = client.search( filters=[MultiSelectFilter("domains", ["does-not-exist"])], - result_types=(EntityTypes.TABLE,), + result_types=(TableEntityMapper), ) assert response.total_results == 0 diff --git a/templates/partial/search_result.html b/templates/partial/search_result.html index c73872d4..3c3c8526 100644 --- a/templates/partial/search_result.html +++ b/templates/partial/search_result.html @@ -8,11 +8,11 @@

- {% with result_type=result.result_type.name|lower %} + {% with result_type=result.result_type.url_formatted %} {{result.name}} {% endwith %} - {{ result.result_type.value }} + {{ result.result_type.find_moj_data_type }}

{% if result.description %} diff --git a/tests/conftest.py b/tests/conftest.py index a7982375..4db8db5f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,3 @@ -from datetime import datetime, timezone -from pathlib import Path from random import choice from typing import Any, Generator from unittest.mock import MagicMock, patch @@ -15,7 +13,6 @@ from home.service.search import SearchService from home.service.search_tag_fetcher import SearchTagFetcher from notifications_python_client.notifications import NotificationsAPIClient -from pytest import CollectReport, StashKey from selenium.webdriver import ChromeOptions from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.common.by import By @@ -33,7 +30,10 @@ Database, DomainRef, EntityRef, - EntityTypes, + FindMoJDataEntityMapper, + DatabaseEntityMapper, + TableEntityMapper, + GlossaryTermEntityMapper, EntitySummary, GlossaryTermRef, Governance, @@ -44,8 +44,6 @@ ) from data_platform_catalogue.search_types import ( DomainOption, - FacetOption, - SearchFacets, SearchResponse, SearchResult, ) @@ -306,7 +304,7 @@ def page_titles(): def generate_search_result( - result_type: EntityTypes | None = None, urn=None, metadata=None + result_type: FindMoJDataEntityMapper | None = None, urn=None, metadata=None ) -> SearchResult: """ Generate a random search result @@ -316,7 +314,7 @@ def generate_search_result( return SearchResult( urn=urn or fake.unique.name(), result_type=( - choice((EntityTypes.DATABASE, EntityTypes.TABLE)) + choice((DatabaseEntityMapper, TableEntityMapper)) if result_type is None else result_type ), @@ -330,7 +328,7 @@ def generate_search_result( def search_result_from_database(database: Database): return SearchResult( urn=database.urn or "", - result_type=EntityTypes.DATABASE, + result_type=DatabaseEntityMapper, name=database.name, fully_qualified_name=database.fully_qualified_name or "", description=database.description, @@ -574,7 +572,7 @@ def example_table(name="example_table"): return generate_table_metadata(name=name) -def generate_page(page_size=20, result_type: EntityTypes | None = None): +def generate_page(page_size=20, result_type: FindMoJDataEntityMapper | None = None): """ Generate a fake search page """ @@ -712,7 +710,7 @@ def mock_get_glossary_terms_response(mock_catalogue): } ] }, - result_type=EntityTypes.GLOSSARY_TERM, + result_type=GlossaryTermEntityMapper, ), SearchResult( urn="urn:li:glossaryTerm:022b9b68-c211-47ae-aef0-2db13acfeca8", @@ -728,14 +726,14 @@ def mock_get_glossary_terms_response(mock_catalogue): } ] }, - result_type=EntityTypes.GLOSSARY_TERM, + result_type=GlossaryTermEntityMapper, ), SearchResult( urn="urn:li:glossaryTerm:0eb7af28-62b4-4149-a6fa-72a8f1fea1e6", name="Security classification", description="Only data that is 'official'", metadata={"parentNodes": []}, - result_type=EntityTypes.GLOSSARY_TERM, + result_type=GlossaryTermEntityMapper, ), ], ) @@ -792,7 +790,7 @@ def search_context(search_service): def detail_database_context(mock_catalogue): mock_catalogue.search.return_value = SearchResponse( total_results=1, - page_results=generate_page(page_size=1, result_type=EntityTypes.DATABASE), + page_results=generate_page(page_size=1, result_type=DatabaseEntityMapper), ) details_service = DatabaseDetailsService(urn="urn:li:container:test") @@ -814,7 +812,7 @@ def dataset_with_parent(mock_catalogue) -> dict[str, Any]: total_results=1, page_results=[ generate_search_result( - result_type=EntityTypes.TABLE, + result_type=TableEntityMapper, urn="table-abc", metadata={}, ) diff --git a/tests/home/service/test_glossary.py b/tests/home/service/test_glossary.py index bc743843..379a29fd 100644 --- a/tests/home/service/test_glossary.py +++ b/tests/home/service/test_glossary.py @@ -1,5 +1,5 @@ +from data_platform_catalogue.entities import GlossaryTermEntityMapper from data_platform_catalogue.search_types import SearchResult -from data_platform_catalogue.entities import EntityTypes from home.service.glossary import GlossaryService @@ -15,7 +15,7 @@ def test_get_context(self): "members": [ SearchResult( urn="urn:li:glossaryTerm:022b9b68-c211-47ae-aef0-2db13acfeca8", - result_type=EntityTypes.GLOSSARY_TERM, + result_type=GlossaryTermEntityMapper, name="IAO", description="Information asset owner.\n", matches={}, @@ -34,7 +34,7 @@ def test_get_context(self): ), SearchResult( urn="urn:li:glossaryTerm:022b9b68-c211-47ae-aef0-2db13acfeca8", - result_type=EntityTypes.GLOSSARY_TERM, + result_type=GlossaryTermEntityMapper, name="Other term", description="Term description to test groupings work", matches={}, @@ -59,7 +59,7 @@ def test_get_context(self): "members": [ SearchResult( urn="urn:li:glossaryTerm:0eb7af28-62b4-4149-a6fa-72a8f1fea1e6", - result_type=EntityTypes.GLOSSARY_TERM, + result_type=GlossaryTermEntityMapper, name="Security classification", description="Only data that is 'official'", matches={}, diff --git a/tests/integration/test_interact_with_search_results.py b/tests/integration/test_interact_with_search_results.py index ec4cbd98..918a261a 100644 --- a/tests/integration/test_interact_with_search_results.py +++ b/tests/integration/test_interact_with_search_results.py @@ -1,7 +1,6 @@ import pytest -from data_platform_catalogue.search_types import SearchResult -from data_platform_catalogue.entities import EntityTypes +from data_platform_catalogue.entities import TableEntityMapper from tests.conftest import ( generate_page, generate_table_metadata, @@ -46,7 +45,7 @@ def test_table_search_to_details(self, mock_catalogue): """ mock_search_response( mock_catalogue=mock_catalogue, - page_results=generate_page(result_type=EntityTypes.TABLE), + page_results=generate_page(result_type=TableEntityMapper), total_results=100, ) self.start_on_the_search_page() @@ -63,7 +62,7 @@ def test_table_search_to_details_accessibility(self, mock_catalogue): """ mock_search_response( mock_catalogue=mock_catalogue, - page_results=generate_page(result_type=EntityTypes.TABLE), + page_results=generate_page(result_type=TableEntityMapper), total_results=100, ) table_no_column_description = generate_table_metadata(column_description="") diff --git a/tests/integration/test_search_result_metadata.py b/tests/integration/test_search_result_metadata.py index 54db7c83..fdb3b95f 100644 --- a/tests/integration/test_search_result_metadata.py +++ b/tests/integration/test_search_result_metadata.py @@ -1,6 +1,5 @@ import pytest from data_platform_catalogue.search_types import SearchResult -from data_platform_catalogue.entities import EntityTypes from tests.conftest import mock_search_response @@ -21,7 +20,7 @@ def setup(self, live_server, selenium): def test_matched_fields_hidden(self, mock_catalogue): result = SearchResult( urn="fake-urn", - result_type=EntityTypes.DATABASE, + result_type=DatabaseEntityMapper, name="abc", fully_qualified_name="abc", description="bla bla bla", @@ -36,7 +35,7 @@ def test_matched_fields_hidden(self, mock_catalogue): def test_matched_fields_shown(self, mock_catalogue): result = SearchResult( urn="fake-urn", - result_type=EntityTypes.DATABASE, + result_type=DatabaseEntityMapper, name="abc", fully_qualified_name="abc", description="bla bla bla", From 28f62dcbe34131c00cf3478f41aa9ce0090723e9 Mon Sep 17 00:00:00 2001 From: Murdo Moyse Date: Wed, 4 Dec 2024 17:15:58 +0000 Subject: [PATCH 02/10] fix: test imports --- tests/integration/test_search_result_metadata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_search_result_metadata.py b/tests/integration/test_search_result_metadata.py index fdb3b95f..93a7e7e9 100644 --- a/tests/integration/test_search_result_metadata.py +++ b/tests/integration/test_search_result_metadata.py @@ -1,4 +1,5 @@ import pytest +from data_platform_catalogue.entities import DatabaseEntityMapper from data_platform_catalogue.search_types import SearchResult from tests.conftest import mock_search_response From 7b35a39c07becfdd85cb753c695bd435fd7c3ffe Mon Sep 17 00:00:00 2001 From: Murdo Moyse Date: Wed, 4 Dec 2024 17:26:39 +0000 Subject: [PATCH 03/10] feature: use mappers to remove mapping fmd to datahub types --- .../client/search/search_client.py | 32 ++----------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py index 8456f8af..b7845a59 100644 --- a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py +++ b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py @@ -52,26 +52,6 @@ def __init__(self, graph: DataHubGraph): self.list_domains_query = get_graphql_query("listDomains") self.get_glossary_terms_query = get_graphql_query("getGlossaryTerms") self.get_tags_query = get_graphql_query("getTags") - self.fmd_type_to_datahub_types_mapping = { - TableEntityMapper.find_moj_data_type: ( - DatahubEntityType.DATASET.value, - ["Model", "Table", "Seed", "Source"], - ), - ChartEntityMapper.find_moj_data_type: (DatahubEntityType.CHART.value, []), - DatabaseEntityMapper.find_moj_data_type: ( - DatahubEntityType.CONTAINER.value, - ["Database"], - ), - DashboardEntityMapper.find_moj_data_type: (DatahubEntityType.DASHBOARD.value, []), - PublicationDatasetEntityMapper.find_moj_data_type: ( - DatahubEntityType.DATASET.value, - ["Publication dataset"], - ), - PublicationCollectionEntityMapper.find_moj_data_type: ( - DatahubEntityType.CONTAINER.value, - ["Publication collection"], - ), - } self.datahub_types_to_fmd_type_and_parser_mapping = { ( DatahubEntityType.DATASET.value, @@ -150,18 +130,12 @@ def search( start = 0 if page is None else int(page) * count - fmd_entity_types = [result_type.find_moj_data_type for result_type in result_types] entity_type_filters = [ ( - MultiSelectFilter( - "_entityType", - self.fmd_type_to_datahub_types_mapping[entity_type][0], - ), - MultiSelectFilter( - "typeNames", self.fmd_type_to_datahub_types_mapping[entity_type][1] - ), + MultiSelectFilter("_entityType", result.datahub_type), + MultiSelectFilter("typeNames", result.datahub_subtypes), ) - for entity_type in fmd_entity_types + for result in result_types ] formatted_filters = map_filters(filters, entity_type_filters) From 41569496ba9c0aa29526ac015be78930aa82dcc7 Mon Sep 17 00:00:00 2001 From: Murdo Moyse Date: Wed, 4 Dec 2024 22:09:35 +0000 Subject: [PATCH 04/10] fix: Use DatahubSubtype Enum values --- .../data_platform_catalogue/entities.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/datahub-client/data_platform_catalogue/entities.py b/lib/datahub-client/data_platform_catalogue/entities.py index cbcb6f09..e8977b03 100644 --- a/lib/datahub-client/data_platform_catalogue/entities.py +++ b/lib/datahub-client/data_platform_catalogue/entities.py @@ -55,7 +55,12 @@ class FindMoJDataEntityMapper: TableEntityMapper = FindMoJDataEntityMapper( FindMoJDataEntityType.TABLE.value, DatahubEntityType.DATASET.value, - ["Model", "Table", "Seed", "Source"], + [ + DatahubSubtype.MODEL.value, + DatahubSubtype.TABLE.value, + DatahubSubtype.SEED.value, + DatahubSubtype.SOURCE.value + ], "table" ) @@ -76,7 +81,7 @@ class FindMoJDataEntityMapper: DatabaseEntityMapper = FindMoJDataEntityMapper( FindMoJDataEntityType.DATABASE.value, DatahubEntityType.CONTAINER.value, - ["Database"], + [DatahubSubtype.DATABASE.value], "database" ) @@ -90,14 +95,14 @@ class FindMoJDataEntityMapper: PublicationDatasetEntityMapper = FindMoJDataEntityMapper( FindMoJDataEntityType.PUBLICATION_DATASET.value, DatahubEntityType.DATASET.value, - ["Publication dataset"], + [DatahubSubtype.PUBLICATION_DATASET.value], "publication_dataset" ) PublicationCollectionEntityMapper = FindMoJDataEntityMapper( FindMoJDataEntityType.PUBLICATION_COLLECTION.value, DatahubEntityType.CONTAINER.value, - ["Publication collection"], + [DatahubSubtype.PUBLICATION_COLLECTION.value], "publication_collection" ) From 0b51cf402527aaca5f0cedf6be2c008d479e4b9a Mon Sep 17 00:00:00 2001 From: Murdo Moyse Date: Fri, 6 Dec 2024 09:47:22 +0000 Subject: [PATCH 05/10] fix: choosing filters --- home/service/details.py | 10 ++--- home/service/search.py | 21 ++++------ .../client/search/search_client.py | 10 ++--- .../data_platform_catalogue/entities.py | 42 ++++++++++++------- templates/partial/search_result.html | 2 +- 5 files changed, 46 insertions(+), 39 deletions(-) diff --git a/home/service/details.py b/home/service/details.py index e9526b42..8f4784c1 100644 --- a/home/service/details.py +++ b/home/service/details.py @@ -117,7 +117,7 @@ def _get_context(self): "entity": self.table_metadata, "entity_type": "Table", "parent_entity": self.parent_entity, - "parent_type": DatabaseEntityMapper.datahub_type.lower(), + "parent_type": DatabaseEntityMapper.datahub_type.value.lower(), "h1_value": self.table_metadata.name, "has_lineage": self.has_lineage(), "lineage_url": f"{split_datahub_url.scheme}://{split_datahub_url.netloc}/dataset/{self.table_metadata.urn}/Lineage?is_lineage_mode=true&", # noqa: E501 @@ -164,7 +164,7 @@ def _get_context(self): self.chart_metadata.platform.display_name ), "parent_entity": self.parent_entity, - "parent_type": DashboardEntityMapper.datahub_type.lower(), + "parent_type": DashboardEntityMapper.datahub_type.value.lower(), "h1_value": self.chart_metadata.name, "is_access_requirements_a_url": is_access_requirements_a_url( self.chart_metadata.custom_properties.access_information.dc_access_requirements @@ -222,7 +222,7 @@ def __init__(self, urn: str): def _get_context(self): context = { "entity": self.publication_collection_metadata, - "entity_type": PublicationCollectionEntityMapper.find_moj_data_type, + "entity_type": PublicationCollectionEntityMapper.find_moj_data_type.value, "platform_name": friendly_platform_name( self.publication_collection_metadata.platform.display_name ), @@ -261,12 +261,12 @@ def _get_context(self): return { "entity": self.publication_dataset_metadata, - "entity_type": PublicationDatasetEntityMapper.find_moj_data_type, + "entity_type": PublicationDatasetEntityMapper.find_moj_data_type.value, "parent_entity": self.parent_entity, "platform_name": friendly_platform_name( self.publication_dataset_metadata.platform.display_name ), - "parent_type": DatabaseEntityMapper.datahub_type.lower(), + "parent_type": DatabaseEntityMapper.datahub_type.value.lower(), "h1_value": self.publication_dataset_metadata.name, # noqa: E501 "is_access_requirements_a_url": is_access_requirements_a_url( diff --git a/home/service/search.py b/home/service/search.py index 310b504f..e2a3c658 100644 --- a/home/service/search.py +++ b/home/service/search.py @@ -2,7 +2,7 @@ from copy import deepcopy from typing import Any -from data_platform_catalogue.entities import FindMoJDataEntityType +from data_platform_catalogue.entities import FindMoJDataEntityMapper, Mappers from data_platform_catalogue.search_types import ( DomainOption, MultiSelectFilter, @@ -45,19 +45,16 @@ def _build_custom_property_filter( ) -> list[str]: return [f"{filter_param}{filter_value}" for filter_value in filter_value_list] - def _build_entity_types(self, entity_types: list[str]) -> tuple[FindMoJDataEntityType, ...]: + def _build_entity_types(self, entity_types: list[str]) -> tuple[FindMoJDataEntityMapper, ...]: default_entities = tuple( - entity - for entity in FindMoJDataEntityType - if entity.name != "GLOSSARY_TERM" + Mapper + for Mapper in Mappers + if Mapper.datahub_type.value != "GLOSSARY_TERM" ) - chosen_entities = ( - tuple( - FindMoJDataEntityType[entity] - for entity in entity_types - ) - if entity_types - else None + chosen_entities = tuple( + Mapper + for Mapper in Mappers + if Mapper.find_moj_data_type.name in entity_types ) return chosen_entities if chosen_entities else default_entities diff --git a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py index b7845a59..7b8f2e89 100644 --- a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py +++ b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py @@ -52,7 +52,7 @@ def __init__(self, graph: DataHubGraph): self.list_domains_query = get_graphql_query("listDomains") self.get_glossary_terms_query = get_graphql_query("getGlossaryTerms") self.get_tags_query = get_graphql_query("getTags") - self.datahub_types_to_fmd_type_and_parser_mapping = { + self.datahub_type.values_to_fmd_type_and_parser_mapping = { ( DatahubEntityType.DATASET.value, DatahubSubtype.PUBLICATION_DATASET.value, @@ -132,7 +132,7 @@ def search( entity_type_filters = [ ( - MultiSelectFilter("_entityType", result.datahub_type), + MultiSelectFilter("_entityType", result.datahub_type.value), MultiSelectFilter("typeNames", result.datahub_subtypes), ) for result in result_types @@ -186,7 +186,7 @@ def _parse_search_results(self, response) -> Tuple[list, list]: matched_fields = self._get_matched_fields(result=result) try: - parser, fmd_type = self.datahub_types_to_fmd_type_and_parser_mapping[ + parser, fmd_type = self.datahub_type.values_to_fmd_type_and_parser_mapping[ (entity_type, entity_subtype) ] parsed_result = parser(entity, matched_fields, fmd_type) @@ -311,7 +311,7 @@ def _parse_dataset( "total_parents": entity.get("relationships", {}).get("total", 0), "domain_name": domain.display_name, "domain_id": domain.urn, - "entity_types": self._parse_types_and_sub_types(entity, result_type.find_moj_data_type), + "entity_types": self._parse_types_and_sub_types(entity, result_type.find_moj_data_type.value), } logger.debug(f"{metadata=}") @@ -451,7 +451,7 @@ def _parse_container( "owner_email": owner.email, "domain_name": domain.display_name, "domain_id": domain.urn, - "entity_types": self._parse_types_and_sub_types(entity, subtype.find_moj_data_type), + "entity_types": self._parse_types_and_sub_types(entity, subtype.find_moj_data_type.value), } metadata.update(custom_properties) diff --git a/lib/datahub-client/data_platform_catalogue/entities.py b/lib/datahub-client/data_platform_catalogue/entities.py index e8977b03..ff4fee2f 100644 --- a/lib/datahub-client/data_platform_catalogue/entities.py +++ b/lib/datahub-client/data_platform_catalogue/entities.py @@ -46,15 +46,15 @@ class FindMoJDataEntityType(Enum): @dataclass class FindMoJDataEntityMapper: - find_moj_data_type: str - datahub_type: str + find_moj_data_type: FindMoJDataEntityType + datahub_type: DatahubEntityType datahub_subtypes: list[str] url_formatted: str TableEntityMapper = FindMoJDataEntityMapper( - FindMoJDataEntityType.TABLE.value, - DatahubEntityType.DATASET.value, + FindMoJDataEntityType.TABLE, + DatahubEntityType.DATASET, [ DatahubSubtype.MODEL.value, DatahubSubtype.TABLE.value, @@ -65,47 +65,57 @@ class FindMoJDataEntityMapper: ) ChartEntityMapper = FindMoJDataEntityMapper( - FindMoJDataEntityType.CHART.value, - DatahubEntityType.CHART.value, + FindMoJDataEntityType.CHART, + DatahubEntityType.CHART, [], "chart" ) GlossaryTermEntityMapper = FindMoJDataEntityMapper( - FindMoJDataEntityType.GLOSSARY_TERM.value, - DatahubEntityType.GLOSSARY_TERM.value, + FindMoJDataEntityType.GLOSSARY_TERM, + DatahubEntityType.GLOSSARY_TERM, [], "glossary_term" ) DatabaseEntityMapper = FindMoJDataEntityMapper( - FindMoJDataEntityType.DATABASE.value, - DatahubEntityType.CONTAINER.value, + FindMoJDataEntityType.DATABASE, + DatahubEntityType.CONTAINER, [DatahubSubtype.DATABASE.value], "database" ) DashboardEntityMapper = FindMoJDataEntityMapper( - FindMoJDataEntityType.DASHBOARD.value, - DatahubEntityType.DASHBOARD.value, + FindMoJDataEntityType.DASHBOARD, + DatahubEntityType.DASHBOARD, [], "dashboard" ) PublicationDatasetEntityMapper = FindMoJDataEntityMapper( - FindMoJDataEntityType.PUBLICATION_DATASET.value, - DatahubEntityType.DATASET.value, + FindMoJDataEntityType.PUBLICATION_DATASET, + DatahubEntityType.DATASET, [DatahubSubtype.PUBLICATION_DATASET.value], "publication_dataset" ) PublicationCollectionEntityMapper = FindMoJDataEntityMapper( - FindMoJDataEntityType.PUBLICATION_COLLECTION.value, - DatahubEntityType.CONTAINER.value, + FindMoJDataEntityType.PUBLICATION_COLLECTION, + DatahubEntityType.CONTAINER, [DatahubSubtype.PUBLICATION_COLLECTION.value], "publication_collection" ) +Mappers = [ + TableEntityMapper, + ChartEntityMapper, + GlossaryTermEntityMapper, + DatabaseEntityMapper, + DashboardEntityMapper, + PublicationDatasetEntityMapper, + PublicationCollectionEntityMapper +] + class Audience(Enum): INTERNAL = "Internal" diff --git a/templates/partial/search_result.html b/templates/partial/search_result.html index 3c3c8526..89e9b686 100644 --- a/templates/partial/search_result.html +++ b/templates/partial/search_result.html @@ -12,7 +12,7 @@

{{result.name}} {% endwith %} - {{ result.result_type.find_moj_data_type }} + {{ result.result_type.find_moj_data_type.value }}

{% if result.description %} From 7a0f08c47641cb2cc4f992e0a91f61a27f7e0430 Mon Sep 17 00:00:00 2001 From: Murdo Moyse Date: Fri, 6 Dec 2024 09:54:49 +0000 Subject: [PATCH 06/10] fix: typo --- .../data_platform_catalogue/client/search/search_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py index 7b8f2e89..f1e8f19c 100644 --- a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py +++ b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py @@ -52,7 +52,7 @@ def __init__(self, graph: DataHubGraph): self.list_domains_query = get_graphql_query("listDomains") self.get_glossary_terms_query = get_graphql_query("getGlossaryTerms") self.get_tags_query = get_graphql_query("getTags") - self.datahub_type.values_to_fmd_type_and_parser_mapping = { + self.fmd_type_to_datahub_types_mapping = { ( DatahubEntityType.DATASET.value, DatahubSubtype.PUBLICATION_DATASET.value, @@ -186,7 +186,7 @@ def _parse_search_results(self, response) -> Tuple[list, list]: matched_fields = self._get_matched_fields(result=result) try: - parser, fmd_type = self.datahub_type.values_to_fmd_type_and_parser_mapping[ + parser, fmd_type = self.fmd_type_to_datahub_types_mapping[ (entity_type, entity_subtype) ] parsed_result = parser(entity, matched_fields, fmd_type) From 92538ca6afce4492248f512e2680a234b6f90c05 Mon Sep 17 00:00:00 2001 From: Murdo Moyse Date: Mon, 9 Dec 2024 09:21:40 +0000 Subject: [PATCH 07/10] fix: rename mapping dict --- .../data_platform_catalogue/client/search/search_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py index f1e8f19c..930124bd 100644 --- a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py +++ b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py @@ -52,7 +52,7 @@ def __init__(self, graph: DataHubGraph): self.list_domains_query = get_graphql_query("listDomains") self.get_glossary_terms_query = get_graphql_query("getGlossaryTerms") self.get_tags_query = get_graphql_query("getTags") - self.fmd_type_to_datahub_types_mapping = { + self.datahub_types_to_fmd_type_and_parser_mapping = { ( DatahubEntityType.DATASET.value, DatahubSubtype.PUBLICATION_DATASET.value, @@ -186,7 +186,7 @@ def _parse_search_results(self, response) -> Tuple[list, list]: matched_fields = self._get_matched_fields(result=result) try: - parser, fmd_type = self.fmd_type_to_datahub_types_mapping[ + parser, fmd_type = self.datahub_types_to_fmd_type_and_parser_mapping[ (entity_type, entity_subtype) ] parsed_result = parser(entity, matched_fields, fmd_type) From 8834e3a1f438edf94edbf159860f8eb7721bc7cc Mon Sep 17 00:00:00 2001 From: Murdo Moyse Date: Mon, 9 Dec 2024 10:10:40 +0000 Subject: [PATCH 08/10] fix: use url_formatted parent_type --- home/service/details.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/home/service/details.py b/home/service/details.py index 8f4784c1..c93f7dab 100644 --- a/home/service/details.py +++ b/home/service/details.py @@ -117,7 +117,7 @@ def _get_context(self): "entity": self.table_metadata, "entity_type": "Table", "parent_entity": self.parent_entity, - "parent_type": DatabaseEntityMapper.datahub_type.value.lower(), + "parent_type": DatabaseEntityMapper.url_formatted, "h1_value": self.table_metadata.name, "has_lineage": self.has_lineage(), "lineage_url": f"{split_datahub_url.scheme}://{split_datahub_url.netloc}/dataset/{self.table_metadata.urn}/Lineage?is_lineage_mode=true&", # noqa: E501 @@ -164,7 +164,7 @@ def _get_context(self): self.chart_metadata.platform.display_name ), "parent_entity": self.parent_entity, - "parent_type": DashboardEntityMapper.datahub_type.value.lower(), + "parent_type": DashboardEntityMapper.url_formatted, "h1_value": self.chart_metadata.name, "is_access_requirements_a_url": is_access_requirements_a_url( self.chart_metadata.custom_properties.access_information.dc_access_requirements @@ -266,7 +266,7 @@ def _get_context(self): "platform_name": friendly_platform_name( self.publication_dataset_metadata.platform.display_name ), - "parent_type": DatabaseEntityMapper.datahub_type.value.lower(), + "parent_type": DatabaseEntityMapper.url_formatted, "h1_value": self.publication_dataset_metadata.name, # noqa: E501 "is_access_requirements_a_url": is_access_requirements_a_url( From 7377ee4f1038fd4d194254f067c099e4b0dee316 Mon Sep 17 00:00:00 2001 From: Murdo Moyse Date: Mon, 9 Dec 2024 10:26:01 +0000 Subject: [PATCH 09/10] refactor: rename mapper variables to mapping --- home/service/details.py | 12 +++--- home/views.py | 26 ++++++------ .../client/datahub_client.py | 14 +++---- .../client/search/search_client.py | 42 +++++++++---------- .../data_platform_catalogue/entities.py | 24 +++++------ .../tests/client/search/test_search_client.py | 34 +++++++-------- .../test_integration_with_datahub_server.py | 4 +- tests/conftest.py | 20 ++++----- tests/home/service/test_glossary.py | 8 ++-- .../test_interact_with_search_results.py | 6 +-- .../test_search_result_metadata.py | 6 +-- 11 files changed, 98 insertions(+), 98 deletions(-) diff --git a/home/service/details.py b/home/service/details.py index c93f7dab..25023664 100644 --- a/home/service/details.py +++ b/home/service/details.py @@ -2,8 +2,8 @@ from urllib.parse import urlsplit from data_platform_catalogue.entities import ( - EntityRef, RelationshipType, DatabaseEntityMapper, - DashboardEntityMapper, PublicationCollectionEntityMapper, PublicationDatasetEntityMapper + EntityRef, RelationshipType, DatabaseEntityMapping, + DashboardEntityMapping, PublicationCollectionEntityMapper, PublicationDatasetEntityMapping ) from django.core.exceptions import ObjectDoesNotExist, ValidationError from django.core.validators import URLValidator @@ -117,7 +117,7 @@ def _get_context(self): "entity": self.table_metadata, "entity_type": "Table", "parent_entity": self.parent_entity, - "parent_type": DatabaseEntityMapper.url_formatted, + "parent_type": DatabaseEntityMapping.url_formatted, "h1_value": self.table_metadata.name, "has_lineage": self.has_lineage(), "lineage_url": f"{split_datahub_url.scheme}://{split_datahub_url.netloc}/dataset/{self.table_metadata.urn}/Lineage?is_lineage_mode=true&", # noqa: E501 @@ -164,7 +164,7 @@ def _get_context(self): self.chart_metadata.platform.display_name ), "parent_entity": self.parent_entity, - "parent_type": DashboardEntityMapper.url_formatted, + "parent_type": DashboardEntityMapping.url_formatted, "h1_value": self.chart_metadata.name, "is_access_requirements_a_url": is_access_requirements_a_url( self.chart_metadata.custom_properties.access_information.dc_access_requirements @@ -261,12 +261,12 @@ def _get_context(self): return { "entity": self.publication_dataset_metadata, - "entity_type": PublicationDatasetEntityMapper.find_moj_data_type.value, + "entity_type": PublicationDatasetEntityMapping.find_moj_data_type.value, "parent_entity": self.parent_entity, "platform_name": friendly_platform_name( self.publication_dataset_metadata.platform.display_name ), - "parent_type": DatabaseEntityMapper.url_formatted, + "parent_type": DatabaseEntityMapping.url_formatted, "h1_value": self.publication_dataset_metadata.name, # noqa: E501 "is_access_requirements_a_url": is_access_requirements_a_url( diff --git a/home/views.py b/home/views.py index d6be8032..5c56ac01 100644 --- a/home/views.py +++ b/home/views.py @@ -4,12 +4,12 @@ from data_platform_catalogue.client.exceptions import EntityDoesNotExist from data_platform_catalogue.entities import ( - ChartEntityMapper, - DashboardEntityMapper, - DatabaseEntityMapper, + ChartEntityMapping, + DashboardEntityMapping, + DatabaseEntityMapping, PublicationCollectionEntityMapper, - PublicationDatasetEntityMapper, - TableEntityMapper, + PublicationDatasetEntityMapping, + TableEntityMapping, ) from data_platform_catalogue.search_types import DomainOption from django.conf import settings @@ -38,12 +38,12 @@ from home.service.search import SearchService type_details_map = { - TableEntityMapper.url_formatted: DatasetDetailsService, - DatabaseEntityMapper.url_formatted: DatabaseDetailsService, - ChartEntityMapper.url_formatted: ChartDetailsService, - DashboardEntityMapper.url_formatted: DashboardDetailsService, + TableEntityMapping.url_formatted: DatasetDetailsService, + DatabaseEntityMapping.url_formatted: DatabaseDetailsService, + ChartEntityMapping.url_formatted: ChartDetailsService, + DashboardEntityMapping.url_formatted: DashboardDetailsService, PublicationCollectionEntityMapper.url_formatted: PublicationCollectionDetailsService, - PublicationDatasetEntityMapper.url_formatted: PublicationDatasetDetailsService + PublicationDatasetEntityMapping.url_formatted: PublicationDatasetDetailsService } @@ -74,11 +74,11 @@ def details_view(request, result_type, urn): @cache_control(max_age=300, private=True) def details_view_csv(request, result_type, urn) -> HttpResponse: match result_type: - case TableEntityMapper.url_formatted: + case TableEntityMapping.url_formatted: csv_formatter = DatasetDetailsCsvFormatter(DatasetDetailsService(urn)) - case DatabaseEntityMapper.url_formatted: + case DatabaseEntityMapping.url_formatted: csv_formatter = DatabaseDetailsCsvFormatter(DatabaseDetailsService(urn)) - case DashboardEntityMapper.url_formatted: + case DashboardEntityMapping.url_formatted: csv_formatter = DashboardDetailsCsvFormatter(DashboardDetailsService(urn)) case _: logging.error("Invalid result type for csv details view %s", result_type) diff --git a/lib/datahub-client/data_platform_catalogue/client/datahub_client.py b/lib/datahub-client/data_platform_catalogue/client/datahub_client.py index a8f8118c..4a89dd5c 100644 --- a/lib/datahub-client/data_platform_catalogue/client/datahub_client.py +++ b/lib/datahub-client/data_platform_catalogue/client/datahub_client.py @@ -64,9 +64,9 @@ EntityRef, EntitySummary, FindMoJDataEntityMapper, - TableEntityMapper, - ChartEntityMapper, - DatabaseEntityMapper, + TableEntityMapping, + ChartEntityMapping, + DatabaseEntityMapping, Governance, PublicationCollection, PublicationDataset, @@ -207,9 +207,9 @@ def search( count: int = 20, page: str | None = None, result_types: Sequence[FindMoJDataEntityMapper] = ( - TableEntityMapper, - ChartEntityMapper, - DatabaseEntityMapper, + TableEntityMapping, + ChartEntityMapping, + DatabaseEntityMapping, ), filters: Sequence[MultiSelectFilter] | None = None, sort: SortOption | None = None, @@ -398,7 +398,7 @@ def get_publication_collection_details(self, urn: str) -> PublicationCollection: child_relations = parse_relations( relationship_type=RelationshipType.CHILD, relations_list=[response["relationships"]], - entity_type_of_relations=PublicationDatasetEntityMapper.url_formatted, + entity_type_of_relations=PublicationDatasetEntityMapping.url_formatted, ) relations_to_display = self.list_relations_to_display(child_relations) diff --git a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py index 930124bd..6386d6a1 100644 --- a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py +++ b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py @@ -22,13 +22,13 @@ DatahubEntityType, DatahubSubtype, FindMoJDataEntityMapper, - TableEntityMapper, - ChartEntityMapper, - DatabaseEntityMapper, - DashboardEntityMapper, - PublicationDatasetEntityMapper, + TableEntityMapping, + ChartEntityMapping, + DatabaseEntityMapping, + DashboardEntityMapping, + PublicationDatasetEntityMapping, PublicationCollectionEntityMapper, - GlossaryTermEntityMapper, + GlossaryTermEntityMapping, EntityRef ) from data_platform_catalogue.search_types import ( @@ -58,31 +58,31 @@ def __init__(self, graph: DataHubGraph): DatahubSubtype.PUBLICATION_DATASET.value, ): ( self._parse_dataset, - PublicationDatasetEntityMapper, + PublicationDatasetEntityMapping, ), (DatahubEntityType.DATASET.value, DatahubSubtype.METRIC.value): ( self._parse_dataset, - TableEntityMapper, + TableEntityMapping, ), (DatahubEntityType.DATASET.value, DatahubSubtype.TABLE.value): ( self._parse_dataset, - TableEntityMapper, + TableEntityMapping, ), (DatahubEntityType.DATASET.value, DatahubSubtype.MODEL.value): ( self._parse_dataset, - TableEntityMapper, + TableEntityMapping, ), (DatahubEntityType.DATASET.value, DatahubSubtype.SEED.value): ( self._parse_dataset, - TableEntityMapper, + TableEntityMapping, ), (DatahubEntityType.DATASET.value, DatahubSubtype.SOURCE.value): ( self._parse_dataset, - TableEntityMapper, + TableEntityMapping, ), (DatahubEntityType.CONTAINER.value, DatahubSubtype.DATABASE.value): ( self._parse_container, - DatabaseEntityMapper, + DatabaseEntityMapping, ), ( DatahubEntityType.CONTAINER.value, @@ -96,15 +96,15 @@ def __init__(self, graph: DataHubGraph): DatahubSubtype.PUBLICATION_DATASET.value, ): ( self._parse_container, - PublicationDatasetEntityMapper, + PublicationDatasetEntityMapping, ), (DatahubEntityType.CHART.value, None): ( self._parse_dataset, - ChartEntityMapper, + ChartEntityMapping, ), (DatahubEntityType.DASHBOARD.value, None): ( self._parse_container, - DashboardEntityMapper, + DashboardEntityMapping, ), } @@ -114,9 +114,9 @@ def search( count: int = 20, page: str | None = None, result_types: Sequence[FindMoJDataEntityMapper] = ( - TableEntityMapper, - ChartEntityMapper, - DatabaseEntityMapper, + TableEntityMapping, + ChartEntityMapping, + DatabaseEntityMapping, ), filters: Sequence[MultiSelectFilter] | None = None, sort: SortOption | None = None, @@ -253,7 +253,7 @@ def _get_data_collection_page_results(self, response, key_for_results: str): matched_fields: dict = {} if entity_type == "DATASET": page_results.append( - self._parse_dataset(entity, matched_fields, TableEntityMapper) + self._parse_dataset(entity, matched_fields, TableEntityMapping) ) else: raise ValueError(f"Unexpected entity type: {entity_type}") @@ -370,7 +370,7 @@ def _parse_glossary_term(self, entity) -> SearchResult: return SearchResult( urn=entity["urn"], - result_type=GlossaryTermEntityMapper, + result_type=GlossaryTermEntityMapping, matches={}, name=name, display_name=display_name, diff --git a/lib/datahub-client/data_platform_catalogue/entities.py b/lib/datahub-client/data_platform_catalogue/entities.py index ff4fee2f..cab0e297 100644 --- a/lib/datahub-client/data_platform_catalogue/entities.py +++ b/lib/datahub-client/data_platform_catalogue/entities.py @@ -52,7 +52,7 @@ class FindMoJDataEntityMapper: url_formatted: str -TableEntityMapper = FindMoJDataEntityMapper( +TableEntityMapping = FindMoJDataEntityMapper( FindMoJDataEntityType.TABLE, DatahubEntityType.DATASET, [ @@ -64,35 +64,35 @@ class FindMoJDataEntityMapper: "table" ) -ChartEntityMapper = FindMoJDataEntityMapper( +ChartEntityMapping = FindMoJDataEntityMapper( FindMoJDataEntityType.CHART, DatahubEntityType.CHART, [], "chart" ) -GlossaryTermEntityMapper = FindMoJDataEntityMapper( +GlossaryTermEntityMapping = FindMoJDataEntityMapper( FindMoJDataEntityType.GLOSSARY_TERM, DatahubEntityType.GLOSSARY_TERM, [], "glossary_term" ) -DatabaseEntityMapper = FindMoJDataEntityMapper( +DatabaseEntityMapping = FindMoJDataEntityMapper( FindMoJDataEntityType.DATABASE, DatahubEntityType.CONTAINER, [DatahubSubtype.DATABASE.value], "database" ) -DashboardEntityMapper = FindMoJDataEntityMapper( +DashboardEntityMapping = FindMoJDataEntityMapper( FindMoJDataEntityType.DASHBOARD, DatahubEntityType.DASHBOARD, [], "dashboard" ) -PublicationDatasetEntityMapper = FindMoJDataEntityMapper( +PublicationDatasetEntityMapping = FindMoJDataEntityMapper( FindMoJDataEntityType.PUBLICATION_DATASET, DatahubEntityType.DATASET, [DatahubSubtype.PUBLICATION_DATASET.value], @@ -107,12 +107,12 @@ class FindMoJDataEntityMapper: ) Mappers = [ - TableEntityMapper, - ChartEntityMapper, - GlossaryTermEntityMapper, - DatabaseEntityMapper, - DashboardEntityMapper, - PublicationDatasetEntityMapper, + TableEntityMapping, + ChartEntityMapping, + GlossaryTermEntityMapping, + DatabaseEntityMapping, + DashboardEntityMapping, + PublicationDatasetEntityMapping, PublicationCollectionEntityMapper ] diff --git a/lib/datahub-client/tests/client/search/test_search_client.py b/lib/datahub-client/tests/client/search/test_search_client.py index 4436413b..4e5f6a60 100644 --- a/lib/datahub-client/tests/client/search/test_search_client.py +++ b/lib/datahub-client/tests/client/search/test_search_client.py @@ -7,10 +7,10 @@ AccessInformation, DataSummary, EntityRef, - TableEntityMapper, - DatabaseEntityMapper, - ChartEntityMapper, - GlossaryTermEntityMapper, + TableEntityMapping, + DatabaseEntityMapping, + ChartEntityMapping, + GlossaryTermEntityMapping, FurtherInformation, TagRef, UsageRestrictions, @@ -125,7 +125,7 @@ def test_one_search_result(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers,PROD)", - result_type=TableEntityMapper, + result_type=TableEntityMapping, name="customers", display_name="customers", fully_qualified_name="jaffle_shop.customers", @@ -216,7 +216,7 @@ def test_dataset_result(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers,PROD)", - result_type=TableEntityMapper, + result_type=TableEntityMapping, name="customers", display_name="customers", fully_qualified_name="jaffle_shop.customers", @@ -366,7 +366,7 @@ def test_2_dataset_results_with_one_malformed_result(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers,PROD)", - result_type=TableEntityMapper, + result_type=TableEntityMapping, name="customers", display_name="customers", fully_qualified_name="jaffle_shop.customers", @@ -463,7 +463,7 @@ def test_full_page(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers,PROD)", - result_type=TableEntityMapper, + result_type=TableEntityMapping, name="customers", fully_qualified_name="jaffle_shop.customers", display_name="customers", @@ -494,7 +494,7 @@ def test_full_page(mock_graph, searcher): ), SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers2,PROD)", - result_type=TableEntityMapper, + result_type=TableEntityMapping, name="customers2", fully_qualified_name="calm-pagoda-323403.jaffle_shop.customers2", display_name="customers2", @@ -525,7 +525,7 @@ def test_full_page(mock_graph, searcher): ), SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers3,PROD)", - result_type=TableEntityMapper, + result_type=TableEntityMapping, name="customers3", fully_qualified_name="calm-pagoda-323403.jaffle_shop.customers3", display_name="customers3", @@ -603,7 +603,7 @@ def test_query_match(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers,PROD)", - result_type=TableEntityMapper, + result_type=TableEntityMapping, name="customers", display_name="customers", fully_qualified_name="calm-pagoda-323403.jaffle_shop.customers", @@ -688,7 +688,7 @@ def test_result_with_owner(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,calm-pagoda-323403.jaffle_shop.customers,PROD)", - result_type=TableEntityMapper, + result_type=TableEntityMapping, name="customers", display_name="customers", fully_qualified_name="calm-pagoda-323403.jaffle_shop.customers", @@ -916,7 +916,7 @@ def test_get_glossary_terms(mock_graph, searcher): } ] }, - result_type=GlossaryTermEntityMapper, + result_type=GlossaryTermEntityMapping, ), SearchResult( urn="urn:li:glossaryTerm:0eb7af28-62b4-4149-a6fa-72a8f1fea1e6", @@ -925,7 +925,7 @@ def test_get_glossary_terms(mock_graph, searcher): fully_qualified_name="Security classification", description="Only data that is 'official'", metadata={"parentNodes": []}, - result_type=GlossaryTermEntityMapper, + result_type=GlossaryTermEntityMapping, ), ], ) @@ -978,7 +978,7 @@ def test_search_for_charts(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:chart:(justice-data,absconds)", - result_type=ChartEntityMapper, + result_type=ChartEntityMapping, name="Absconds", display_name="Absconds", fully_qualified_name="Absconds", @@ -1100,7 +1100,7 @@ def test_search_for_container(mock_graph, searcher): page_results=[ SearchResult( urn="urn:li:container:test_db", - result_type=DatabaseEntityMapper, + result_type=DatabaseEntityMapping, name="test_db", display_name="test_db", fully_qualified_name="test_db", @@ -1154,7 +1154,7 @@ def test_search_for_container(mock_graph, searcher): def test_tag_to_display(tags, result): test_search_result = SearchResult( urn="urn:li:dataset:(urn:li:dataPlatform:athena,test_db.test_table,PROD)", - result_type=TableEntityMapper, + result_type=TableEntityMapping, name="test_table", display_name="test_table", fully_qualified_name="test_db.test_table", diff --git a/lib/datahub-client/tests/test_integration_with_datahub_server.py b/lib/datahub-client/tests/test_integration_with_datahub_server.py index 1a75c22c..b2f7648a 100644 --- a/lib/datahub-client/tests/test_integration_with_datahub_server.py +++ b/lib/datahub-client/tests/test_integration_with_datahub_server.py @@ -20,7 +20,7 @@ Database, DomainRef, EntityRef, - TableEntityMapper, + TableEntityMapping, Governance, OwnerRef, TagRef, @@ -60,7 +60,7 @@ def test_search_by_domain(): response = client.search( filters=[MultiSelectFilter("domains", ["does-not-exist"])], - result_types=(TableEntityMapper), + result_types=(TableEntityMapping), ) assert response.total_results == 0 diff --git a/tests/conftest.py b/tests/conftest.py index 4db8db5f..4cc18cca 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -31,9 +31,9 @@ DomainRef, EntityRef, FindMoJDataEntityMapper, - DatabaseEntityMapper, - TableEntityMapper, - GlossaryTermEntityMapper, + DatabaseEntityMapping, + TableEntityMapping, + GlossaryTermEntityMapping, EntitySummary, GlossaryTermRef, Governance, @@ -314,7 +314,7 @@ def generate_search_result( return SearchResult( urn=urn or fake.unique.name(), result_type=( - choice((DatabaseEntityMapper, TableEntityMapper)) + choice((DatabaseEntityMapping, TableEntityMapping)) if result_type is None else result_type ), @@ -328,7 +328,7 @@ def generate_search_result( def search_result_from_database(database: Database): return SearchResult( urn=database.urn or "", - result_type=DatabaseEntityMapper, + result_type=DatabaseEntityMapping, name=database.name, fully_qualified_name=database.fully_qualified_name or "", description=database.description, @@ -710,7 +710,7 @@ def mock_get_glossary_terms_response(mock_catalogue): } ] }, - result_type=GlossaryTermEntityMapper, + result_type=GlossaryTermEntityMapping, ), SearchResult( urn="urn:li:glossaryTerm:022b9b68-c211-47ae-aef0-2db13acfeca8", @@ -726,14 +726,14 @@ def mock_get_glossary_terms_response(mock_catalogue): } ] }, - result_type=GlossaryTermEntityMapper, + result_type=GlossaryTermEntityMapping, ), SearchResult( urn="urn:li:glossaryTerm:0eb7af28-62b4-4149-a6fa-72a8f1fea1e6", name="Security classification", description="Only data that is 'official'", metadata={"parentNodes": []}, - result_type=GlossaryTermEntityMapper, + result_type=GlossaryTermEntityMapping, ), ], ) @@ -790,7 +790,7 @@ def search_context(search_service): def detail_database_context(mock_catalogue): mock_catalogue.search.return_value = SearchResponse( total_results=1, - page_results=generate_page(page_size=1, result_type=DatabaseEntityMapper), + page_results=generate_page(page_size=1, result_type=DatabaseEntityMapping), ) details_service = DatabaseDetailsService(urn="urn:li:container:test") @@ -812,7 +812,7 @@ def dataset_with_parent(mock_catalogue) -> dict[str, Any]: total_results=1, page_results=[ generate_search_result( - result_type=TableEntityMapper, + result_type=TableEntityMapping, urn="table-abc", metadata={}, ) diff --git a/tests/home/service/test_glossary.py b/tests/home/service/test_glossary.py index 379a29fd..2a551fde 100644 --- a/tests/home/service/test_glossary.py +++ b/tests/home/service/test_glossary.py @@ -1,4 +1,4 @@ -from data_platform_catalogue.entities import GlossaryTermEntityMapper +from data_platform_catalogue.entities import GlossaryTermEntityMapping from data_platform_catalogue.search_types import SearchResult from home.service.glossary import GlossaryService @@ -15,7 +15,7 @@ def test_get_context(self): "members": [ SearchResult( urn="urn:li:glossaryTerm:022b9b68-c211-47ae-aef0-2db13acfeca8", - result_type=GlossaryTermEntityMapper, + result_type=GlossaryTermEntityMapping, name="IAO", description="Information asset owner.\n", matches={}, @@ -34,7 +34,7 @@ def test_get_context(self): ), SearchResult( urn="urn:li:glossaryTerm:022b9b68-c211-47ae-aef0-2db13acfeca8", - result_type=GlossaryTermEntityMapper, + result_type=GlossaryTermEntityMapping, name="Other term", description="Term description to test groupings work", matches={}, @@ -59,7 +59,7 @@ def test_get_context(self): "members": [ SearchResult( urn="urn:li:glossaryTerm:0eb7af28-62b4-4149-a6fa-72a8f1fea1e6", - result_type=GlossaryTermEntityMapper, + result_type=GlossaryTermEntityMapping, name="Security classification", description="Only data that is 'official'", matches={}, diff --git a/tests/integration/test_interact_with_search_results.py b/tests/integration/test_interact_with_search_results.py index 918a261a..05963dd1 100644 --- a/tests/integration/test_interact_with_search_results.py +++ b/tests/integration/test_interact_with_search_results.py @@ -1,6 +1,6 @@ import pytest -from data_platform_catalogue.entities import TableEntityMapper +from data_platform_catalogue.entities import TableEntityMapping from tests.conftest import ( generate_page, generate_table_metadata, @@ -45,7 +45,7 @@ def test_table_search_to_details(self, mock_catalogue): """ mock_search_response( mock_catalogue=mock_catalogue, - page_results=generate_page(result_type=TableEntityMapper), + page_results=generate_page(result_type=TableEntityMapping), total_results=100, ) self.start_on_the_search_page() @@ -62,7 +62,7 @@ def test_table_search_to_details_accessibility(self, mock_catalogue): """ mock_search_response( mock_catalogue=mock_catalogue, - page_results=generate_page(result_type=TableEntityMapper), + page_results=generate_page(result_type=TableEntityMapping), total_results=100, ) table_no_column_description = generate_table_metadata(column_description="") diff --git a/tests/integration/test_search_result_metadata.py b/tests/integration/test_search_result_metadata.py index 93a7e7e9..3d080f49 100644 --- a/tests/integration/test_search_result_metadata.py +++ b/tests/integration/test_search_result_metadata.py @@ -1,5 +1,5 @@ import pytest -from data_platform_catalogue.entities import DatabaseEntityMapper +from data_platform_catalogue.entities import DatabaseEntityMapping from data_platform_catalogue.search_types import SearchResult from tests.conftest import mock_search_response @@ -21,7 +21,7 @@ def setup(self, live_server, selenium): def test_matched_fields_hidden(self, mock_catalogue): result = SearchResult( urn="fake-urn", - result_type=DatabaseEntityMapper, + result_type=DatabaseEntityMapping, name="abc", fully_qualified_name="abc", description="bla bla bla", @@ -36,7 +36,7 @@ def test_matched_fields_hidden(self, mock_catalogue): def test_matched_fields_shown(self, mock_catalogue): result = SearchResult( urn="fake-urn", - result_type=DatabaseEntityMapper, + result_type=DatabaseEntityMapping, name="abc", fully_qualified_name="abc", description="bla bla bla", From c321eadb15e2ec080d4f8086ff350e35bf003631 Mon Sep 17 00:00:00 2001 From: Murdo Moyse Date: Mon, 9 Dec 2024 10:28:03 +0000 Subject: [PATCH 10/10] fix: FindMoJData -> FindMoJdata --- home/forms/search.py | 4 +-- home/service/search.py | 4 +-- .../client/datahub_client.py | 4 +-- .../client/search/search_client.py | 10 +++--- .../data_platform_catalogue/entities.py | 34 +++++++++---------- .../data_platform_catalogue/search_types.py | 4 +-- tests/conftest.py | 6 ++-- tests/end_to_end/test_datahub_to_fmd.py | 2 +- 8 files changed, 34 insertions(+), 34 deletions(-) diff --git a/home/forms/search.py b/home/forms/search.py index d0565f34..299bc6ff 100644 --- a/home/forms/search.py +++ b/home/forms/search.py @@ -1,7 +1,7 @@ from copy import deepcopy from urllib.parse import urlencode -from data_platform_catalogue.entities import FindMoJDataEntityType +from data_platform_catalogue.entities import FindMoJdataEntityType from data_platform_catalogue.search_types import DomainOption from django import forms from django.utils.translation import gettext as _ @@ -38,7 +38,7 @@ def get_entity_types(): return sorted( [ (entity.name, entity.value) - for entity in FindMoJDataEntityType + for entity in FindMoJdataEntityType if entity.name != "GLOSSARY_TERM" ] ) diff --git a/home/service/search.py b/home/service/search.py index e2a3c658..a29f3165 100644 --- a/home/service/search.py +++ b/home/service/search.py @@ -2,7 +2,7 @@ from copy import deepcopy from typing import Any -from data_platform_catalogue.entities import FindMoJDataEntityMapper, Mappers +from data_platform_catalogue.entities import FindMoJdataEntityMapper, Mappers from data_platform_catalogue.search_types import ( DomainOption, MultiSelectFilter, @@ -45,7 +45,7 @@ def _build_custom_property_filter( ) -> list[str]: return [f"{filter_param}{filter_value}" for filter_value in filter_value_list] - def _build_entity_types(self, entity_types: list[str]) -> tuple[FindMoJDataEntityMapper, ...]: + def _build_entity_types(self, entity_types: list[str]) -> tuple[FindMoJdataEntityMapper, ...]: default_entities = tuple( Mapper for Mapper in Mappers diff --git a/lib/datahub-client/data_platform_catalogue/client/datahub_client.py b/lib/datahub-client/data_platform_catalogue/client/datahub_client.py index 4a89dd5c..7c83ecd8 100644 --- a/lib/datahub-client/data_platform_catalogue/client/datahub_client.py +++ b/lib/datahub-client/data_platform_catalogue/client/datahub_client.py @@ -63,7 +63,7 @@ Database, EntityRef, EntitySummary, - FindMoJDataEntityMapper, + FindMoJdataEntityMapper, TableEntityMapping, ChartEntityMapping, DatabaseEntityMapping, @@ -206,7 +206,7 @@ def search( query: str = "*", count: int = 20, page: str | None = None, - result_types: Sequence[FindMoJDataEntityMapper] = ( + result_types: Sequence[FindMoJdataEntityMapper] = ( TableEntityMapping, ChartEntityMapping, DatabaseEntityMapping, diff --git a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py index 6386d6a1..448cec5f 100644 --- a/lib/datahub-client/data_platform_catalogue/client/search/search_client.py +++ b/lib/datahub-client/data_platform_catalogue/client/search/search_client.py @@ -21,7 +21,7 @@ from data_platform_catalogue.entities import ( DatahubEntityType, DatahubSubtype, - FindMoJDataEntityMapper, + FindMoJdataEntityMapper, TableEntityMapping, ChartEntityMapping, DatabaseEntityMapping, @@ -113,7 +113,7 @@ def search( query: str = "*", count: int = 20, page: str | None = None, - result_types: Sequence[FindMoJDataEntityMapper] = ( + result_types: Sequence[FindMoJdataEntityMapper] = ( TableEntityMapping, ChartEntityMapping, DatabaseEntityMapping, @@ -261,7 +261,7 @@ def _get_data_collection_page_results(self, response, key_for_results: str): def _map_result_types( self, - result_types: Sequence[FindMoJDataEntityMapper], + result_types: Sequence[FindMoJdataEntityMapper], ) -> list[str]: """ Map result types to Datahub EntityTypes @@ -288,7 +288,7 @@ def _parse_list_domains( return list_domain_options def _parse_dataset( - self, entity: dict[str, Any], matches, result_type: FindMoJDataEntityMapper + self, entity: dict[str, Any], matches, result_type: FindMoJdataEntityMapper ) -> SearchResult: """ Map a dataset entity to a SearchResult @@ -433,7 +433,7 @@ def _parse_global_tags(self, tag_query_results) -> list[tuple[str, str]]: return tags_list def _parse_container( - self, entity: dict[str, Any], matches, subtype: FindMoJDataEntityMapper + self, entity: dict[str, Any], matches, subtype: FindMoJdataEntityMapper ) -> SearchResult: """ Map a Container entity to a SearchResult diff --git a/lib/datahub-client/data_platform_catalogue/entities.py b/lib/datahub-client/data_platform_catalogue/entities.py index cab0e297..917e34fb 100644 --- a/lib/datahub-client/data_platform_catalogue/entities.py +++ b/lib/datahub-client/data_platform_catalogue/entities.py @@ -34,7 +34,7 @@ class DatahubSubtype(Enum): DATABASE = "Database" -class FindMoJDataEntityType(Enum): +class FindMoJdataEntityType(Enum): TABLE = "Table" GLOSSARY_TERM = "Glossary term" CHART = "Chart" @@ -45,15 +45,15 @@ class FindMoJDataEntityType(Enum): @dataclass -class FindMoJDataEntityMapper: - find_moj_data_type: FindMoJDataEntityType +class FindMoJdataEntityMapper: + find_moj_data_type: FindMoJdataEntityType datahub_type: DatahubEntityType datahub_subtypes: list[str] url_formatted: str -TableEntityMapping = FindMoJDataEntityMapper( - FindMoJDataEntityType.TABLE, +TableEntityMapping = FindMoJdataEntityMapper( + FindMoJdataEntityType.TABLE, DatahubEntityType.DATASET, [ DatahubSubtype.MODEL.value, @@ -64,43 +64,43 @@ class FindMoJDataEntityMapper: "table" ) -ChartEntityMapping = FindMoJDataEntityMapper( - FindMoJDataEntityType.CHART, +ChartEntityMapping = FindMoJdataEntityMapper( + FindMoJdataEntityType.CHART, DatahubEntityType.CHART, [], "chart" ) -GlossaryTermEntityMapping = FindMoJDataEntityMapper( - FindMoJDataEntityType.GLOSSARY_TERM, +GlossaryTermEntityMapping = FindMoJdataEntityMapper( + FindMoJdataEntityType.GLOSSARY_TERM, DatahubEntityType.GLOSSARY_TERM, [], "glossary_term" ) -DatabaseEntityMapping = FindMoJDataEntityMapper( - FindMoJDataEntityType.DATABASE, +DatabaseEntityMapping = FindMoJdataEntityMapper( + FindMoJdataEntityType.DATABASE, DatahubEntityType.CONTAINER, [DatahubSubtype.DATABASE.value], "database" ) -DashboardEntityMapping = FindMoJDataEntityMapper( - FindMoJDataEntityType.DASHBOARD, +DashboardEntityMapping = FindMoJdataEntityMapper( + FindMoJdataEntityType.DASHBOARD, DatahubEntityType.DASHBOARD, [], "dashboard" ) -PublicationDatasetEntityMapping = FindMoJDataEntityMapper( - FindMoJDataEntityType.PUBLICATION_DATASET, +PublicationDatasetEntityMapping = FindMoJdataEntityMapper( + FindMoJdataEntityType.PUBLICATION_DATASET, DatahubEntityType.DATASET, [DatahubSubtype.PUBLICATION_DATASET.value], "publication_dataset" ) -PublicationCollectionEntityMapper = FindMoJDataEntityMapper( - FindMoJDataEntityType.PUBLICATION_COLLECTION, +PublicationCollectionEntityMapper = FindMoJdataEntityMapper( + FindMoJdataEntityType.PUBLICATION_COLLECTION, DatahubEntityType.CONTAINER, [DatahubSubtype.PUBLICATION_COLLECTION.value], "publication_collection" diff --git a/lib/datahub-client/data_platform_catalogue/search_types.py b/lib/datahub-client/data_platform_catalogue/search_types.py index a4d1f59b..fd459d43 100644 --- a/lib/datahub-client/data_platform_catalogue/search_types.py +++ b/lib/datahub-client/data_platform_catalogue/search_types.py @@ -7,7 +7,7 @@ from data_platform_catalogue.entities import ( EntityRef, GlossaryTermRef, - FindMoJDataEntityMapper, + FindMoJdataEntityMapper, TagRef, ) @@ -64,7 +64,7 @@ class DomainOption: @dataclass class SearchResult: urn: str - result_type: FindMoJDataEntityMapper + result_type: FindMoJdataEntityMapper name: str display_name: str = "" fully_qualified_name: str = "" diff --git a/tests/conftest.py b/tests/conftest.py index 4cc18cca..5044a2b1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -30,7 +30,7 @@ Database, DomainRef, EntityRef, - FindMoJDataEntityMapper, + FindMoJdataEntityMapper, DatabaseEntityMapping, TableEntityMapping, GlossaryTermEntityMapping, @@ -304,7 +304,7 @@ def page_titles(): def generate_search_result( - result_type: FindMoJDataEntityMapper | None = None, urn=None, metadata=None + result_type: FindMoJdataEntityMapper | None = None, urn=None, metadata=None ) -> SearchResult: """ Generate a random search result @@ -572,7 +572,7 @@ def example_table(name="example_table"): return generate_table_metadata(name=name) -def generate_page(page_size=20, result_type: FindMoJDataEntityMapper | None = None): +def generate_page(page_size=20, result_type: FindMoJdataEntityMapper | None = None): """ Generate a fake search page """ diff --git a/tests/end_to_end/test_datahub_to_fmd.py b/tests/end_to_end/test_datahub_to_fmd.py index e31025f8..a5d784d4 100644 --- a/tests/end_to_end/test_datahub_to_fmd.py +++ b/tests/end_to_end/test_datahub_to_fmd.py @@ -6,7 +6,7 @@ @pytest.mark.slow @pytest.mark.datahub -class TestDatahubToFindMoJData: +class TestDatahubToFindMoJdata: """ Test that Find MoJ data works with a real Datahub backend. The datahub mark is used to bypass the `mock_catalogue` fixture.