From e2ff430aefa452a2645a0a326af4c9dd2a9e8c57 Mon Sep 17 00:00:00 2001 From: Murdo Moyse Date: Mon, 2 Dec 2024 17:01:25 +0000 Subject: [PATCH] Trying to fix the filters --- home/service/search.py | 24 +++++------- .../data_platform_catalogue/client/search.py | 38 +++++-------------- .../data_platform_catalogue/entities.py | 6 --- 3 files changed, 18 insertions(+), 50 deletions(-) diff --git a/home/service/search.py b/home/service/search.py index 3dde548e..42aff45d 100644 --- a/home/service/search.py +++ b/home/service/search.py @@ -15,7 +15,6 @@ from django.utils.translation import pgettext from nltk.stem import PorterStemmer -from data_platform_catalogue.entities import RESULT_TYPES_TO_FILTER from home.forms.search import SearchForm from home.models.domain_model import DomainModel @@ -51,13 +50,11 @@ def _build_entity_types(self, entity_types: list[str]) -> tuple[EntityTypes, ... entity for entity in EntityTypes if entity.name != "GLOSSARY_TERM" - and entity not in RESULT_TYPES_TO_FILTER ) chosen_entities = ( tuple( EntityTypes[entity] for entity in entity_types - if EntityTypes[entity] not in RESULT_TYPES_TO_FILTER ) if entity_types else None @@ -65,16 +62,16 @@ def _build_entity_types(self, entity_types: list[str]) -> tuple[EntityTypes, ... return chosen_entities if chosen_entities else default_entities - def _build_entity_subtypes_filter(self, entity_types: list[str]) -> MultiSelectFilter | None: - # The filter needs a non-capitalised string rather than the enum value - subtype_strings = [ - EntityTypes[entity_type].value - for entity_type in entity_types - if EntityTypes[entity_type] in RESULT_TYPES_TO_FILTER - ] - entity_subtypes_filter = MultiSelectFilter("typeNames", subtype_strings) if subtype_strings else None + # def _build_entity_subtypes_filter(self, entity_types: list[str]) -> MultiSelectFilter | None: + # # The filter needs a non-capitalised string rather than the enum value + # subtype_strings = [ + # EntityTypes[entity_type].value + # for entity_type in entity_types + # if EntityTypes[entity_type] in RESULT_TYPES_TO_FILTER + # ] + # entity_subtypes_filter = MultiSelectFilter("typeNames", subtype_strings) if subtype_strings else None - return entity_subtypes_filter + # return entity_subtypes_filter def _format_query_value(self, query: str) -> str: query_pattern: str = r"^[\"'].+[\"']$" @@ -102,7 +99,6 @@ def _get_search_results(self, page: str, items_per_page: int) -> SearchResponse: "dc_where_to_access_dataset=", form_data.get("where_to_access", []) ) entity_types = self._build_entity_types(form_data.get("entity_types", [])) - entity_subtypes_filter = self._build_entity_subtypes_filter(form_data.get("entity_types", [])) filter_value = [] if domain: @@ -113,8 +109,6 @@ def _get_search_results(self, page: str, items_per_page: int) -> SearchResponse: filter_value.append( MultiSelectFilter("tags", [f"urn:li:tag:{tag}" for tag in tags]) ) - if entity_subtypes_filter: - filter_value.append(entity_subtypes_filter) page_for_search = str(int(page) - 1) if sort == "ascending": diff --git a/lib/datahub-client/data_platform_catalogue/client/search.py b/lib/datahub-client/data_platform_catalogue/client/search.py index 3e39d7f7..ae580e25 100644 --- a/lib/datahub-client/data_platform_catalogue/client/search.py +++ b/lib/datahub-client/data_platform_catalogue/client/search.py @@ -88,8 +88,14 @@ def search( start = 0 if page is None else int(page) * count - types = self._map_result_types(result_types) - logger.debug(f"Getting facets with result types {types}") + datahub_types_set: list[str] = list(set( + self.fmd_type_to_datahub_types_mapping[result_type.value][0] for result_type in result_types)) + datahub_subtypes = list(self.fmd_type_to_datahub_types_mapping[result_type.value][1] for result_type in result_types) + datahub_subtypes_set = list(set(subtype for subtypes in datahub_subtypes for subtype in subtypes)) + datahub_subtypes_filter = MultiSelectFilter("typeNames", datahub_subtypes_set) + filters.append(datahub_subtypes_filter) + + logger.warning(f"Getting facets with {datahub_types_set=} {datahub_subtypes_set=}") # This is the tag that any and every entity we want to present in search results # now must have. @@ -104,7 +110,7 @@ def search( "count": count, "query": query, "start": start, - "types": types, + "types": datahub_types_set, "filters": formatted_filters, } @@ -171,32 +177,6 @@ def _get_matched_fields(result: dict) -> dict: matched_fields[name] = value return matched_fields - def search_facets( - self, - query: str = "*", - result_types: Sequence[EntityTypes] = (EntityTypes.TABLE,), - filters: Sequence[MultiSelectFilter] = (), - ) -> SearchFacets: - """ - Returns facets that can be used to filter the search results. - """ - types = self._map_result_types(result_types) - formatted_filters = self._map_filters(filters) - - variables = { - "query": query, - "facets": [], - "types": types, - "filters": formatted_filters, - } - - try: - response = self.graph.execute_graphql(self.facets_query, variables) - except GraphError as e: - raise CatalogueError("Unable to execute facets query") from e - - response = response["aggregateAcrossEntities"] - return self._parse_facets(response.get("facets", [])) def list_domains( self, diff --git a/lib/datahub-client/data_platform_catalogue/entities.py b/lib/datahub-client/data_platform_catalogue/entities.py index 757afca4..ebf237ce 100644 --- a/lib/datahub-client/data_platform_catalogue/entities.py +++ b/lib/datahub-client/data_platform_catalogue/entities.py @@ -50,12 +50,6 @@ def __new__(cls, value, datahub_entity_type, url_formatted): return obj -RESULT_TYPES_TO_FILTER = [ - EntityTypes.PUBLICATION_DATASET, - EntityTypes.PUBLICATION_COLLECTION -] - - class Audience(Enum): INTERNAL = "Internal" PUBLISHED = "Published"