Skip to content

Commit

Permalink
Trying to fix the filters
Browse files Browse the repository at this point in the history
  • Loading branch information
murdo-moj committed Dec 2, 2024
1 parent f62ba4d commit e2ff430
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 50 deletions.
24 changes: 9 additions & 15 deletions home/service/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from django.utils.translation import pgettext
from nltk.stem import PorterStemmer

from data_platform_catalogue.entities import RESULT_TYPES_TO_FILTER
from home.forms.search import SearchForm
from home.models.domain_model import DomainModel

Expand Down Expand Up @@ -51,30 +50,28 @@ def _build_entity_types(self, entity_types: list[str]) -> tuple[EntityTypes, ...
entity
for entity in EntityTypes
if entity.name != "GLOSSARY_TERM"
and entity not in RESULT_TYPES_TO_FILTER
)
chosen_entities = (
tuple(
EntityTypes[entity]
for entity in entity_types
if EntityTypes[entity] not in RESULT_TYPES_TO_FILTER
)
if entity_types
else None
)

return chosen_entities if chosen_entities else default_entities

def _build_entity_subtypes_filter(self, entity_types: list[str]) -> MultiSelectFilter | None:
# The filter needs a non-capitalised string rather than the enum value
subtype_strings = [
EntityTypes[entity_type].value
for entity_type in entity_types
if EntityTypes[entity_type] in RESULT_TYPES_TO_FILTER
]
entity_subtypes_filter = MultiSelectFilter("typeNames", subtype_strings) if subtype_strings else None
# def _build_entity_subtypes_filter(self, entity_types: list[str]) -> MultiSelectFilter | None:
# # The filter needs a non-capitalised string rather than the enum value
# subtype_strings = [
# EntityTypes[entity_type].value
# for entity_type in entity_types
# if EntityTypes[entity_type] in RESULT_TYPES_TO_FILTER
# ]
# entity_subtypes_filter = MultiSelectFilter("typeNames", subtype_strings) if subtype_strings else None

return entity_subtypes_filter
# return entity_subtypes_filter

def _format_query_value(self, query: str) -> str:
query_pattern: str = r"^[\"'].+[\"']$"
Expand Down Expand Up @@ -102,7 +99,6 @@ def _get_search_results(self, page: str, items_per_page: int) -> SearchResponse:
"dc_where_to_access_dataset=", form_data.get("where_to_access", [])
)
entity_types = self._build_entity_types(form_data.get("entity_types", []))
entity_subtypes_filter = self._build_entity_subtypes_filter(form_data.get("entity_types", []))

filter_value = []
if domain:
Expand All @@ -113,8 +109,6 @@ def _get_search_results(self, page: str, items_per_page: int) -> SearchResponse:
filter_value.append(
MultiSelectFilter("tags", [f"urn:li:tag:{tag}" for tag in tags])
)
if entity_subtypes_filter:
filter_value.append(entity_subtypes_filter)

page_for_search = str(int(page) - 1)
if sort == "ascending":
Expand Down
38 changes: 9 additions & 29 deletions lib/datahub-client/data_platform_catalogue/client/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,14 @@ def search(

start = 0 if page is None else int(page) * count

types = self._map_result_types(result_types)
logger.debug(f"Getting facets with result types {types}")
datahub_types_set: list[str] = list(set(
self.fmd_type_to_datahub_types_mapping[result_type.value][0] for result_type in result_types))
datahub_subtypes = list(self.fmd_type_to_datahub_types_mapping[result_type.value][1] for result_type in result_types)
datahub_subtypes_set = list(set(subtype for subtypes in datahub_subtypes for subtype in subtypes))
datahub_subtypes_filter = MultiSelectFilter("typeNames", datahub_subtypes_set)
filters.append(datahub_subtypes_filter)

logger.warning(f"Getting facets with {datahub_types_set=} {datahub_subtypes_set=}")

# This is the tag that any and every entity we want to present in search results
# now must have.
Expand All @@ -104,7 +110,7 @@ def search(
"count": count,
"query": query,
"start": start,
"types": types,
"types": datahub_types_set,
"filters": formatted_filters,
}

Expand Down Expand Up @@ -171,32 +177,6 @@ def _get_matched_fields(result: dict) -> dict:
matched_fields[name] = value
return matched_fields

def search_facets(
    self,
    query: str = "*",
    result_types: Sequence[EntityTypes] = (EntityTypes.TABLE,),
    filters: Sequence[MultiSelectFilter] = (),
) -> SearchFacets:
    """
    Fetch the facets that can be used to narrow down search results.

    The given result types and filters are mapped with
    ``_map_result_types`` and ``_map_filters`` and sent, together with the
    query string, as variables of ``self.facets_query``. The ``facets``
    entry found under ``aggregateAcrossEntities`` in the response (an empty
    list when absent) is handed to ``_parse_facets``.

    Raises:
        CatalogueError: if executing the GraphQL query raises GraphError.
    """
    graphql_variables = {
        "query": query,
        # No specific facet names are requested.
        "facets": [],
        "types": self._map_result_types(result_types),
        "filters": self._map_filters(filters),
    }

    try:
        raw_response = self.graph.execute_graphql(
            self.facets_query, graphql_variables
        )
    except GraphError as error:
        raise CatalogueError("Unable to execute facets query") from error

    aggregated = raw_response["aggregateAcrossEntities"]
    facets = aggregated.get("facets", [])
    return self._parse_facets(facets)

def list_domains(
self,
Expand Down
6 changes: 0 additions & 6 deletions lib/datahub-client/data_platform_catalogue/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,6 @@ def __new__(cls, value, datahub_entity_type, url_formatted):
return obj


# Entity types that the search service (home/service/search.py) keeps out of
# the entity-type list it builds and instead matches by value through a
# "typeNames" MultiSelectFilter.
RESULT_TYPES_TO_FILTER = [
EntityTypes.PUBLICATION_DATASET,
EntityTypes.PUBLICATION_COLLECTION
]


class Audience(Enum):
INTERNAL = "Internal"
PUBLISHED = "Published"
Expand Down

0 comments on commit e2ff430

Please sign in to comment.