Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update search form domain filters #597

Merged
merged 8 commits into from
Jul 29, 2024
50 changes: 7 additions & 43 deletions home/forms/search.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,22 @@
from copy import deepcopy
from urllib.parse import urlencode

from data_platform_catalogue.search_types import ResultType
from data_platform_catalogue.search_types import DomainOption, ResultType
from django import forms

from ..models.domain_model import Domain, DomainModel
from ..service.search_facet_fetcher import SearchFacetFetcher
from ..models.domain_model import Domain
from ..service.domain_fetcher import DomainFetcher
from ..service.search_tag_fetcher import SearchTagFetcher


def get_domain_choices() -> list[Domain]:
"""Make API call to obtain domain choices"""
"""Make Domains API call to obtain domain choices"""
choices = [
Domain("", "All domains"),
]
facets = SearchFacetFetcher().fetch()
choices.extend(DomainModel(facets).top_level_domains)
return choices


def get_subdomain_choices() -> list[Domain]:
choices = [Domain("", "All subdomains")]
facets = SearchFacetFetcher().fetch()
choices.extend(DomainModel(facets).all_subdomains())
list_domain_options: list[DomainOption] = DomainFetcher().fetch()
domains: list[Domain] = [Domain(d.urn, d.name) for d in list_domain_options]
choices.extend(domains)
return choices


Expand Down Expand Up @@ -53,27 +47,6 @@ def get_tags():
return tags


class SelectWithOptionAttribute(forms.Select):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.domain_model = None

def create_option(
self, name, urn, label, selected, index, subindex=None, attrs=None
):
option = super().create_option(
name, urn, label, selected, index, subindex, attrs
)

facets = SearchFacetFetcher().fetch()
self.domain_model = self.domain_model or DomainModel(facets)

if urn:
option["attrs"]["data-parent"] = self.domain_model.get_parent_urn(urn)

return option


class SearchForm(forms.Form):
"""Django form to represent search page inputs"""

Expand All @@ -97,13 +70,6 @@ class SearchForm(forms.Form):
}
),
)
subdomain = forms.ChoiceField(
choices=get_subdomain_choices,
required=False,
widget=SelectWithOptionAttribute(
attrs={"form": "searchform", "class": "govuk-select"}
),
)
where_to_access = forms.MultipleChoiceField(
choices=get_where_to_access_choices,
required=False,
Expand Down Expand Up @@ -171,6 +137,4 @@ def encode_without_filter(self, filter_name: str, filter_value: str):
value.remove(filter_value)
elif isinstance(value, str) and filter_value == value:
query_params.pop(filter_name)
if filter_name == "domain":
query_params.pop("subdomain")
return f"?{urlencode(query_params, doseq=True)}"
32 changes: 3 additions & 29 deletions home/models/domain_model.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from typing import NamedTuple

from data_platform_catalogue.search_types import SearchFacets
from data_platform_catalogue.search_types import DomainOption

logger = logging.getLogger(__name__)

Expand All @@ -12,40 +12,14 @@ class Domain(NamedTuple):


class DomainModel:
"""
Store information about domains and subdomains
"""

def __init__(self, search_facets: SearchFacets):
def __init__(self, domains: list[DomainOption]):
self.labels = {}

self.top_level_domains = [
Domain(option.value, option.label)
for option in search_facets.options("domains")
]
self.top_level_domains.sort(key=lambda d: d.label)

self.top_level_domains = [Domain(domain.urn, domain.name) for domain in domains]
logger.info(f"{self.top_level_domains=}")

self.subdomains = {}

for urn, label in self.top_level_domains:
self.labels[urn] = label

def all_subdomains(self) -> list[Domain]: # -> list[Any]
"""
A flat list of all subdomains
"""
subdomains = []
for domain_choices in self.subdomains.values():
subdomains.extend(domain_choices)
return subdomains

def get_parent_urn(self, child_subdomain_urn) -> str | None:
for domain, subdomains in self.subdomains.items():
for subdomain in subdomains:
if child_subdomain_urn == subdomain.urn:
return domain

def get_label(self, urn):
return self.labels.get(urn, urn)
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from data_platform_catalogue.search_types import ListDomainOption
from data_platform_catalogue.search_types import DomainOption
from django.core.cache import cache

from .base import GenericService


class ListDomainFetcher(GenericService):
class DomainFetcher(GenericService):
"""
ListDomainFetcher implementation to fetch domains with the total number of
DomainFetcher implementation to fetch domains with the total number of
associated entities from the backend.
"""

Expand All @@ -16,7 +16,7 @@ def __init__(self, filter_zero_entities: bool = True):
self.cache_timeout_seconds = 300
self.filter_zero_entities = filter_zero_entities

def fetch(self) -> list[ListDomainOption]:
def fetch(self) -> list[DomainOption]:
"""
Fetch a static list of options that is independent of the search query
and any applied filters. Values are cached for 5 minutes (300 seconds) to avoid
Expand Down
37 changes: 7 additions & 30 deletions home/service/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Any

from data_platform_catalogue.search_types import (
DomainOption,
MultiSelectFilter,
ResultType,
SearchResponse,
Expand All @@ -16,32 +17,13 @@
from home.models.domain_model import DomainModel

from .base import GenericService
from .search_facet_fetcher import SearchFacetFetcher


def domains_with_their_subdomains(
domain: str, subdomain: str, domain_model: DomainModel
) -> list[str]:
"""
Users can search by domain, and optionally by subdomain.
When subdomain is passed, then we can filter on that directly.

However, when we filter by domain alone, assets tagged to subdomains
are not automatically included, so we need to include all possible
subdomains in the filter.
"""
if subdomain:
return [subdomain]

subdomains = domain_model.subdomains.get(domain, [])
subdomains = [subdomain[0] for subdomain in subdomains]
return [domain, *subdomains] if not domain == "" else []
from .domain_fetcher import DomainFetcher


class SearchService(GenericService):
def __init__(self, form: SearchForm, page: str, items_per_page: int = 20):
facets = SearchFacetFetcher().fetch()
self.domain_model = DomainModel(facets)
domains: list[DomainOption] = DomainFetcher().fetch()
self.domain_model = DomainModel(domains)
self.stemmer = PorterStemmer()
self.form = form
if self.form.is_bound:
Expand Down Expand Up @@ -79,18 +61,14 @@ def _get_search_results(self, page: str, items_per_page: int) -> SearchResponse:
query = form_data.get("query", "").replace("_", " ")
sort = form_data.get("sort", "relevance")
domain = form_data.get("domain", "")
subdomain = form_data.get("subdomain", "")
tags = form_data.get("tags", "")
domains_and_subdomains = domains_with_their_subdomains(
domain, subdomain, self.domain_model
)
where_to_access = self._build_custom_property_filter(
"dc_where_to_access_dataset=", form_data.get("where_to_access", [])
)
entity_types = self._build_entity_types(form_data.get("entity_types", []))
filter_value = []
if domains_and_subdomains:
filter_value.append(MultiSelectFilter("domains", domains_and_subdomains))
if domain:
filter_value.append(MultiSelectFilter("domains", [domain]))
if where_to_access:
filter_value.append(MultiSelectFilter("customProperties", where_to_access))
if tags:
Expand Down Expand Up @@ -167,9 +145,8 @@ def _generate_domain_clear_href(
self,
) -> dict[str, str]:
domain = self.form.cleaned_data.get("domain", "")
subdomain = self.form.cleaned_data.get("subdomain", "")

label = self.domain_model.get_label(subdomain or domain)
label = self.domain_model.get_label(domain)

return {
label: (
Expand Down
6 changes: 3 additions & 3 deletions home/views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from data_platform_catalogue.client.exceptions import EntityDoesNotExist
from data_platform_catalogue.search_types import ListDomainOption
from data_platform_catalogue.search_types import DomainOption
from django.http import Http404, HttpResponseBadRequest
from django.shortcuts import render

Expand All @@ -9,8 +9,8 @@
DatabaseDetailsService,
DatasetDetailsService,
)
from home.service.domain_fetcher import DomainFetcher
from home.service.glossary import GlossaryService
from home.service.list_domain_fetcher import ListDomainFetcher
from home.service.metadata_specification import MetadataSpecificationService
from home.service.search import SearchService

Expand All @@ -19,7 +19,7 @@ def home_view(request):
"""
Displays only domains that have entities tagged for display in the catalog.
"""
domains: list[ListDomainOption] = ListDomainFetcher().fetch()
domains: list[DomainOption] = DomainFetcher().fetch()
context = {"domains": domains, "h1_value": "Home"}
return render(request, "home.html", context)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,6 @@
from importlib.resources import files
from typing import Sequence

from datahub.configuration.common import ConfigurationError
from datahub.emitter import mce_builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
from datahub.ingestion.source.common.subtypes import (
DatasetContainerSubTypes,
DatasetSubTypes,
)
from datahub.metadata import schema_classes
from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance
from datahub.metadata.schema_classes import (
ChangeTypeClass,
ContainerClass,
ContainerPropertiesClass,
DatasetPropertiesClass,
DomainPropertiesClass,
DomainsClass,
OtherSchemaClass,
SchemaFieldClass,
SchemaFieldDataTypeClass,
SchemaMetadataClass,
SubTypesClass,
)

from data_platform_catalogue.client.exceptions import (
AspectDoesNotExist,
ConnectivityError,
Expand Down Expand Up @@ -57,13 +33,36 @@
Table,
)
from data_platform_catalogue.search_types import (
ListDomainOption,
DomainOption,
MultiSelectFilter,
ResultType,
SearchFacets,
SearchResponse,
SortOption,
)
from datahub.configuration.common import ConfigurationError
from datahub.emitter import mce_builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
from datahub.ingestion.source.common.subtypes import (
DatasetContainerSubTypes,
DatasetSubTypes,
)
from datahub.metadata import schema_classes
from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance
from datahub.metadata.schema_classes import (
ChangeTypeClass,
ContainerClass,
ContainerPropertiesClass,
DatasetPropertiesClass,
DomainPropertiesClass,
DomainsClass,
OtherSchemaClass,
SchemaFieldClass,
SchemaFieldDataTypeClass,
SchemaMetadataClass,
SubTypesClass,
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -230,9 +229,9 @@ def list_domains(
MultiSelectFilter("tags", ["urn:li:tag:dc_display_in_catalogue"])
],
count: int = 1000,
) -> list[ListDomainOption]:
) -> list[DomainOption]:
"""
Returns a list of ListDomainOption objects
Returns a list of DomainOption objects
"""
return self.search_client.list_domains(
query=query, filters=filters, count=count
Expand Down
Loading
Loading