Skip to content

Commit

Permalink
Populate domains drop down with what's been ingested in datahub (#407)
Browse files Browse the repository at this point in the history
* Add missing domain information from charts

* Update search tests that hit datahub dev

- remove entity which is not currently present
- enable the no_duplicates test (we have fixed this)

* Load the list of domains from Datahub

Previously we hardcoded the list of domains shown in the search filter,
and had different lists per environment.

This was useful in alpha when we had some junk domains we wanted to
filter out, but now we're at a point where every domain in Datahub
should be one we want to use.

This commit means we now fetch every domain that has something linked to
it, and display that in alphabetical order.

* Move domain model to models and remove unused model

* Refactor: decouple SearchFacetFetcher from DomainModel

* Cache facets fetched from datahub

Ideally we would just fetch the facets once per request,
but in practice we do this from a few different places.

1. In the view we instantiate a SearchService, which uses the domain
   model in constructing filters for Datahub.
2. The SearchForm also needs them to know what choices are valid, so we
   need to pass a callback to the form's ChoiceField. That callback does
   not share any data with the view.

Caching the value is a quick way to avoid making extra requests for the
same data.

* Hide subdomains if there aren't any defined

This is the case at the moment, because the domain model we've pulled in
from CaDeT doesn't have subdomains. This might change later though so I
don't want to remove the subdomain code completely.

* Include missing domains

Previously only domains that contained tables were returned. We should
include any that show as non-empty in Find MOJ Data.
  • Loading branch information
MatMoore authored Jun 11, 2024
1 parent d0b6db0 commit aee5e43
Show file tree
Hide file tree
Showing 20 changed files with 203 additions and 206 deletions.
6 changes: 6 additions & 0 deletions core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,12 @@
},
}

# Default cache: Django's local-memory backend.
# NOTE(review): per the Django docs this backend is per-process, so values
# cached by one worker are not visible to other workers — confirm that is
# acceptable for the deployment.
CACHES = {
"default": {
"BACKEND": "django.core.cache.backends.locmem.LocMemCache",
}
}

ANALYTICS_ID: str = os.environ.get("ANALYTICS_ID", "")
ENABLE_ANALYTICS: bool = (
os.environ.get("ENABLE_ANALYTICS") in TRUTHY_VALUES
Expand Down
135 changes: 0 additions & 135 deletions home/forms/domain_model.py

This file was deleted.

15 changes: 10 additions & 5 deletions home/forms/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,24 @@
from data_platform_catalogue.search_types import ResultType
from django import forms

from .domain_model import Domain, DomainModel
from ..models.domain_model import Domain, DomainModel
from ..service.search_facet_fetcher import SearchFacetFetcher


def get_domain_choices() -> list[Domain]:
"""Make API call to obtain domain choices"""
choices = [
Domain("", "All domains"),
]
choices.extend(DomainModel().top_level_domains)
facets = SearchFacetFetcher().fetch()
choices.extend(DomainModel(facets).top_level_domains)
return choices


def get_subdomain_choices() -> list[Domain]:
choices = [Domain("", "All subdomains")]
choices.extend(DomainModel().all_subdomains())
facets = SearchFacetFetcher().fetch()
choices.extend(DomainModel(facets).all_subdomains())
return choices


Expand Down Expand Up @@ -47,8 +50,7 @@ def get_entity_types():
class SelectWithOptionAttribute(forms.Select):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

self.domain_model = DomainModel()
self.domain_model = None

def create_option(
self, name, urn, label, selected, index, subindex=None, attrs=None
Expand All @@ -57,6 +59,9 @@ def create_option(
name, urn, label, selected, index, subindex, attrs
)

facets = SearchFacetFetcher().fetch()
self.domain_model = self.domain_model or DomainModel(facets)

if urn:
option["attrs"]["data-parent"] = self.domain_model.get_parent_urn(urn)

Expand Down
7 changes: 6 additions & 1 deletion home/helper.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from data_platform_catalogue.search_types import ResultType


def filter_seleted_domains(domain_list, domains):
selected_domain = {}
for domain in domain_list:
Expand All @@ -7,6 +10,8 @@ def filter_seleted_domains(domain_list, domains):


def get_domain_list(client):
facets = client.search_facets()
facets = client.search_facets(
results_types=[ResultType.TABLE, ResultType.CHART, ResultType.DATABASE]
)
domain_list = facets.options("domain")
return domain_list
16 changes: 16 additions & 0 deletions home/migrations/0002_delete_catalogue.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Generated by Django 5.0.6 on 2024-06-10 09:39

from django.db import migrations


class Migration(migrations.Migration):
    """Delete the ``Catalogue`` model from the ``home`` app."""

    # Applied on top of the app's initial migration.
    dependencies = [("home", "0001_initial")]

    operations = [migrations.DeleteModel(name="Catalogue")]
8 changes: 0 additions & 8 deletions home/models.py

This file was deleted.

Empty file added home/models/__init__.py
Empty file.
51 changes: 51 additions & 0 deletions home/models/domain_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import logging
from typing import NamedTuple

from data_platform_catalogue.search_types import SearchFacets

logger = logging.getLogger(__name__)


class Domain(NamedTuple):
    """A single domain choice, expressed as a ``(urn, label)`` pair."""

    # Identifier of the domain (the value submitted when it is selected).
    urn: str
    # Human-readable name shown for the domain.
    label: str


class DomainModel:
    """
    Store information about domains and subdomains.

    The model is built from a set of search facets: every option of the
    ``"domains"`` facet becomes a top-level domain, and the resulting list
    is kept sorted by label.
    """

    def __init__(self, search_facets: SearchFacets):
        """
        Build the model from already-fetched search facets.

        :param search_facets: facets whose ``"domains"`` options are read;
            each option must expose ``value`` and ``label`` attributes.
        """
        # Top-level domains, ordered by label.
        self.top_level_domains: list[Domain] = sorted(
            (
                Domain(option.value, option.label)
                for option in search_facets.options("domains")
            ),
            key=lambda domain: domain.label,
        )

        # %-style arguments defer building the message until a handler
        # actually emits the record.
        logger.info("self.top_level_domains=%r", self.top_level_domains)

        # Parent domain urn -> list of its subdomains. Nothing in this class
        # populates it, so it starts (and stays) empty unless a caller fills
        # it in.
        self.subdomains: dict[str, list[Domain]] = {}

        # urn -> label lookup for every top-level domain.
        self.labels: dict[str, str] = {
            domain.urn: domain.label for domain in self.top_level_domains
        }

    def all_subdomains(self) -> list[Domain]:
        """
        Return a flat list of all subdomains, across every parent domain.
        """
        return [
            subdomain
            for domain_choices in self.subdomains.values()
            for subdomain in domain_choices
        ]

    def get_parent_urn(self, child_subdomain_urn) -> str | None:
        """
        Return the urn of the domain that owns the given subdomain.

        :param child_subdomain_urn: urn of the subdomain to look up.
        :return: the parent domain's urn, or ``None`` when no domain lists
            a subdomain with that urn.
        """
        for domain, subdomains in self.subdomains.items():
            if any(
                child_subdomain_urn == subdomain.urn for subdomain in subdomains
            ):
                return domain
        return None

    def get_label(self, urn: str) -> str:
        """
        Return the label recorded for ``urn``.

        Falls back to returning ``urn`` itself when no label is known.
        """
        return self.labels.get(urn, urn)
16 changes: 11 additions & 5 deletions home/service/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,16 @@
from django.core.paginator import Paginator
from nltk.stem import PorterStemmer

from home.forms.domain_model import DomainModel
from home.forms.search import SearchForm
from home.models.domain_model import DomainModel

from .base import GenericService
from .search_facet_fetcher import SearchFacetFetcher


def domains_with_their_subdomains(domain: str, subdomain: str) -> list[str]:
def domains_with_their_subdomains(
domain: str, subdomain: str, domain_model: DomainModel
) -> list[str]:
"""
Users can search by domain, and optionally by subdomain.
When subdomain is passed, then we can filter on that directly.
Expand All @@ -30,14 +33,15 @@ def domains_with_their_subdomains(domain: str, subdomain: str) -> list[str]:
if subdomain:
return [subdomain]

subdomains = DomainModel().subdomains.get(domain, [])
subdomains = domain_model.subdomains.get(domain, [])
subdomains = [subdomain[0] for subdomain in subdomains]
return [domain, *subdomains] if not domain == "" else []


class SearchService(GenericService):
def __init__(self, form: SearchForm, page: str, items_per_page: int = 20):
self.domain_model = DomainModel()
facets = SearchFacetFetcher().fetch()
self.domain_model = DomainModel(facets)
self.stemmer = PorterStemmer()
self.form = form
if self.form.is_bound:
Expand Down Expand Up @@ -76,7 +80,9 @@ def _get_search_results(self, page: str, items_per_page: int) -> SearchResponse:
sort = form_data.get("sort", "relevance")
domain = form_data.get("domain", "")
subdomain = form_data.get("subdomain", "")
domains_and_subdomains = domains_with_their_subdomains(domain, subdomain)
domains_and_subdomains = domains_with_their_subdomains(
domain, subdomain, self.domain_model
)
where_to_access = self._build_custom_property_filter(
"whereToAccessDataset=", form_data.get("where_to_access", [])
)
Expand Down
24 changes: 24 additions & 0 deletions home/service/search_facet_fetcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from data_platform_catalogue.search_types import SearchFacets
from django.core.cache import cache

from .base import GenericService


class SearchFacetFetcher(GenericService):
    """
    Fetch search facets from the catalogue client, with a short-lived cache
    so that several callers asking for the same data do not each trigger a
    request.
    """

    def __init__(self):
        self.client = self._get_catalogue_client()
        # Key under which the fetched facets are stored in the Django cache.
        self.cache_key = "search_facets"
        # How long a cached value stays valid, in seconds.
        self.cache_timeout_seconds = 5

    def fetch(self) -> SearchFacets:
        """
        Fetch a static list of options that is independent of the search query
        and any applied filters. Values are cached for
        ``cache_timeout_seconds`` to avoid unnecessary queries.

        :return: the cached facets when present, otherwise the freshly
            fetched ones (which are then stored in the cache).
        """
        result = cache.get(self.cache_key)
        # ``cache.get`` returns None on a miss. Comparing against None means
        # a cached value that happens to be falsy is still reused rather than
        # refetched on every call.
        if result is None:
            result = self.client.search_facets()
            cache.set(self.cache_key, result, timeout=self.cache_timeout_seconds)

        return result
4 changes: 4 additions & 0 deletions lib/datahub-client/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased

- Return domain metadata for Charts

## [1.0.1] 2024-05-07

Change of build repo and several bug fixes following the refactor.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ query getChartDetails($urn: String!) {
platform {
name
}
domain {
domain {
urn
id
properties {
name
description
}
}
}
ownership {
owners {
owner {
Expand Down
Loading

0 comments on commit aee5e43

Please sign in to comment.