diff --git a/core/settings.py b/core/settings.py index 9a66042c..d91ecc44 100644 --- a/core/settings.py +++ b/core/settings.py @@ -181,6 +181,12 @@ }, } +CACHES = { + "default": { + "BACKEND": "django.core.cache.backends.locmem.LocMemCache", + } +} + ANALYTICS_ID: str = os.environ.get("ANALYTICS_ID", "") ENABLE_ANALYTICS: bool = ( os.environ.get("ENABLE_ANALYTICS") in TRUTHY_VALUES diff --git a/home/forms/domain_model.py b/home/forms/domain_model.py deleted file mode 100644 index bd657276..00000000 --- a/home/forms/domain_model.py +++ /dev/null @@ -1,135 +0,0 @@ -import os -from typing import NamedTuple - - -class Domain(NamedTuple): - urn: str - label: str - - -class DomainModel: - """ - Store information about domains and subdomains - """ - - def __init__(self): - self.labels = {} - # This is temporary whilst we still have the dev enviroment connected to a - # datahub with different domains. - if os.environ.get("ENV") == "dev": - self.top_level_domains = [ - Domain("urn:li:domain:HMCTS", "HMCTS"), - Domain("urn:li:domain:HMPPS", "HMPPS"), - Domain("urn:li:domain:HQ", "HQ"), - Domain("urn:li:domain:LAA", "LAA"), - Domain("urn:li:domain:OPG", "OPG"), - ] - - for urn, label in self.top_level_domains: - self.labels[urn] = label - - self.subdomains = { - "urn:li:domain:HMPPS": [ - Domain( - "urn:li:domain:2feb789b-44d3-4412-b998-1f26819fabf9", "Prisons" - ), - Domain( - "urn:li:domain:abe153c1-416b-4abb-be7f-6accf2abb10a", - "Probation", - ), - ], - "urn:li:domain:HMCTS": [ - Domain( - "urn:li:domain:4d77af6d-9eca-4c44-b189-5f1addffae55", - "Civil courts", - ), - Domain( - "urn:li:domain:31754f66-33df-4a73-b039-532518bc765e", - "Crown courts", - ), - Domain( - "urn:li:domain:81adfe94-1284-46a2-9179-945ad2a76c14", - "Family courts", - ), - Domain( - "urn:li:domain:b261176c-d8eb-4111-8454-c0a1fa95005f", - "Magistrates courts", - ), - ], - "urn:li:domain:OPG": [ - Domain( - "urn:li:domain:bc091f6c-7674-4c82-a315-f5489398f099", - "Lasting power of attourney", - ), - Domain( - "urn:li:domain:efb9ade3-3c5d-4c5c-b451-df9f2d8136f5", - "Supervision orders", - ), - ], - "urn:li:domain:HQ": [ - Domain( - "urn:li:domain:9fb7ff13-6c7e-47ef-bef1-b13b23fd8c7a", "Estates" - ), - Domain( - "urn:li:domain:e4476e66-37a1-40fd-83b9-c908f805d8f4", "Finance" - ), - Domain( - "urn:li:domain:0985731b-8e1c-4b4a-bfc0-38e58d8ba8a1", "People" - ), - Domain( - "urn:li:domain:a320c915-0b43-4277-9769-66615aab4adc", - "Performance", - ), - ], - "urn:li:domain:LAA": [ - Domain( - "urn:li:domain:24344488-d770-437a-ba6f-e6129203b927", - "Civil legal advice", - ), - Domain("urn:li:domain:Legal%20Aid", "Legal aid"), - Domain( - "urn:li:domain:5c423c06-d328-431f-8634-7a7e86928819", - "Public defender", - ), - ], - } - for domain, subdomains in self.subdomains.items(): - domain_label = self.labels[domain] - for urn, subdoman_label in subdomains: - self.labels[urn] = f"{domain_label} - {subdoman_label}" - else: - self.top_level_domains = [ - Domain("urn:li:domain:courts", "Courts"), - Domain("urn:li:domain:electronic_monitoring", "Electronic monitoring"), - Domain("urn:li:domain:general", "General"), - Domain("urn:li:domain:interventions", "Interventions"), - Domain("urn:li:domain:opg", "OPG"), - Domain("urn:li:domain:prison", "Prison"), - Domain("urn:li:domain:probation", "Probation"), - Domain("urn:li:domain:risk", "Risk"), - Domain( - "urn:li:domain:victims_case_management", "Victims case management" - ), - ] - self.subdomains = {} - - for urn, label in self.top_level_domains: - self.labels[urn] = label - - def all_subdomains(self) -> list[Domain]: # -> list[Any] - """ - A flat list of all subdomains - """ - subdomains = [] - for domain_choices in self.subdomains.values(): - subdomains.extend(domain_choices) - return subdomains - - def get_parent_urn(self, child_subdomain_urn) -> str | None: - for domain, subdomains in self.subdomains.items(): - for subdomain in subdomains: - if child_subdomain_urn == subdomain.urn: - return domain - - def get_label(self, urn): - return self.labels.get(urn, urn) diff --git a/home/forms/search.py b/home/forms/search.py index 128a07c4..60c1eea6 100644 --- a/home/forms/search.py +++ b/home/forms/search.py @@ -4,7 +4,8 @@ from data_platform_catalogue.search_types import ResultType from django import forms -from .domain_model import Domain, DomainModel +from ..models.domain_model import Domain, DomainModel +from ..service.search_facet_fetcher import SearchFacetFetcher def get_domain_choices() -> list[Domain]: @@ -12,13 +13,15 @@ def get_domain_choices() -> list[Domain]: choices = [ Domain("", "All domains"), ] - choices.extend(DomainModel().top_level_domains) + facets = SearchFacetFetcher().fetch() + choices.extend(DomainModel(facets).top_level_domains) return choices def get_subdomain_choices() -> list[Domain]: choices = [Domain("", "All subdomains")] - choices.extend(DomainModel().all_subdomains()) + facets = SearchFacetFetcher().fetch() + choices.extend(DomainModel(facets).all_subdomains()) return choices @@ -47,8 +50,7 @@ def get_entity_types(): class SelectWithOptionAttribute(forms.Select): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - - self.domain_model = DomainModel() + self.domain_model = None def create_option( self, name, urn, label, selected, index, subindex=None, attrs=None @@ -57,6 +59,9 @@ def create_option( name, urn, label, selected, index, subindex, attrs ) + facets = SearchFacetFetcher().fetch() + self.domain_model = self.domain_model or DomainModel(facets) + if urn: option["attrs"]["data-parent"] = self.domain_model.get_parent_urn(urn) diff --git a/home/helper.py b/home/helper.py index b5eb644e..be11c34c 100644 --- a/home/helper.py +++ b/home/helper.py @@ -1,3 +1,6 @@ +from data_platform_catalogue.search_types import ResultType + + def filter_seleted_domains(domain_list, domains): selected_domain = {} for domain in domain_list: @@ -7,6 +10,8 @@ def filter_seleted_domains(domain_list, domains): def get_domain_list(client): - facets = client.search_facets() + facets = client.search_facets( + results_types=[ResultType.TABLE, ResultType.CHART, ResultType.DATABASE] + ) domain_list = facets.options("domain") return domain_list diff --git a/home/migrations/0002_delete_catalogue.py b/home/migrations/0002_delete_catalogue.py new file mode 100644 index 00000000..bac28038 --- /dev/null +++ b/home/migrations/0002_delete_catalogue.py @@ -0,0 +1,16 @@ +# Generated by Django 5.0.6 on 2024-06-10 09:39 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("home", "0001_initial"), + ] + + operations = [ + migrations.DeleteModel( + name="Catalogue", + ), + ] diff --git a/home/models.py b/home/models.py deleted file mode 100644 index 7d7e3695..00000000 --- a/home/models.py +++ /dev/null @@ -1,8 +0,0 @@ -from django.db import models - -# Create your models here. - - -class Catalogue(models.Model): - name = models.CharField(max_length=25) - description = models.TextField(max_length=100) diff --git a/home/models/__init__.py b/home/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/home/models/domain_model.py b/home/models/domain_model.py new file mode 100644 index 00000000..c0b96dbd --- /dev/null +++ b/home/models/domain_model.py @@ -0,0 +1,51 @@ +import logging +from typing import NamedTuple + +from data_platform_catalogue.search_types import SearchFacets + +logger = logging.getLogger(__name__) + + +class Domain(NamedTuple): + urn: str + label: str + + +class DomainModel: + """ + Store information about domains and subdomains + """ + + def __init__(self, search_facets: SearchFacets): + self.labels = {} + + self.top_level_domains = [ + Domain(option.value, option.label) + for option in search_facets.options("domains") + ] + self.top_level_domains.sort(key=lambda d: d.label) + + logger.info(f"{self.top_level_domains=}") + + self.subdomains = {} + + for urn, label in self.top_level_domains: + self.labels[urn] = label + + def all_subdomains(self) -> list[Domain]: # -> list[Any] + """ + A flat list of all subdomains + """ + subdomains = [] + for domain_choices in self.subdomains.values(): + subdomains.extend(domain_choices) + return subdomains + + def get_parent_urn(self, child_subdomain_urn) -> str | None: + for domain, subdomains in self.subdomains.items(): + for subdomain in subdomains: + if child_subdomain_urn == subdomain.urn: + return domain + + def get_label(self, urn): + return self.labels.get(urn, urn) diff --git a/home/service/search.py b/home/service/search.py index 6f1e321a..f5b28195 100644 --- a/home/service/search.py +++ b/home/service/search.py @@ -12,13 +12,16 @@ from django.core.paginator import Paginator from nltk.stem import PorterStemmer -from home.forms.domain_model import DomainModel from home.forms.search import SearchForm +from home.models.domain_model import DomainModel from .base import GenericService +from .search_facet_fetcher import SearchFacetFetcher -def domains_with_their_subdomains(domain: str, subdomain: str) -> list[str]: +def domains_with_their_subdomains( + domain: str, subdomain: str, domain_model: DomainModel +) -> list[str]: """ Users can search by domain, and optionally by subdomain. When subdomain is passed, then we can filter on that directly. @@ -30,14 +33,15 @@ def domains_with_their_subdomains(domain: str, subdomain: str) -> list[str]: if subdomain: return [subdomain] - subdomains = DomainModel().subdomains.get(domain, []) + subdomains = domain_model.subdomains.get(domain, []) subdomains = [subdomain[0] for subdomain in subdomains] return [domain, *subdomains] if not domain == "" else [] class SearchService(GenericService): def __init__(self, form: SearchForm, page: str, items_per_page: int = 20): - self.domain_model = DomainModel() + facets = SearchFacetFetcher().fetch() + self.domain_model = DomainModel(facets) self.stemmer = PorterStemmer() self.form = form if self.form.is_bound: @@ -76,7 +80,9 @@ def _get_search_results(self, page: str, items_per_page: int) -> SearchResponse: sort = form_data.get("sort", "relevance") domain = form_data.get("domain", "") subdomain = form_data.get("subdomain", "") - domains_and_subdomains = domains_with_their_subdomains(domain, subdomain) + domains_and_subdomains = domains_with_their_subdomains( + domain, subdomain, self.domain_model + ) where_to_access = self._build_custom_property_filter( "whereToAccessDataset=", form_data.get("where_to_access", []) ) diff --git a/home/service/search_facet_fetcher.py b/home/service/search_facet_fetcher.py new file mode 100644 index 00000000..fb2cdec7 --- /dev/null +++ b/home/service/search_facet_fetcher.py @@ -0,0 +1,24 @@ +from data_platform_catalogue.search_types import SearchFacets +from django.core.cache import cache + +from .base import GenericService + + +class SearchFacetFetcher(GenericService): + def __init__(self): + self.client = self._get_catalogue_client() + self.cache_key = "search_facets" + self.cache_timeout_seconds = 5 + + def fetch(self) -> SearchFacets: + """ + Fetch a static list of options that is independent of the search query + and any applied filters. Values are cached for 5 seconds to avoid + unnecessary queries. + """ + result = cache.get(self.cache_key) + if not result: + result = self.client.search_facets() + cache.set(self.cache_key, result, timeout=self.cache_timeout_seconds) + + return result diff --git a/lib/datahub-client/CHANGELOG.md b/lib/datahub-client/CHANGELOG.md index 048675e0..4ee3071e 100644 --- a/lib/datahub-client/CHANGELOG.md +++ b/lib/datahub-client/CHANGELOG.md @@ -7,6 +7,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +- Return domain metadata for Charts + ## [1.0.1] 2024-05-07 Change of build repo and several bug fixes following the refactor. diff --git a/lib/datahub-client/data_platform_catalogue/client/graphql/getChartDetails.graphql b/lib/datahub-client/data_platform_catalogue/client/graphql/getChartDetails.graphql index 4409888c..4174368c 100644 --- a/lib/datahub-client/data_platform_catalogue/client/graphql/getChartDetails.graphql +++ b/lib/datahub-client/data_platform_catalogue/client/graphql/getChartDetails.graphql @@ -5,6 +5,16 @@ query getChartDetails($urn: String!) { platform { name } + domain { + domain { + urn + id + properties { + name + description + } + } + } ownership { owners { owner { diff --git a/lib/datahub-client/data_platform_catalogue/client/graphql/search.graphql b/lib/datahub-client/data_platform_catalogue/client/graphql/search.graphql index cbd17b69..5ae26ce4 100644 --- a/lib/datahub-client/data_platform_catalogue/client/graphql/search.graphql +++ b/lib/datahub-client/data_platform_catalogue/client/graphql/search.graphql @@ -89,6 +89,16 @@ query Search( value } } + domain { + domain { + urn + id + properties { + name + description + } + } + } } ... on Dataset { urn @@ -233,7 +243,7 @@ query Search( } } } - ... on Container { + ... on Container { urn type subTypes { diff --git a/lib/datahub-client/data_platform_catalogue/client/search.py b/lib/datahub-client/data_platform_catalogue/client/search.py index 48e9d719..e72572e7 100644 --- a/lib/datahub-client/data_platform_catalogue/client/search.py +++ b/lib/datahub-client/data_platform_catalogue/client/search.py @@ -75,6 +75,7 @@ def search( start = 0 if page is None else int(page) * count types = self._map_result_types(result_types) + logger.debug(f"Getting facets with result types {types}") # This is the tag that any and every entity we want to present in search results # now must have. @@ -270,6 +271,7 @@ def _parse_result( "domain_id": domain.urn, "entity_types": self._parse_types_and_sub_types(entity, "Dataset"), } + logger.debug(f"{metadata=}") metadata.update(custom_properties.usage_restrictions.model_dump()) metadata.update(custom_properties.access_information.model_dump()) diff --git a/templates/partial/filter.html b/templates/partial/filter.html index 11549352..8a73815f 100644 --- a/templates/partial/filter.html +++ b/templates/partial/filter.html @@ -23,10 +23,12 @@

Filter

{{form.domain}} + {% if form.subdomain.choices %}
{{form.subdomain}}
+ {% endif %}
diff --git a/tests/benchmark/test_exact_matches.py b/tests/benchmark/test_exact_matches.py index 503f9426..9cc7bf9d 100644 --- a/tests/benchmark/test_exact_matches.py +++ b/tests/benchmark/test_exact_matches.py @@ -16,19 +16,15 @@ [ ( "prison_population_history.chunk_assignment", - "urn:li:dataset:(urn:li:dataPlatform:dbt,awsdatacatalog.prison_population_history.chunk_assignment,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:dbt,cadet.awsdatacatalog.prison_population_history.chunk_assignment,PROD)", ), ( "Accommodation on the first night following release", "urn:li:chart:(justice-data,accommodation-on-release)", ), - ( - "vcms_activations", - "urn:li:dataset:(urn:li:dataPlatform:dbt,awsdatacatalog.alpha_vcms_data.vcms_activations,PROD)", - ), ( "ns_postcode_lookup_latest_2011census", - "urn:li:dataset:(urn:li:dataPlatform:dbt,awsdatacatalog.common_lookup.ns_postcode_lookup_latest_2011census,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:dbt,cadet.awsdatacatalog.common_lookup.ns_postcode_lookup_latest_2011census,PROD)", ), ], ) @@ -46,7 +42,6 @@ def test_exact_title_match(query, expected_urn): assert results.page_results[0].urn == expected_urn -@pytest.mark.xfail @pytest.mark.slow @pytest.mark.datahub @pytest.mark.parametrize( diff --git a/tests/conftest.py b/tests/conftest.py index a649b80b..44dff717 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -32,8 +32,10 @@ from faker import Faker from home.forms.search import SearchForm +from home.models.domain_model import DomainModel from home.service.details import DatabaseDetailsService from home.service.search import SearchService +from home.service.search_facet_fetcher import SearchFacetFetcher fake = Faker() @@ -144,22 +146,6 @@ def generate_page(page_size=20, result_type: ResultType | None = None): return results -def generate_options(num_options=5): - """ - Generate a list of options for the search facets - """ - results = [] - for _ in range(num_options): - results.append( - FacetOption( - value=fake.name(), - label=fake.name(), - count=fake.random_int(min=0, max=100), - ) - ) - return results - - @pytest.fixture(autouse=True) def client(): client = Client() @@ -179,7 +165,26 @@ def mock_catalogue(request): mock_search_response( mock_catalogue, page_results=generate_page(), total_results=100 ) - mock_search_facets_response(mock_catalogue, domains=generate_options()) + mock_search_facets_response( + mock_catalogue, + domains=[ + FacetOption( + value="urn:li:domain:prisons", + label="Prisons", + count=fake.random_int(min=0, max=100), + ), + FacetOption( + value="urn:li:domain:courts", + label="Courts", + count=fake.random_int(min=0, max=100), + ), + FacetOption( + value="urn:li:domain:finance", + label="Finance", + count=fake.random_int(min=0, max=100), + ), + ], + ) mock_get_glossary_terms_response(mock_catalogue) mock_list_database_tables_response( mock_catalogue, @@ -294,11 +299,21 @@ def mock_get_glossary_terms_response(mock_catalogue): @pytest.fixture -def valid_form(): +def search_facets(): + return SearchFacetFetcher().fetch() + + +@pytest.fixture +def valid_domain(search_facets): + return DomainModel(search_facets).top_level_domains[0] + + +@pytest.fixture +def valid_form(valid_domain): valid_form = SearchForm( data={ "query": "test", - "domain": "urn:li:domain:prison", + "domain": valid_domain.urn, "entity_types": ["TABLE"], "where_to_access": ["analytical_platform"], "sort": "ascending", diff --git a/tests/home/service/test_search.py b/tests/home/service/test_search.py index a17ec589..cd64a990 100644 --- a/tests/home/service/test_search.py +++ b/tests/home/service/test_search.py @@ -1,6 +1,7 @@ import os import re from types import GeneratorType +from urllib.parse import quote import pytest @@ -29,9 +30,9 @@ def test_get_context_paginator(self, search_context): def test_get_context_h1_value(self, search_context): assert search_context["h1_value"] == "Search" - def test_get_context_label_clear_href(self, search_context): + def test_get_context_label_clear_href(self, search_context, valid_domain): assert search_context["label_clear_href"]["domain"] == { - "Prison": ( + valid_domain.label: ( "?query=test&" "where_to_access=analytical_platform&" "entity_types=TABLE&" @@ -44,7 +45,7 @@ def test_get_context_label_clear_href(self, search_context): assert search_context["label_clear_href"]["Where To Access"] == { "analytical_platform": ( "?query=test&" - "domain=urn%3Ali%3Adomain%3Aprison&" + f"domain={quote(valid_domain.urn)}&" "subdomain=&" "entity_types=TABLE&" "sort=ascending&" @@ -56,7 +57,7 @@ def test_get_context_label_clear_href(self, search_context): assert search_context["label_clear_href"]["Entity Types"] == { "Table": ( "?query=test&" - "domain=urn%3Ali%3Adomain%3Aprison&" + f"domain={quote(valid_domain.urn)}&" "subdomain=&" "where_to_access=analytical_platform&" "sort=ascending&" diff --git a/tests/selenium/test_search_scenarios.py b/tests/selenium/test_search_scenarios.py index 76dc5605..719efe6e 100644 --- a/tests/selenium/test_search_scenarios.py +++ b/tests/selenium/test_search_scenarios.py @@ -59,7 +59,7 @@ def test_apply_domain_filters(self): """ Interacts with the filters on the left hand side """ - domain = "Prison" + domain = "Prisons" self.start_on_the_search_page() self.select_domain(domain) self.click_apply_filters() @@ -98,7 +98,7 @@ def test_filters_query_and_sort_persist(self): Search settings persist as the user continues to interact with the search page. """ - domain = "Prison" + domain = "Prisons" self.start_on_the_search_page() self.select_domain(domain) self.click_apply_filters() @@ -131,7 +131,7 @@ def test_adding_a_filter_resets_pagination(self): self.click_next_page() self.verify_page("2") - self.select_domain("Prison") + self.select_domain("Prisons") self.click_apply_filters() self.verify_page("1") @@ -140,20 +140,19 @@ def test_clear_single_filter(self): Users can clear a filter by clicking on it within the "selected filters" panel. """ - domain = "Prison" + domain = "Prisons" self.start_on_the_search_page() self.select_domain(domain) self.click_apply_filters() self.verify_domain_selected(domain) self.click_clear_selected_filter(domain) self.verify_unselected_domain() - self.verify_unselected_subdomain() def test_clear_all_filters(self): """ Users can click a button to clear all filters. """ - domain = "Prison" + domain = "Prisons" self.start_on_the_search_page() @@ -271,9 +270,6 @@ def enter_a_query_and_submit(self, query): def select_domain(self, domain): self.search_page.select_domain(domain) - def select_subdomain(self, domain): - self.search_page.select_subdomain(domain) - def click_option(self, sortby): self.search_page.sort_label(sortby).click() @@ -295,18 +291,10 @@ def verify_domain_selected(self, domain): selected_domain = self.search_page.get_selected_domain().text assert selected_domain == domain - def verify_subdomain_selected(self, domain): - selected_domain = self.search_page.get_selected_subdomain().text - assert selected_domain == domain - def verify_unselected_domain(self): selected_domain = self.search_page.get_selected_domain().text assert selected_domain == "All domains" - def verify_unselected_subdomain(self): - selected_domain = self.search_page.get_selected_subdomain().text - assert selected_domain == "All subdomains" - def verify_selected_filters_shown(self, domains): actual = {i.text for i in self.search_page.selected_filter_tags()} expected = set(domains) diff --git a/tests/test_forms.py b/tests/test_forms.py index d5d48422..7502923c 100644 --- a/tests/test_forms.py +++ b/tests/test_forms.py @@ -15,9 +15,9 @@ def test_sort_is_from_sort_list_false(self): def test_all_fields_nullable(self): assert SearchForm(data={}).is_valid() - def test_form_encode_without_filter_for_one_filter(self, valid_form): + def test_form_encode_without_filter_for_one_filter(self, valid_form, valid_domain): assert valid_form.encode_without_filter( - filter_name="domain", filter_value="urn:li:domain:prison" + filter_name="domain", filter_value=valid_domain.urn ) == ( "?query=test&" "where_to_access=analytical_platform&" @@ -31,14 +31,14 @@ def test_form_encode_without_filter_for_two_filters(self): two_filter_form = SearchForm( data={ "query": "test", - "domain": "urn:li:domain:prison", + "domain": "urn:li:domain:prisons", "entity_types": ["TABLE"], } ) two_filter_form.is_valid() assert two_filter_form.encode_without_filter( - filter_name="domain", filter_value="urn:li:domain:prison" + filter_name="domain", filter_value="urn:li:domain:prisons" ) == ( "?query=test&" "entity_types=TABLE&"