diff --git a/.github/workflows/test-and-deploy.yml b/.github/workflows/test-and-deploy.yml index 1b6d2522..c544bf3b 100644 --- a/.github/workflows/test-and-deploy.yml +++ b/.github/workflows/test-and-deploy.yml @@ -78,7 +78,7 @@ jobs: - name: run selenium tests id: slow-tests if: steps.fast-tests.outcome == 'success' - run: poetry run pytest tests/selenium --axe-version 4.9.0 + run: poetry run pytest tests/selenium --axe-version 4.9.1 --chromedriver-path /usr/local/bin/chromedriver javascript: runs-on: ubuntu-latest diff --git a/home/forms/search.py b/home/forms/search.py index 60c1eea6..afae70e8 100644 --- a/home/forms/search.py +++ b/home/forms/search.py @@ -69,7 +69,7 @@ def create_option( class SearchForm(forms.Form): - """Django form to represent data product search page inputs""" + """Django form to represent search page inputs""" query = forms.CharField( max_length=100, diff --git a/home/service/details.py b/home/service/details.py index 4525b792..5abd8f4a 100644 --- a/home/service/details.py +++ b/home/service/details.py @@ -70,9 +70,7 @@ def __init__(self, urn: str): if parents: # Pick the first entity to use as the parent in the breadcrumb. # If the dataset belongs to multiple parents, this may diverge - # from the path the user took to get to this page. However as of datahub - # v0.12, assigning to multiple data products is not possible and we don't - # have datasets with multiple parent containers. + # from the path the user took to get to this page. 
self.parent_entity = parents[0] self.dataset_parent_type = ResultType.DATABASE.name.lower() else: diff --git a/lib/datahub-client/CHANGELOG.md b/lib/datahub-client/CHANGELOG.md index 4ee3071e..e9fc9fed 100644 --- a/lib/datahub-client/CHANGELOG.md +++ b/lib/datahub-client/CHANGELOG.md @@ -9,7 +9,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + - Return domain metadata for Charts +- Add `glossary_terms` list to `SearchResult` + +### Changed + +- Return lists of objects for `SearchResult.tags` and `SearchResult.tags_to_display` instead of strings. + +### Removed + +- Removed all remaining references to Data Products ## [1.0.1] 2024-05-07 diff --git a/lib/datahub-client/data_platform_catalogue/client/graphql/getDatasetDetails.graphql b/lib/datahub-client/data_platform_catalogue/client/graphql/getDatasetDetails.graphql index 9f40e2aa..ee77b504 100644 --- a/lib/datahub-client/data_platform_catalogue/client/graphql/getDatasetDetails.graphql +++ b/lib/datahub-client/data_platform_catalogue/client/graphql/getDatasetDetails.graphql @@ -32,21 +32,6 @@ query getDatasetDetails($urn: String!) { } } } - data_product_relations: relationships( - input: { types: ["DataProductContains"], direction: INCOMING, count: 10 } - ) { - total - relationships { - entity { - urn - ... 
on DataProduct { - properties { - name - } - } - } - } - } ownership { owners { owner { diff --git a/lib/datahub-client/data_platform_catalogue/client/graphql/search.graphql b/lib/datahub-client/data_platform_catalogue/client/graphql/search.graphql index 5ae26ce4..672cd237 100644 --- a/lib/datahub-client/data_platform_catalogue/client/graphql/search.graphql +++ b/lib/datahub-client/data_platform_catalogue/client/graphql/search.graphql @@ -80,6 +80,28 @@ query Search( } } } + tags { + tags { + tag { + urn + properties { + name + description + } + } + } + } + glossaryTerms { + terms { + term { + urn + properties { + name + description + } + } + } + } properties { name description @@ -109,25 +131,6 @@ query Search( subTypes { typeNames } - relationships( - input: { - types: ["DataProductContains"] - direction: INCOMING - count: 10 - } - ) { - total - relationships { - entity { - urn - ... on DataProduct { - properties { - name - } - } - } - } - } ownership { owners { owner { @@ -177,6 +180,17 @@ query Search( } } } + glossaryTerms { + terms { + term { + urn + properties { + name + description + } + } + } + } lastIngested domain { domain { @@ -189,9 +203,12 @@ query Search( } } } - ... on DataProduct { + ... on Container { urn type + subTypes { + typeNames + } ownership { owners { owner { @@ -219,7 +236,6 @@ query Search( key value } - numAssets } domain { domain { @@ -242,54 +258,9 @@ query Search( } } } - } - ... on Container { - urn - type - subTypes { - typeNames - } - ownership { - owners { - owner { - ... on CorpUser { - urn - properties { - fullName - email - } - } - ... 
on CorpGroup { - urn - properties { - displayName - email - } - } - } - } - } - properties { - name - description - customProperties { - key - value - } - } - domain { - domain { - urn - id - properties { - name - description - } - } - } - tags { - tags { - tag { + glossaryTerms { + terms { + term { urn properties { name diff --git a/lib/datahub-client/data_platform_catalogue/client/graphql_helpers.py b/lib/datahub-client/data_platform_catalogue/client/graphql_helpers.py index 147141e4..300a6b25 100644 --- a/lib/datahub-client/data_platform_catalogue/client/graphql_helpers.py +++ b/lib/datahub-client/data_platform_catalogue/client/graphql_helpers.py @@ -11,6 +11,7 @@ DomainRef, EntityRef, FurtherInformation, + GlossaryTermRef, OwnerRef, RelationshipType, TagRef, @@ -70,7 +71,7 @@ def parse_created_and_modified( def parse_tags(entity: dict[str, Any]) -> list[TagRef]: """ - Parse tag information into a flat list of strings for displaying + Parse tag information into a list of TagRef objects for displaying as part of the search result. """ outer_tags = entity.get("tags") or {} @@ -87,6 +88,26 @@ def parse_tags(entity: dict[str, Any]) -> list[TagRef]: return tags +def parse_glossary_terms(entity: dict[str, Any]) -> list[GlossaryTermRef]: + """ + Parse glossary term information into a list of GlossaryTermRef for displaying + as part of the search result. 
+ """ + outer_terms = entity.get("glossaryTerms") or {} + terms = [] + for term in outer_terms.get("terms", []): + properties = term.get("term", {}).get("properties", {}) + if properties: + terms.append( + GlossaryTermRef( + display_name=properties.get("name", ""), + urn=term.get("term", {}).get("urn", ""), + description=properties.get("description", ""), + ) + ) + return terms + + def parse_properties( entity: dict[str, Any] ) -> Tuple[dict[str, Any], CustomEntityProperties]: diff --git a/lib/datahub-client/data_platform_catalogue/client/search.py b/lib/datahub-client/data_platform_catalogue/client/search.py index e72572e7..b5b0a1b5 100644 --- a/lib/datahub-client/data_platform_catalogue/client/search.py +++ b/lib/datahub-client/data_platform_catalogue/client/search.py @@ -7,6 +7,7 @@ from data_platform_catalogue.client.graphql_helpers import ( parse_created_and_modified, parse_domain, + parse_glossary_terms, parse_last_modified, parse_names, parse_owner, @@ -207,7 +208,7 @@ def list_database_tables( def _get_data_collection_page_results(self, response, key_for_results: str): """ - for use by entities that hold collections of data, eg. data product and container + for use by entities that hold collections of data, eg. 
container """ page_results = [] for result in response[key_for_results]["searchResults"]: @@ -254,6 +255,7 @@ def _parse_result( owner = parse_owner(entity) properties, custom_properties = parse_properties(entity) tags = parse_tags(entity) + terms = parse_glossary_terms(entity) last_modified = parse_last_modified(entity) name, display_name, qualified_name = parse_names(entity, properties) @@ -288,7 +290,8 @@ def _parse_result( fully_qualified_name=qualified_name, description=properties.get("description", ""), metadata=metadata, - tags=[tag_str.display_name for tag_str in tags], + tags=tags, + glossary_terms=terms, last_modified=modified or last_modified, ) @@ -359,6 +362,7 @@ def _parse_container(self, entity: dict[str, Any], matches) -> SearchResult: Map a Container entity to a SearchResult """ tags = parse_tags(entity) + terms = parse_glossary_terms(entity) last_modified = parse_last_modified(entity) properties, custom_properties = parse_properties(entity) domain = parse_domain(entity) @@ -387,7 +391,8 @@ def _parse_container(self, entity: dict[str, Any], matches) -> SearchResult: display_name=display_name, description=properties.get("description", ""), metadata=metadata, - tags=[tag.display_name for tag in tags], + tags=tags, + glossary_terms=terms, last_modified=last_modified, ) diff --git a/lib/datahub-client/data_platform_catalogue/entities.py b/lib/datahub-client/data_platform_catalogue/entities.py index c559486c..59386b43 100644 --- a/lib/datahub-client/data_platform_catalogue/entities.py +++ b/lib/datahub-client/data_platform_catalogue/entities.py @@ -168,6 +168,25 @@ class TagRef(BaseModel): ) +class GlossaryTermRef(BaseModel): + """ + Reference to a Glossary term + """ + + display_name: str = Field( + description="Glossary term name", + examples=["PII"], + ) + urn: str = Field( + description="The identifier of the glossary term", + examples=["urn:li:glossaryTerm:ESDA"], + ) + description: str = Field( + description="The definition of the glossary term", 
+ examples=["Essential Shared Data Asset"], + ) + + class UsageRestrictions(BaseModel): """ Metadata about how entities may be used. diff --git a/lib/datahub-client/data_platform_catalogue/search_types.py b/lib/datahub-client/data_platform_catalogue/search_types.py index 3e72872a..0ba325e2 100644 --- a/lib/datahub-client/data_platform_catalogue/search_types.py +++ b/lib/datahub-client/data_platform_catalogue/search_types.py @@ -5,6 +5,8 @@ from enum import Enum, auto from typing import Any +from data_platform_catalogue.entities import GlossaryTermRef, TagRef + class ResultType(Enum): """Result type.""" @@ -62,13 +64,16 @@ class SearchResult: description: str = "" matches: dict[str, str] = field(default_factory=dict) metadata: dict[str, Any] = field(default_factory=dict) - tags: list[str] = field(default_factory=list) + tags: list[TagRef] = field(default_factory=list) + glossary_terms: list[GlossaryTermRef] = field(default_factory=list) last_modified: datetime | None = None created: datetime | None = None - tags_to_display: list[str] = field(init=False) + tags_to_display: list[TagRef] = field(init=False) def __post_init__(self): - self.tags_to_display = [tag for tag in self.tags if not tag.startswith("dc_")] + self.tags_to_display = [ + tag for tag in self.tags if not tag.display_name.startswith("dc_") + ] @dataclass diff --git a/lib/datahub-client/tests/client/datahub/test_datahub_client.py b/lib/datahub-client/tests/client/datahub/test_datahub_client.py index bfbb4d98..f3736c08 100644 --- a/lib/datahub-client/tests/client/datahub/test_datahub_client.py +++ b/lib/datahub-client/tests/client/datahub/test_datahub_client.py @@ -229,12 +229,6 @@ def datahub_client(self, base_mock_graph) -> DataHubCatalogueClient: jwt_token="abc", api_url="http://example.com/api/gms", graph=base_mock_graph ) - @pytest.fixture - def golden_file_in_dp(self): - return Path( - Path(__file__).parent / "../../test_resources/golden_data_product_in.json" - ) - @pytest.fixture def 
golden_file_in_db(self): return Path( @@ -263,7 +257,6 @@ def test_get_dataset( } ], }, - "data_product_relations": {"total": 0, "relationships": []}, "name": "Dataset", "properties": { "name": "Dataset", @@ -379,7 +372,6 @@ def test_get_dataset_minimal_properties( "container_relations": { "total": 0, }, - "data_product_relations": {"total": 0, "relationships": []}, "schemaMetadata": {"fields": []}, } } diff --git a/lib/datahub-client/tests/client/datahub/test_graphql_helpers.py b/lib/datahub-client/tests/client/datahub/test_graphql_helpers.py index 6f860ab0..e8b989f6 100644 --- a/lib/datahub-client/tests/client/datahub/test_graphql_helpers.py +++ b/lib/datahub-client/tests/client/datahub/test_graphql_helpers.py @@ -4,8 +4,10 @@ from data_platform_catalogue.client.graphql_helpers import ( parse_columns, parse_created_and_modified, + parse_glossary_terms, parse_properties, parse_relations, + parse_tags, ) from data_platform_catalogue.entities import ( AccessInformation, @@ -15,7 +17,9 @@ DataSummary, EntityRef, FurtherInformation, + GlossaryTermRef, RelationshipType, + TagRef, UsageRestrictions, ) @@ -324,3 +328,50 @@ def test_parse_properties_with_none_values(): data_summary=DataSummary(row_count=100), further_information=FurtherInformation(), ) + + +def test_parse_tags(): + tag = TagRef(display_name="abc", urn="urn:tag:abc") + result = parse_tags( + { + "tags": { + "tags": [ + { + "tag": { + "urn": tag.urn, + "properties": { + "name": tag.display_name, + }, + } + } + ] + } + } + ) + + assert result == [tag] + + +def test_parse_glossary_terms(): + term = GlossaryTermRef( + display_name="abc", urn="urn:glossaryTerm:abc", description="hello world" + ) + result = parse_glossary_terms( + { + "glossaryTerms": { + "terms": [ + { + "term": { + "urn": term.urn, + "properties": { + "name": term.display_name, + "description": term.description, + }, + } + } + ] + } + } + ) + + assert result == [term] diff --git a/lib/datahub-client/tests/client/datahub/test_search.py 
b/lib/datahub-client/tests/client/datahub/test_search.py index 318448b8..0355ba3e 100644 --- a/lib/datahub-client/tests/client/datahub/test_search.py +++ b/lib/datahub-client/tests/client/datahub/test_search.py @@ -2,12 +2,12 @@ from unittest.mock import MagicMock import pytest - from data_platform_catalogue.client.search import SearchClient from data_platform_catalogue.entities import ( AccessInformation, DataSummary, FurtherInformation, + TagRef, UsageRestrictions, ) from data_platform_catalogue.search_types import ( @@ -1032,7 +1032,7 @@ def test_search_for_container(mock_graph, searcher): "data_summary": DataSummary(), "further_information": FurtherInformation(), }, - tags=["test"], + tags=[TagRef(display_name="test", urn="urn:li:tag:test")], last_modified=None, created=None, ) @@ -1152,9 +1152,9 @@ def test_tag_to_display(tags, result): "s3_location": "", "row_count": "", }, - tags=tags, + tags=[TagRef(display_name=t, urn=f"urn:tag:{t}") for t in tags], last_modified=None, created=None, ) - assert test_search_result.tags_to_display == result + assert [t.display_name for t in test_search_result.tags_to_display] == result diff --git a/lib/datahub-client/tests/test_integration_with_datahub_server.py b/lib/datahub-client/tests/test_integration_with_datahub_server.py index 3e070a35..58abfebd 100644 --- a/lib/datahub-client/tests/test_integration_with_datahub_server.py +++ b/lib/datahub-client/tests/test_integration_with_datahub_server.py @@ -179,9 +179,7 @@ def test_paginated_search_results_unique(): @runs_on_development_server def test_list_database_tables(): client = DataHubCatalogueClient(jwt_token=jwt_token, api_url=api_url) - assets = client.list_database_tables( - urn="urn:li:dataProduct:my_data_product", count=20 - ) + assets = client.list_database_tables(urn="urn:li:database:foo", count=20) assert assets diff --git a/templates/details_database.html b/templates/details_database.html index 82a787ed..9bf23f29 100644 --- a/templates/details_database.html +++ 
b/templates/details_database.html @@ -58,10 +58,10 @@