Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup - bring through tags and glossary terms consistently, and remove dead code for data products #418

Merged
merged 5 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-and-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ jobs:
- name: run selenium tests
id: slow-tests
if: steps.fast-tests.outcome == 'success'
run: poetry run pytest tests/selenium --axe-version 4.9.0
run: poetry run pytest tests/selenium --axe-version 4.9.1 --chromedriver-path /usr/local/bin/chromedriver

javascript:
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion home/forms/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def create_option(


class SearchForm(forms.Form):
"""Django form to represent data product search page inputs"""
"""Django form to represent search page inputs"""

query = forms.CharField(
max_length=100,
Expand Down
4 changes: 1 addition & 3 deletions home/service/details.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,7 @@ def __init__(self, urn: str):
if parents:
# Pick the first entity to use as the parent in the breadcrumb.
# If the dataset belongs to multiple parents, this may diverge
# from the path the user took to get to this page. However as of datahub
# v0.12, assigning to multiple data products is not possible and we don't
# have datasets with multiple parent containers.
# from the path the user took to get to this page.
self.parent_entity = parents[0]
self.dataset_parent_type = ResultType.DATABASE.name.lower()
else:
Expand Down
11 changes: 11 additions & 0 deletions lib/datahub-client/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Added

- Return domain metadata for Charts
- Add `glossary_terms` list to `SearchResult`

### Changed

- Return lists of objects for `SearchResult.tags` and `SearchResult.tags_to_display` instead of strings.

### Removed

- Removed all remaining references to Data Products

## [1.0.1] 2024-05-07

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,6 @@ query getDatasetDetails($urn: String!) {
}
}
}
data_product_relations: relationships(
input: { types: ["DataProductContains"], direction: INCOMING, count: 10 }
) {
total
relationships {
entity {
urn
... on DataProduct {
properties {
name
}
}
}
}
}
ownership {
owners {
owner {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,28 @@ query Search(
}
}
}
tags {
tags {
tag {
urn
properties {
name
description
}
}
}
}
glossaryTerms {
terms {
term {
urn
properties {
name
description
}
}
}
}
properties {
name
description
Expand Down Expand Up @@ -109,25 +131,6 @@ query Search(
subTypes {
typeNames
}
relationships(
input: {
types: ["DataProductContains"]
direction: INCOMING
count: 10
}
) {
total
relationships {
entity {
urn
... on DataProduct {
properties {
name
}
}
}
}
}
ownership {
owners {
owner {
Expand Down Expand Up @@ -177,6 +180,17 @@ query Search(
}
}
}
glossaryTerms {
terms {
term {
urn
properties {
name
description
}
}
}
}
lastIngested
domain {
domain {
Expand All @@ -189,9 +203,12 @@ query Search(
}
}
}
... on DataProduct {
MatMoore marked this conversation as resolved.
Show resolved Hide resolved
... on Container {
urn
type
subTypes {
typeNames
}
ownership {
owners {
owner {
Expand Down Expand Up @@ -219,7 +236,6 @@ query Search(
key
value
}
numAssets
}
domain {
domain {
Expand All @@ -242,54 +258,9 @@ query Search(
}
}
}
}
... on Container {
urn
type
subTypes {
typeNames
}
ownership {
owners {
owner {
... on CorpUser {
urn
properties {
fullName
email
}
}
... on CorpGroup {
urn
properties {
displayName
email
}
}
}
}
}
properties {
name
description
customProperties {
key
value
}
}
domain {
domain {
urn
id
properties {
name
description
}
}
}
tags {
tags {
tag {
glossaryTerms {
terms {
term {
urn
properties {
name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
DomainRef,
EntityRef,
FurtherInformation,
GlossaryTermRef,
OwnerRef,
RelationshipType,
TagRef,
Expand Down Expand Up @@ -70,7 +71,7 @@ def parse_created_and_modified(

def parse_tags(entity: dict[str, Any]) -> list[TagRef]:
"""
Parse tag information into a flat list of strings for displaying
Parse tag information into a list of TagRef objects for displaying
as part of the search result.
"""
outer_tags = entity.get("tags") or {}
Expand All @@ -87,6 +88,26 @@ def parse_tags(entity: dict[str, Any]) -> list[TagRef]:
return tags


def parse_glossary_terms(entity: dict[str, Any]) -> list[GlossaryTermRef]:
    """
    Parse glossary term information into a list of GlossaryTermRef objects
    for displaying as part of the search result.

    The entity is expected to carry an optional "glossaryTerms" mapping with
    a "terms" list, each item holding a "term" with "urn" and "properties".
    Terms without any properties are skipped. Any of these values may be
    missing or null in the response; nulls are treated the same as missing
    so that only strings are ever passed to GlossaryTermRef.
    """
    outer_terms = entity.get("glossaryTerms") or {}
    terms = []
    # `or []` / `or {}` rather than a .get() default: a key that is present
    # with a null value would otherwise yield None and break the lookups below.
    for association in outer_terms.get("terms") or []:
        term = association.get("term") or {}
        properties = term.get("properties") or {}
        if not properties:
            continue
        terms.append(
            GlossaryTermRef(
                display_name=properties.get("name") or "",
                urn=term.get("urn") or "",
                description=properties.get("description") or "",
            )
        )
    return terms


def parse_properties(
entity: dict[str, Any]
) -> Tuple[dict[str, Any], CustomEntityProperties]:
Expand Down
11 changes: 8 additions & 3 deletions lib/datahub-client/data_platform_catalogue/client/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from data_platform_catalogue.client.graphql_helpers import (
parse_created_and_modified,
parse_domain,
parse_glossary_terms,
parse_last_modified,
parse_names,
parse_owner,
Expand Down Expand Up @@ -207,7 +208,7 @@ def list_database_tables(

def _get_data_collection_page_results(self, response, key_for_results: str):
"""
for use by entities that hold collections of data, eg. data product and container
for use by entities that hold collections of data, eg. container
"""
page_results = []
for result in response[key_for_results]["searchResults"]:
Expand Down Expand Up @@ -254,6 +255,7 @@ def _parse_result(
owner = parse_owner(entity)
properties, custom_properties = parse_properties(entity)
tags = parse_tags(entity)
terms = parse_glossary_terms(entity)
last_modified = parse_last_modified(entity)
name, display_name, qualified_name = parse_names(entity, properties)

Expand Down Expand Up @@ -288,7 +290,8 @@ def _parse_result(
fully_qualified_name=qualified_name,
description=properties.get("description", ""),
metadata=metadata,
tags=[tag_str.display_name for tag_str in tags],
tags=tags,
glossary_terms=terms,
last_modified=modified or last_modified,
)

Expand Down Expand Up @@ -359,6 +362,7 @@ def _parse_container(self, entity: dict[str, Any], matches) -> SearchResult:
Map a Container entity to a SearchResult
"""
tags = parse_tags(entity)
terms = parse_glossary_terms(entity)
last_modified = parse_last_modified(entity)
properties, custom_properties = parse_properties(entity)
domain = parse_domain(entity)
Expand Down Expand Up @@ -387,7 +391,8 @@ def _parse_container(self, entity: dict[str, Any], matches) -> SearchResult:
display_name=display_name,
description=properties.get("description", ""),
metadata=metadata,
tags=[tag.display_name for tag in tags],
tags=tags,
glossary_terms=terms,
last_modified=last_modified,
)

Expand Down
19 changes: 19 additions & 0 deletions lib/datahub-client/data_platform_catalogue/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,25 @@ class TagRef(BaseModel):
)


class GlossaryTermRef(BaseModel):
    """
    Reference to a Glossary term.

    Holds the term's display name, its URN, and its description. All three
    fields are declared as `str` and none is given a default value here.
    """

    # Name of the glossary term.
    display_name: str = Field(
        description="Glossary term name",
        examples=["PII"],
    )
    # Identifier (URN) of the glossary term.
    urn: str = Field(
        description="The identifier of the glossary term",
        examples=["urn:li:glossaryTerm:ESDA"],
    )
    # Definition text of the glossary term.
    description: str = Field(
        description="The definition of the glossary term",
        examples=["Essential Shared Data Asset"],
    )


class UsageRestrictions(BaseModel):
"""
Metadata about how entities may be used.
Expand Down
11 changes: 8 additions & 3 deletions lib/datahub-client/data_platform_catalogue/search_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from enum import Enum, auto
from typing import Any

from data_platform_catalogue.entities import GlossaryTermRef, TagRef


class ResultType(Enum):
"""Result type."""
Expand Down Expand Up @@ -62,13 +64,16 @@ class SearchResult:
description: str = ""
matches: dict[str, str] = field(default_factory=dict)
metadata: dict[str, Any] = field(default_factory=dict)
tags: list[str] = field(default_factory=list)
tags: list[TagRef] = field(default_factory=list)
glossary_terms: list[GlossaryTermRef] = field(default_factory=list)
last_modified: datetime | None = None
created: datetime | None = None
tags_to_display: list[str] = field(init=False)
tags_to_display: list[TagRef] = field(init=False)

def __post_init__(self):
self.tags_to_display = [tag for tag in self.tags if not tag.startswith("dc_")]
self.tags_to_display = [
tag for tag in self.tags if not tag.display_name.startswith("dc_")
]


@dataclass
Expand Down
Loading