Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fmd 366 add dataset lineage link #416

Merged
merged 20 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions home/service/details.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
from data_platform_catalogue.entities import RelationshipType
from data_platform_catalogue.search_types import MultiSelectFilter, ResultType
from django.core.exceptions import ObjectDoesNotExist
from urllib.parse import urlsplit

from .base import GenericService

Expand Down Expand Up @@ -86,13 +88,32 @@ def __init__(self, urn: str):
self.context = self._get_context()

def _get_context(self):
split_datahub_url = urlsplit(
os.getenv("CATALOGUE_URL", "https://test-catalogue.gov.uk")
)

return {
"table": self.table_metadata,
"parent_entity": self.parent_entity,
"dataset_parent_type": self.dataset_parent_type,
"h1_value": "Details",
"has_lineage": self.has_lineage(),
"lineage_url": f"{split_datahub_url.scheme}://{split_datahub_url.netloc}/dataset/{self.table_metadata.urn}/Lineage?is_lineage_mode=true&",
}

def has_lineage(self) -> bool:
    """
    Report whether the dataset has any lineage recorded in datahub.

    Inspects the relationships property of the Table model held in
    ``self.table_metadata`` and looks up its
    ``RelationshipType.DATA_LINEAGE`` entry.

    Returns:
        True when at least one lineage relationship is present, else False.
    """
    # NOTE(review): tolerates relationships being None/empty - confirm whether
    # the Table model actually allows that.
    relationships = self.table_metadata.relationships or {}
    # A missing key and an empty list both mean "no lineage".
    return bool(relationships.get(RelationshipType.DATA_LINEAGE))


class ChartDetailsService(GenericService):
def __init__(self, urn: str):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,21 +232,23 @@ def get_table_details(self, urn) -> Table:
created, modified = parse_created_and_modified(properties)
name, display_name, qualified_name = parse_names(response, properties)

# A dataset can't have multiple parents, but if we did
# start to use in that we'd need to change this
if response["container_relations"]["total"] > 0:
relations = parse_relations(
RelationshipType.PARENT, response["container_relations"]
)
else:
relations = {}
lineage_relations = parse_relations(
RelationshipType.DATA_LINEAGE,
[
response.get("downstream_lineage_relations", {}),
response.get("upstream_lineage_relations", {}),
],
)
parent_relations = parse_relations(
RelationshipType.PARENT, [response["parent_container_relations"]]
)
return Table(
urn=urn,
display_name=display_name,
name=name,
fully_qualified_name=qualified_name,
description=properties.get("description", ""),
relationships=relations,
relationships={**lineage_relations, **parent_relations},
domain=domain,
governance=Governance(
data_owner=owner,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,50 @@ query getDatasetDetails($urn: String!) {
}
}
}
downstream_lineage_relations: lineage (
input: {direction: DOWNSTREAM
start:0,
count:10}
) {
total
relationships{
type
entity{
urn
... on Dataset {
name
properties{
name
}
}
type
}
}
}
upstream_lineage_relations: lineage (
input: {direction: UPSTREAM
start:0,
count:10}
) {
total
relationships{
type
entity{
urn
... on Dataset {
name
properties{
name
}
}
type
}
}
}
subTypes {
typeNames
}
container_relations: relationships(
parent_container_relations: relationships(
input: { types: ["IsPartOf"], direction: OUTGOING, count: 10 }
) {
total
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,21 +256,26 @@ def parse_columns(entity: dict[str, Any]) -> list[Column]:


def parse_relations(
    relationship_type: RelationshipType, relations_list: list[dict]
) -> dict[RelationshipType, list[EntityRef]]:
    """
    Parse the relationship results returned from a graphql query.

    Args:
        relationship_type: key under which the parsed entities are returned.
        relations_list: relation blocks from the response, each expected to
            hold a "relationships" list of ``{"entity": {...}}`` items.

    Returns:
        A single-entry dict mapping ``relationship_type`` to the EntityRefs
        built from every block, in the order they were encountered.
    """
    # We may want to do something with each block's "total" if we are returning
    # child relations and need to paginate through relations.
    # There may be more than 10 lineage entities but since we currently only care
    # if lineage exists for a dataset we don't need to capture everything
    related_entities = []
    for relations in relations_list:
        # Callers pass {} when a relation block is absent from the response, so
        # a missing (or null) "relationships" key means "no relations" rather
        # than an error.
        for relationship in (relations or {}).get("relationships") or []:
            entity = relationship.get("entity")
            if not entity:
                # Nothing to reference without an entity payload.
                continue

            properties = entity.get("properties") or {}
            # Prefer the name from properties; fall back to the entity's own
            # name when properties are missing or carry no name.
            display_name = properties.get("name")
            if display_name is None:
                display_name = entity.get("name")

            related_entities.append(
                EntityRef(urn=entity.get("urn"), display_name=display_name)
            )

    return {relationship_type: related_entities}
5 changes: 0 additions & 5 deletions lib/datahub-client/data_platform_catalogue/client/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
parse_names,
parse_owner,
parse_properties,
parse_relations,
parse_tags,
)
from data_platform_catalogue.entities import RelationshipType
Expand Down Expand Up @@ -259,16 +258,12 @@ def _parse_result(
last_modified = parse_last_modified(entity)
name, display_name, qualified_name = parse_names(entity, properties)

relations = parse_relations(
RelationshipType.PARENT, entity.get("relationships", {})
)
domain = parse_domain(entity)

metadata = {
"owner": owner.display_name,
"owner_email": owner.email,
"total_parents": entity.get("relationships", {}).get("total", 0),
"parents": relations[RelationshipType.PARENT],
MatMoore marked this conversation as resolved.
Show resolved Hide resolved
"domain_name": domain.display_name,
"domain_id": domain.urn,
"entity_types": self._parse_types_and_sub_types(entity, "Dataset"),
Expand Down
1 change: 1 addition & 0 deletions lib/datahub-client/data_platform_catalogue/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
class RelationshipType(Enum):
    """Keys used to group the entities related to a catalogue entity."""

    # Every member's value is the same string as its name.
    PARENT = "PARENT"
    PLATFORM = "PLATFORM"
    DATA_LINEAGE = "DATA_LINEAGE"


class EntityRef(BaseModel):
Expand Down
24 changes: 17 additions & 7 deletions lib/datahub-client/tests/client/datahub/test_datahub_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,17 +243,22 @@ def test_get_dataset(
urn = "abc"
datahub_response = {
"dataset": {
"type": "DATASET",
"platform": {"name": "datahub"},
"ownership": None,
"subTypes": None,
"container_relations": {
"downstream_lineage_relations": {"total": 0, "relationships": []},
"upstream_lineage_relations": {"total": 0, "relationships": []},
"parent_container_relations": {
"total": 1,
"relationships": [
{
"type": "IsPartOf",
"direction": "OUTGOING",
"entity": {
"urn": "urn:li:container:database",
"properties": {"name": "database"},
}
},
}
],
},
Expand Down Expand Up @@ -325,7 +330,8 @@ def test_get_dataset(
relationships={
RelationshipType.PARENT: [
EntityRef(urn="urn:li:container:database", display_name="database")
]
],
RelationshipType.DATA_LINEAGE: [],
},
domain=DomainRef(display_name="", urn=""),
governance=Governance(
Expand Down Expand Up @@ -369,9 +375,10 @@ def test_get_dataset_minimal_properties(
"platform": {"name": "datahub"},
"name": "notinproperties",
"properties": {},
"container_relations": {
"total": 0,
},
"downstream_lineage_relations": {"total": 0, "relationships": []},
"upstream_lineage_relations": {"total": 0, "relationships": []},
"parent_container_relations": {"total": 0, "relationships": []},
"data_product_relations": {"total": 0, "relationships": []},
"schemaMetadata": {"fields": []},
}
}
Expand All @@ -389,7 +396,10 @@ def test_get_dataset_minimal_properties(
name="notinproperties",
fully_qualified_name="notinproperties",
description="",
relationships={},
relationships={
RelationshipType.PARENT: [],
RelationshipType.DATA_LINEAGE: [],
},
domain=DomainRef(display_name="", urn=""),
governance=Governance(
data_owner=OwnerRef(display_name="", email="", urn=""),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def test_parse_relations():
],
}
}
result = parse_relations(RelationshipType.PARENT, relations["relationships"])
result = parse_relations(RelationshipType.PARENT, [relations["relationships"]])
assert result == {
RelationshipType.PARENT: [
EntityRef(urn="urn:li:dataProduct:test", display_name="test")
Expand All @@ -201,7 +201,7 @@ def test_parse_relations():

def test_parse_relations_blank():
    """An empty relationships block parses to an empty PARENT list."""
    relations = {"relationships": {"total": 0, "relationships": []}}
    # parse_relations takes a list of relation blocks, so wrap the single block.
    result = parse_relations(RelationshipType.PARENT, [relations["relationships"]])
    assert result == {RelationshipType.PARENT: []}


Expand Down
9 changes: 0 additions & 9 deletions lib/datahub-client/tests/client/datahub/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ def test_one_search_result(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "HMPPS",
"domain_id": "urn:li:domain:3dc18e48-c062-4407-84a9-73e23f768023",
"entity_types": {
Expand Down Expand Up @@ -210,7 +209,6 @@ def test_dataset_result(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "HMPPS",
"domain_id": "urn:li:domain:3dc18e48-c062-4407-84a9-73e23f768023",
"entity_types": {
Expand Down Expand Up @@ -302,7 +300,6 @@ def test_full_page(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "",
"domain_id": "",
"entity_types": {
Expand Down Expand Up @@ -334,7 +331,6 @@ def test_full_page(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "",
"domain_id": "",
"entity_types": {
Expand Down Expand Up @@ -364,7 +360,6 @@ def test_full_page(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "",
"domain_id": "",
"entity_types": {
Expand Down Expand Up @@ -444,7 +439,6 @@ def test_query_match(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "",
"domain_id": "",
"entity_types": {
Expand Down Expand Up @@ -520,7 +514,6 @@ def test_result_with_owner(mock_graph, searcher):
"owner": "Shannon Lovett",
"owner_email": "[email protected]",
"total_parents": 0,
"parents": [],
"domain_name": "",
"domain_id": "",
"entity_types": {
Expand Down Expand Up @@ -891,7 +884,6 @@ def test_search_for_charts(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "",
"domain_id": "",
"entity_types": {
Expand Down Expand Up @@ -1091,7 +1083,6 @@ def test_list_database_tables(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "",
"domain_id": "",
"entity_types": {
Expand Down
14 changes: 11 additions & 3 deletions templates/details_table.html
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,17 @@ <h3 class="govuk-heading-s govuk-!-margin-top-3">
<h2 class="govuk-heading-m">Table schema</h2>
<p class="govuk-body">The schema for this table is not available.</p>
{% endif %}


</div>
{% if has_lineage %}
<h2 class="govuk-heading-m">Lineage</h2>
<div class="govuk-body-m" >
If you are interested to find out what data were used to create this table or if this table is used to create any further tables, you can see that information via the lineage.
</div>
<div class="govuk-body">
<a href="{{lineage_url}}" class="govuk-link">
View lineage in DataHub
</a>
</div>
{% endif %}
</div>

{% endblock content %}
4 changes: 2 additions & 2 deletions tests/benchmark/test_exact_matches.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
"query,expected_urn",
[
(
"prison_population_history.chunk_assignment",
"urn:li:dataset:(urn:li:dataPlatform:dbt,cadet.awsdatacatalog.prison_population_history.chunk_assignment,PROD)",
"bold_common_platform_linked_tables.all_offence",
"urn:li:dataset:(urn:li:dataPlatform:dbt,cadet.awsdatacatalog.bold_common_platform_linked_tables.all_offence,PROD)",
),
(
"Accommodation on the first night following release",
Expand Down
3 changes: 2 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ def generate_table_metadata(
name=name or fake.unique.name(),
fully_qualified_name="Foo.Dataset",
description=description or fake.paragraph(),
relationships=relations or {RelationshipType.PARENT: []},
relationships=relations
or {RelationshipType.PARENT: [], RelationshipType.DATA_LINEAGE: []},
domain=DomainRef(display_name="LAA", urn="LAA"),
governance=Governance(
data_owner=OwnerRef(
Expand Down
Loading