Skip to content

Commit

Permalink
fix: Increase the compatibility of id structure between the Databuild…
Browse files Browse the repository at this point in the history
…er and the Metadata Library (#445)

* Make id format compatible with metadata service

Signed-off-by: Andrew Ciambrone <[email protected]>

* fixes for dashboard search extractor

Signed-off-by: Andrew Ciambrone <[email protected]>

* fix typo on attr name

Signed-off-by: Andrew Ciambrone <[email protected]>

* Reconcille databuilder with models in common for stat_name and col_type

Signed-off-by: Andrew Ciambrone <[email protected]>
  • Loading branch information
AndrewCiambrone authored Feb 27, 2021
1 parent 3e28137 commit 6a13762
Show file tree
Hide file tree
Showing 31 changed files with 955 additions and 504 deletions.
9 changes: 6 additions & 3 deletions databuilder/extractor/neptune_search_data_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def _user_search_query(graph: GraphTraversalSource, tag_filter: str) -> List[Dic

def _dashboard_search_query(graph: GraphTraversalSource, tag_filter: str) -> List[Dict]:
traversal = graph.V().hasLabel(DashboardMetadata.DASHBOARD_NODE_LABEL)
traversal = traversal.has('full_name')
traversal = traversal.has('name')
if tag_filter:
traversal = traversal.where('published_tag', tag_filter)

Expand Down Expand Up @@ -184,13 +184,16 @@ def _dashboard_search_query(graph: GraphTraversalSource, tag_filter: str) -> Lis
__.constant('')
)) # group_description
traversal = traversal.by(
__.out(DashboardMetadata.DASHBOARD_DASHBOARD_GROUP_RELATION_TYPE).values('group_url')
__.out(DashboardMetadata.DASHBOARD_DASHBOARD_GROUP_RELATION_TYPE).values('dashboard_group_url')
) # group_url
traversal = traversal.by('dashboard_url') # dashboard_url
traversal = traversal.by('key') # uri

traversal = traversal.by(
__.out('EXECUTED').has('key', TextP.endingWith('_last_successful_execution')).values('timestamp')
__.coalesce(
__.out('EXECUTED').has('key', TextP.endingWith('_last_successful_execution')).values('timestamp'),
__.constant('')
)
) # last_successful_run_timestamp
traversal = traversal.by(
__.out(DashboardQuery.DASHBOARD_QUERY_RELATION_TYPE).values('name').dedup().fold()
Expand Down
2 changes: 1 addition & 1 deletion databuilder/models/es_last_updated.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class ESLastUpdated(GraphSerializable):

LABEL = 'Updatedtimestamp'
KEY = 'amundsen_updated_timestamp'
LATEST_TIMESTAMP = 'latest_timestmap'
LATEST_TIMESTAMP = 'latest_timestamp'

def __init__(self,
timestamp: int,
Expand Down
2 changes: 1 addition & 1 deletion databuilder/models/table_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ class ColumnMetadata:
COLUMN_NODE_LABEL = 'Column'
COLUMN_KEY_FORMAT = '{db}://{cluster}.{schema}/{tbl}/{col}'
COLUMN_NAME = 'name'
COLUMN_TYPE = 'type'
COLUMN_TYPE = 'col_type'
COLUMN_ORDER = 'sort_order'
COLUMN_DESCRIPTION = 'description'
COLUMN_DESCRIPTION_FORMAT = '{db}://{cluster}.{schema}/{tbl}/{col}/{description_id}'
Expand Down
8 changes: 4 additions & 4 deletions databuilder/models/table_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class TableColumnStats(GraphSerializable):
"""
LABEL = 'Stat'
KEY_FORMAT = '{db}://{cluster}.{schema}' \
'/{table}/{col}/{stat_name}/'
'/{table}/{col}/{stat_type}/'
STAT_Column_RELATION_TYPE = 'STAT_OF'
Column_STAT_RELATION_TYPE = 'STAT'

Expand All @@ -40,7 +40,7 @@ def __init__(self,
self.start_epoch = start_epoch
self.end_epoch = end_epoch
self.cluster = cluster
self.stat_name = stat_name
self.stat_type = stat_name
self.stat_val = str(stat_val)
self._node_iter = iter(self.create_nodes())
self._relation_iter = iter(self.create_relation())
Expand All @@ -64,7 +64,7 @@ def get_table_stat_model_key(self) -> str:
schema=self.schema,
table=self.table,
col=self.col_name,
stat_name=self.stat_name)
stat_type=self.stat_type)

def get_col_key(self) -> str:
# no cluster, schema info from the input
Expand All @@ -84,7 +84,7 @@ def create_nodes(self) -> List[GraphNode]:
label=TableColumnStats.LABEL,
attributes={
'stat_val': self.stat_val,
'stat_name': self.stat_name,
'stat_type': self.stat_type,
'start_epoch': self.start_epoch,
'end_epoch': self.end_epoch,
}
Expand Down
61 changes: 47 additions & 14 deletions databuilder/serializers/neptune_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
NEPTUNE_RELATIONSHIP_HEADER_TO = "~to"


METADATA_KEY_PROPERTY_NAME = 'key:String(single)'
# last seen property names
NEPTUNE_LAST_EXTRACTED_AT_NODE_PROPERTY_NAME = "last_extracted_datetime"
NEPTUNE_LAST_EXTRACTED_AT_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT = "{name}:Date(single)".format(
Expand Down Expand Up @@ -46,30 +47,33 @@ def convert_relationship(relationship: Optional[GraphRelationship]) -> List[Dict
if relationship.start_key == '' or relationship.end_key == '':
return []

relation_id = "{from_vertex_id}_{to_vertex_id}_{label}".format(
from_vertex_id=relationship.start_key,
to_vertex_id=relationship.end_key,
label=relationship.type
neptune_start_key = "{label}:{key}".format(
label=relationship.start_label,
key=relationship.start_key
)
relation_id_reverse = "{from_vertex_id}_{to_vertex_id}_{label}".format(
from_vertex_id=relationship.end_key,
to_vertex_id=relationship.start_key,
label=relationship.reverse_type
neptune_end_key = "{label}:{key}".format(
label=relationship.end_label,
key=relationship.end_key
)
relation_id = get_forward_relationship_id(relationship)
relation_id_reverse = get_reverse_relationship_id(relationship)
current_string_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S')
forward_relationship_doc = {
NEPTUNE_HEADER_ID: relation_id,
NEPTUNE_RELATIONSHIP_HEADER_FROM: relationship.start_key,
NEPTUNE_RELATIONSHIP_HEADER_TO: relationship.end_key,
METADATA_KEY_PROPERTY_NAME: relation_id,
NEPTUNE_RELATIONSHIP_HEADER_FROM: neptune_start_key,
NEPTUNE_RELATIONSHIP_HEADER_TO: neptune_end_key,
NEPTUNE_HEADER_LABEL: relationship.type,
NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: current_string_time,
NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB
NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,

}

reverse_relationship_doc = {
NEPTUNE_HEADER_ID: relation_id_reverse,
NEPTUNE_RELATIONSHIP_HEADER_FROM: relationship.end_key,
NEPTUNE_RELATIONSHIP_HEADER_TO: relationship.start_key,
METADATA_KEY_PROPERTY_NAME: relation_id_reverse,
NEPTUNE_RELATIONSHIP_HEADER_FROM: neptune_end_key,
NEPTUNE_RELATIONSHIP_HEADER_TO: neptune_start_key,
NEPTUNE_HEADER_LABEL: relationship.reverse_type,
NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: current_string_time,
NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB
Expand All @@ -90,6 +94,26 @@ def convert_relationship(relationship: Optional[GraphRelationship]) -> List[Dict
]


def get_forward_relationship_id(relationship: GraphRelationship) -> str:
return "{label}:{from_vertex_label}:{from_vertex_id}_{to_vertex_label}:{to_vertex_id}".format(
from_vertex_id=relationship.start_key,
from_vertex_label=relationship.start_label,
to_vertex_id=relationship.end_key,
to_vertex_label=relationship.end_label,
label=relationship.type
)


def get_reverse_relationship_id(relationship: GraphRelationship) -> str:
return "{label}:{from_vertex_label}:{from_vertex_id}_{to_vertex_label}:{to_vertex_id}".format(
to_vertex_id=relationship.start_key,
to_vertex_label=relationship.start_label,
from_vertex_id=relationship.end_key,
from_vertex_label=relationship.end_label,
label=relationship.reverse_type
)


def convert_node(node: Optional[GraphNode]) -> Dict[str, Any]:
if node is None:
return {}
Expand All @@ -98,8 +122,10 @@ def convert_node(node: Optional[GraphNode]) -> Dict[str, Any]:
return {}

current_string_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S')
node_id = get_node_id(node)
node_dict = {
NEPTUNE_HEADER_ID: node.key,
NEPTUNE_HEADER_ID: node_id,
METADATA_KEY_PROPERTY_NAME: node_id,
NEPTUNE_HEADER_LABEL: node.label,
NEPTUNE_LAST_EXTRACTED_AT_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT: current_string_time,
NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB
Expand All @@ -117,6 +143,13 @@ def convert_node(node: Optional[GraphNode]) -> Dict[str, Any]:
return node_dict


def get_node_id(node: GraphNode) -> str:
return "{label}:{key}".format(
label=node.label,
key=node.key
)


def _get_neptune_type_for_value(value: Any) -> Optional[str]:
if isinstance(value, six.string_types):
return "String"
Expand Down
38 changes: 25 additions & 13 deletions tests/unit/models/dashboard/test_dashboard_chart.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
)
from databuilder.serializers import neo4_serializer, neptune_serializer
from databuilder.serializers.neptune_serializer import (
NEPTUNE_CREATION_TYPE_JOB, NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT,
METADATA_KEY_PROPERTY_NAME, NEPTUNE_CREATION_TYPE_JOB, NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT,
NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT, NEPTUNE_HEADER_ID, NEPTUNE_HEADER_LABEL,
NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT, NEPTUNE_RELATIONSHIP_HEADER_FROM,
NEPTUNE_RELATIONSHIP_HEADER_TO,
Expand Down Expand Up @@ -44,7 +44,8 @@ def test_create_nodes(self) -> None:
'LABEL': 'Chart'
}
neptune_expected = {
'~id': '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
'~id': 'Chart:_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
METADATA_KEY_PROPERTY_NAME: 'Chart:_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
'~label': 'Chart',
NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
Expand Down Expand Up @@ -77,7 +78,8 @@ def test_create_nodes(self) -> None:
'url': 'http://gold.foo.bar/'
}
neptune_expected2 = {
'~id': '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
'~id': 'Chart:_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
METADATA_KEY_PROPERTY_NAME: 'Chart:_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
'~label': 'Chart',
'id:String(single)': 'c_id',
NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
Expand Down Expand Up @@ -112,26 +114,36 @@ def test_create_relation(self) -> None:
}

neptune_forward_expected = {
NEPTUNE_HEADER_ID: "{from_vertex_id}_{to_vertex_id}_{label}".format(
from_vertex_id=start_key,
to_vertex_id=end_key,
NEPTUNE_HEADER_ID: "{label}:{from_vertex_id}_{to_vertex_id}".format(
from_vertex_id="Query:" + start_key,
to_vertex_id="Chart:" + end_key,
label='HAS_CHART'
),
NEPTUNE_RELATIONSHIP_HEADER_FROM: start_key,
NEPTUNE_RELATIONSHIP_HEADER_TO: end_key,
METADATA_KEY_PROPERTY_NAME: "{label}:{from_vertex_id}_{to_vertex_id}".format(
from_vertex_id="Query:" + start_key,
to_vertex_id="Chart:" + end_key,
label='HAS_CHART'
),
NEPTUNE_RELATIONSHIP_HEADER_FROM: "Query:" + start_key,
NEPTUNE_RELATIONSHIP_HEADER_TO: "Chart:" + end_key,
NEPTUNE_HEADER_LABEL: 'HAS_CHART',
NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB
}

neptune_reversed_expected = {
NEPTUNE_HEADER_ID: "{from_vertex_id}_{to_vertex_id}_{label}".format(
from_vertex_id=end_key,
to_vertex_id=start_key,
NEPTUNE_HEADER_ID: "{label}:{from_vertex_id}_{to_vertex_id}".format(
from_vertex_id="Chart:" + end_key,
to_vertex_id="Query:" + start_key,
label='CHART_OF'
),
METADATA_KEY_PROPERTY_NAME: "{label}:{from_vertex_id}_{to_vertex_id}".format(
from_vertex_id="Chart:" + end_key,
to_vertex_id="Query:" + start_key,
label='CHART_OF'
),
NEPTUNE_RELATIONSHIP_HEADER_FROM: end_key,
NEPTUNE_RELATIONSHIP_HEADER_TO: start_key,
NEPTUNE_RELATIONSHIP_HEADER_FROM: "Chart:" + end_key,
NEPTUNE_RELATIONSHIP_HEADER_TO: "Query:" + start_key,
NEPTUNE_HEADER_LABEL: 'CHART_OF',
NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB
Expand Down
37 changes: 24 additions & 13 deletions tests/unit/models/dashboard/test_dashboard_last_modified.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
)
from databuilder.serializers import neo4_serializer, neptune_serializer
from databuilder.serializers.neptune_serializer import (
NEPTUNE_CREATION_TYPE_JOB, NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT,
METADATA_KEY_PROPERTY_NAME, NEPTUNE_CREATION_TYPE_JOB, NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT,
NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT, NEPTUNE_HEADER_ID, NEPTUNE_HEADER_LABEL,
NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT, NEPTUNE_RELATIONSHIP_HEADER_FROM,
NEPTUNE_RELATIONSHIP_HEADER_TO,
Expand Down Expand Up @@ -55,7 +55,8 @@ def test_neptune_dashboard_timestamp_nodes(self) -> None:
actual = self.dashboard_last_modified.create_next_node()
actual_neptune_serialized = neptune_serializer.convert_node(actual)
neptune_expected = {
NEPTUNE_HEADER_ID: self.expected_ts_key,
NEPTUNE_HEADER_ID: 'Timestamp:' + self.expected_ts_key,
METADATA_KEY_PROPERTY_NAME: 'Timestamp:' + self.expected_ts_key,
NEPTUNE_HEADER_LABEL: 'Timestamp',
NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
Expand Down Expand Up @@ -87,31 +88,41 @@ def test_dashboard_owner_relations_neptune(self) -> None:
actual = self.dashboard_last_modified.create_next_relation()
actual_serialized = neptune_serializer.convert_relationship(actual)
neptune_forward_expected = {
NEPTUNE_HEADER_ID: "{from_vertex_id}_{to_vertex_id}_{label}".format(
from_vertex_id=self.expected_dashboard_key,
to_vertex_id=self.expected_ts_key,
NEPTUNE_HEADER_ID: "{label}:{from_vertex_id}_{to_vertex_id}".format(
from_vertex_id='Dashboard:' + self.expected_dashboard_key,
to_vertex_id='Timestamp:' + self.expected_ts_key,
label='LAST_UPDATED_AT'
),
NEPTUNE_RELATIONSHIP_HEADER_FROM: self.expected_dashboard_key,
NEPTUNE_RELATIONSHIP_HEADER_TO: self.expected_ts_key,
METADATA_KEY_PROPERTY_NAME: "{label}:{from_vertex_id}_{to_vertex_id}".format(
from_vertex_id='Dashboard:' + self.expected_dashboard_key,
to_vertex_id='Timestamp:' + self.expected_ts_key,
label='LAST_UPDATED_AT'
),
NEPTUNE_RELATIONSHIP_HEADER_FROM: 'Dashboard:' + self.expected_dashboard_key,
NEPTUNE_RELATIONSHIP_HEADER_TO: 'Timestamp:' + self.expected_ts_key,
NEPTUNE_HEADER_LABEL: 'LAST_UPDATED_AT',
NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB
}

neptune_reversed_expected = {
NEPTUNE_HEADER_ID: "{from_vertex_id}_{to_vertex_id}_{label}".format(
from_vertex_id=self.expected_ts_key,
to_vertex_id=self.expected_dashboard_key,
NEPTUNE_HEADER_ID: "{label}:{from_vertex_id}_{to_vertex_id}".format(
from_vertex_id='Timestamp:' + self.expected_ts_key,
to_vertex_id='Dashboard:' + self.expected_dashboard_key,
label='LAST_UPDATED_TIME_OF'
),
NEPTUNE_RELATIONSHIP_HEADER_FROM: self.expected_ts_key,
NEPTUNE_RELATIONSHIP_HEADER_TO: self.expected_dashboard_key,
METADATA_KEY_PROPERTY_NAME: "{label}:{from_vertex_id}_{to_vertex_id}".format(
from_vertex_id='Timestamp:' + self.expected_ts_key,
to_vertex_id='Dashboard:' + self.expected_dashboard_key,
label='LAST_UPDATED_TIME_OF'
),
NEPTUNE_RELATIONSHIP_HEADER_FROM: 'Timestamp:' + self.expected_ts_key,
NEPTUNE_RELATIONSHIP_HEADER_TO: 'Dashboard:' + self.expected_dashboard_key,
NEPTUNE_HEADER_LABEL: 'LAST_UPDATED_TIME_OF',
NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB
}

self.maxDiff = None
assert actual is not None
self.assertDictEqual(actual_serialized[0], neptune_forward_expected)
self.assertDictEqual(actual_serialized[1], neptune_reversed_expected)
Expand Down
Loading

0 comments on commit 6a13762

Please sign in to comment.