Skip to content

Commit

Permalink
fix: handle v2 vs. v1 field path conversion for lineage comparison
Browse files Browse the repository at this point in the history
  • Loading branch information
sagar-salvi-apptware committed Jul 19, 2024
1 parent 0d71837 commit 6c45be8
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from pydantic import validator
from pydantic.fields import Field

+ from datahub.api.entities.dataset.dataset import Dataset
from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.source_common import DatasetSourceConfigMixin
from datahub.emitter import mce_builder
Expand Down Expand Up @@ -793,14 +794,18 @@ def get_fine_grained_lineages(
schema_metadata: SchemaMetadata,
schema_metadata_for_s3: SchemaMetadata,
) -> Optional[List[FineGrainedLineageClass]]:
+ def simplify_field_path(field_path):
+ return Dataset._simplify_field_path(field_path)

if schema_metadata and schema_metadata_for_s3:
fine_grained_lineages: List[FineGrainedLineageClass] = []
for field in schema_metadata.fields:
+ field_path_v1 = simplify_field_path(field.fieldPath)
matching_s3_field = next(
(
f
for f in schema_metadata_for_s3.fields
- if f.fieldPath.split(".")[-1] == field.fieldPath.split(".")[-1]
+ if f.fieldPath == field_path_v1
),
None,
)
Expand All @@ -810,14 +815,14 @@ def get_fine_grained_lineages(
downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
downstreams=[
mce_builder.make_schema_field_urn(
- dataset_urn, field.fieldPath.split(".")[-1]
+ dataset_urn, field_path_v1
)
],
upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
upstreams=[
mce_builder.make_schema_field_urn(
s3_dataset_urn,
- matching_s3_field.fieldPath.split(".")[-1],
+ matching_s3_field.fieldPath,
)
],
)
Expand Down

0 comments on commit 6c45be8

Please sign in to comment.