From 21a8718b1093352bc1e3a566d2ce0297d2167434 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 29 Sep 2022 05:00:05 +0530 Subject: [PATCH 01/76] feat(ingest): add column-level lineage support for snowflake (#6034) --- .../source/snowflake/snowflake_lineage.py | 286 ++++++++++++++---- .../source/snowflake/snowflake_query.py | 12 +- .../source/snowflake/snowflake_usage_v2.py | 5 +- .../source/snowflake/snowflake_v2.py | 15 +- 4 files changed, 261 insertions(+), 57 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage.py index 3515ce63885e09..0961f16025645c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage.py @@ -1,8 +1,10 @@ import json import logging from collections import defaultdict -from typing import Dict, List, Optional, Set, Tuple +from dataclasses import dataclass, field +from typing import Dict, FrozenSet, List, Optional, Set, Tuple +from pydantic.error_wrappers import ValidationError from snowflake.connector import SnowflakeConnection import datahub.emitter.mce_builder as builder @@ -10,20 +12,128 @@ from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report +from datahub.ingestion.source.snowflake.snowflake_usage_v2 import ( + SnowflakeColumnReference, +) from datahub.ingestion.source.snowflake.snowflake_utils import ( SnowflakeCommonMixin, SnowflakeQueryMixin, ) -from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage +from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( + FineGrainedLineage, + FineGrainedLineageDownstreamType, + 
FineGrainedLineageUpstreamType, + UpstreamLineage, +) from datahub.metadata.schema_classes import DatasetLineageTypeClass, UpstreamClass from datahub.utilities.perf_timer import PerfTimer logger: logging.Logger = logging.getLogger(__name__) +class SnowflakeColumnWithLineage(SnowflakeColumnReference): + directSourceColumns: Optional[List[SnowflakeColumnReference]] = None + + +@dataclass(frozen=True) +class SnowflakeColumnId: + columnName: str + objectName: str + objectDomain: Optional[str] = None + + +@dataclass(frozen=True) +class SnowflakeColumnFineGrainedLineage: + """ + Fie grained upstream of column, + which represents a transformation applied on input columns""" + + inputColumns: FrozenSet[SnowflakeColumnId] + # Transform function, query etc can be added here + + +@dataclass +class SnowflakeColumnUpstreams: + """All upstreams of a column""" + + upstreams: Set[SnowflakeColumnFineGrainedLineage] = field( + default_factory=set, init=False + ) + + def update_column_lineage( + self, directSourceColumns: List[SnowflakeColumnReference] + ) -> None: + input_columns = frozenset( + [ + SnowflakeColumnId( + upstream_col.columnName, + upstream_col.objectName, + upstream_col.objectDomain, + ) + for upstream_col in directSourceColumns + if upstream_col.objectName + ] + ) + if not input_columns: + return + upstream = SnowflakeColumnFineGrainedLineage(inputColumns=input_columns) + if upstream not in self.upstreams: + self.upstreams.add(upstream) + + +@dataclass +class SnowflakeUpstreamTable: + upstreamDataset: str + upstreamColumns: List[SnowflakeColumnReference] + downstreamColumns: List[SnowflakeColumnWithLineage] + + @classmethod + def from_dict(cls, dataset, upstreams_columns_dict, downstream_columns_dict): + try: + table_with_upstreams = cls( + dataset, + [ + SnowflakeColumnReference.parse_obj(col) + for col in upstreams_columns_dict + ], + [ + SnowflakeColumnWithLineage.parse_obj(col) + for col in downstream_columns_dict + ], + ) + except ValidationError: + # Earlier 
versions of column lineage did not include columnName, only columnId + table_with_upstreams = cls(dataset, [], []) + return table_with_upstreams + + +@dataclass +class SnowflakeTableLineage: + # key: upstream table name + upstreamTables: Dict[str, SnowflakeUpstreamTable] = field( + default_factory=dict, init=False + ) + + # key: downstream column name + columnLineages: Dict[str, SnowflakeColumnUpstreams] = field( + default_factory=lambda: defaultdict(SnowflakeColumnUpstreams), init=False + ) + + def update_lineage(self, table: SnowflakeUpstreamTable) -> None: + if table.upstreamDataset not in self.upstreamTables.keys(): + self.upstreamTables[table.upstreamDataset] = table + + if table.downstreamColumns: + for col in table.downstreamColumns: + if col.directSourceColumns: + self.columnLineages[col.columnName].update_column_lineage( + col.directSourceColumns + ) + + class SnowflakeLineageExtractor(SnowflakeQueryMixin, SnowflakeCommonMixin): def __init__(self, config: SnowflakeV2Config, report: SnowflakeV2Report) -> None: - self._lineage_map: Optional[Dict[str, List[Tuple[str, str, str]]]] = None + self._lineage_map: Optional[Dict[str, SnowflakeTableLineage]] = None self._external_lineage_map: Optional[Dict[str, Set[str]]] = None self.config = config self.platform = "snowflake" @@ -54,49 +164,103 @@ def _get_upstream_lineage_info( lineage = self._lineage_map[dataset_name] external_lineage = self._external_lineage_map[dataset_name] - if not (lineage or external_lineage): + if not (lineage.upstreamTables or lineage.columnLineages or external_lineage): logger.debug(f"No lineage found for {dataset_name}") return None upstream_tables: List[UpstreamClass] = [] + finegrained_lineages: List[FineGrainedLineage] = [] + fieldset_finegrained_lineages: List[FineGrainedLineage] = [] column_lineage: Dict[str, str] = {} - for lineage_entry in lineage: + dataset_urn = builder.make_dataset_urn_with_platform_instance( + self.platform, + dataset_name, + self.config.platform_instance, + 
self.config.env, + ) + for lineage_entry in sorted( + lineage.upstreamTables.values(), key=lambda x: x.upstreamDataset + ): # Update the table-lineage - upstream_table_name = lineage_entry[0] - if not self._is_dataset_pattern_allowed(upstream_table_name, "table"): - continue + upstream_table_name = lineage_entry.upstreamDataset + upstream_table_urn = builder.make_dataset_urn_with_platform_instance( + self.platform, + upstream_table_name, + self.config.platform_instance, + self.config.env, + ) upstream_table = UpstreamClass( - dataset=builder.make_dataset_urn_with_platform_instance( - self.platform, - upstream_table_name, - self.config.platform_instance, - self.config.env, - ), + dataset=upstream_table_urn, type=DatasetLineageTypeClass.TRANSFORMED, ) upstream_tables.append(upstream_table) - # Update column-lineage for each down-stream column. - upstream_columns = [ - self.snowflake_identifier(d["columnName"]) - for d in json.loads(lineage_entry[1]) - ] - downstream_columns = [ - self.snowflake_identifier(d["columnName"]) - for d in json.loads(lineage_entry[2]) - ] - upstream_column_str = ( - f"{upstream_table_name}({', '.join(sorted(upstream_columns))})" - ) - downstream_column_str = ( - f"{dataset_name}({', '.join(sorted(downstream_columns))})" - ) - column_lineage_key = f"column_lineage[{upstream_table_name}]" - column_lineage_value = ( - f"{{{upstream_column_str} -> {downstream_column_str}}}" - ) - column_lineage[column_lineage_key] = column_lineage_value - logger.debug(f"{column_lineage_key}:{column_lineage_value}") - for external_lineage_entry in external_lineage: + if lineage_entry.upstreamColumns and lineage_entry.downstreamColumns: + # This is not used currently. This indicates same column lineage as was set + # in customProperties earlier - not accurate. 
+ fieldset_finegrained_lineage = FineGrainedLineage( + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + downstreamType=FineGrainedLineageDownstreamType.FIELD_SET + if len(lineage_entry.downstreamColumns) > 1 + else FineGrainedLineageDownstreamType.FIELD, + upstreams=sorted( + [ + builder.make_schema_field_urn( + upstream_table_urn, + self.snowflake_identifier(d.columnName), + ) + for d in lineage_entry.upstreamColumns + ] + ), + downstreams=sorted( + [ + builder.make_schema_field_urn( + dataset_urn, self.snowflake_identifier(d.columnName) + ) + for d in lineage_entry.downstreamColumns + ] + ), + ) + fieldset_finegrained_lineages.append(fieldset_finegrained_lineage) + + for col, col_upstreams in lineage.columnLineages.items(): + for fine_upstream in col_upstreams.upstreams: + fieldPath = col + finegrained_lineage_entry = FineGrainedLineage( + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + upstreams=sorted( + [ + builder.make_schema_field_urn( + builder.make_dataset_urn_with_platform_instance( + self.platform, + self.get_dataset_identifier_from_qualified_name( + upstream_col.objectName + ), + self.config.platform_instance, + self.config.env, + ), + self.snowflake_identifier(upstream_col.columnName), + ) + for upstream_col in fine_upstream.inputColumns # type:ignore + if upstream_col.objectName + and upstream_col.columnName + and self._is_dataset_pattern_allowed( + upstream_col.objectName, upstream_col.objectDomain + ) + ] + ), + downstreamType=FineGrainedLineageDownstreamType.FIELD, + downstreams=sorted( + [ + builder.make_schema_field_urn( + dataset_urn, self.snowflake_identifier(fieldPath) + ) + ] + ), + ) + if finegrained_lineage_entry.upstreams: + finegrained_lineages.append(finegrained_lineage_entry) + + for external_lineage_entry in sorted(external_lineage): # For now, populate only for S3 if external_lineage_entry.startswith("s3://"): external_upstream_table = UpstreamClass( @@ -113,7 +277,16 @@ def _get_upstream_lineage_info( 
self.report.upstream_lineage[dataset_name] = [ u.dataset for u in upstream_tables ] - return UpstreamLineage(upstreams=upstream_tables), column_lineage + return ( + UpstreamLineage( + upstreams=upstream_tables, + fineGrainedLineages=sorted( + finegrained_lineages, key=lambda x: (x.downstreams, x.upstreams) + ) + or None, + ), + column_lineage, + ) return None def _populate_view_lineage(self, conn: SnowflakeConnection) -> None: @@ -189,7 +362,7 @@ def _populate_lineage(self, conn: SnowflakeConnection) -> None: end_time_millis=int(self.config.end_time.timestamp() * 1000), ) num_edges: int = 0 - self._lineage_map = defaultdict(list) + self._lineage_map = defaultdict(SnowflakeTableLineage) try: for db_row in self.query(conn, query): # key is the down-stream table name @@ -204,19 +377,21 @@ def _populate_lineage(self, conn: SnowflakeConnection) -> None: or self._is_dataset_pattern_allowed(upstream_table_name, "table") ): continue - self._lineage_map[key].append( + + self._lineage_map[key].update_lineage( # (, , ) - ( + SnowflakeUpstreamTable.from_dict( upstream_table_name, - db_row["UPSTREAM_TABLE_COLUMNS"], - db_row["DOWNSTREAM_TABLE_COLUMNS"], - ) + json.loads(db_row["UPSTREAM_TABLE_COLUMNS"]), + json.loads(db_row["DOWNSTREAM_TABLE_COLUMNS"]), + ), ) num_edges += 1 logger.debug( f"Lineage[Table(Down)={key}]:Table(Up)={self._lineage_map[key]}" ) except Exception as e: + logger.error(e, exc_info=e) self.warn( "lineage", f"Extracting lineage from Snowflake failed." 
@@ -246,15 +421,19 @@ def _populate_view_upstream_lineage(self, conn: SnowflakeConnection) -> None: view_name: str = self.get_dataset_identifier_from_qualified_name( db_row["DOWNSTREAM_VIEW"] ) + if not self._is_dataset_pattern_allowed( dataset_name=view_name, dataset_type=db_row["REFERENCING_OBJECT_DOMAIN"], + ) or not self._is_dataset_pattern_allowed( + view_upstream, db_row["REFERENCED_OBJECT_DOMAIN"] ): continue + # key is the downstream view name - self._lineage_map[view_name].append( + self._lineage_map[view_name].update_lineage( # (, , ) - (view_upstream, "[]", "[]") + SnowflakeUpstreamTable.from_dict(view_upstream, [], []) ) num_edges += 1 logger.debug( @@ -297,20 +476,23 @@ def _populate_view_downstream_lineage(self, conn: SnowflakeConnection) -> None: view_name: str = self.get_dataset_identifier_from_qualified_name( db_row["VIEW_NAME"] ) + downstream_table: str = self.get_dataset_identifier_from_qualified_name( + db_row["DOWNSTREAM_TABLE_NAME"] + ) if not self._is_dataset_pattern_allowed( view_name, db_row["VIEW_DOMAIN"] + ) or not self._is_dataset_pattern_allowed( + downstream_table, db_row["DOWNSTREAM_TABLE_DOMAIN"] ): continue - downstream_table: str = self.get_dataset_identifier_from_qualified_name( - db_row["DOWNSTREAM_TABLE_NAME"] - ) + # Capture view->downstream table lineage. 
- self._lineage_map[downstream_table].append( + self._lineage_map[downstream_table].update_lineage( # (, , ) - ( + SnowflakeUpstreamTable.from_dict( view_name, - db_row["VIEW_COLUMNS"], - db_row["DOWNSTREAM_TABLE_COLUMNS"], + json.loads(db_row["VIEW_COLUMNS"]), + json.loads(db_row["DOWNSTREAM_TABLE_COLUMNS"]), ) ) self.report.num_view_to_table_edges_scanned += 1 diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index d5ded2462ae811..48299d7cc40959 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -250,7 +250,12 @@ def table_to_table_lineage_history( downstream_table_columns AS "DOWNSTREAM_TABLE_COLUMNS" FROM table_lineage_history WHERE upstream_table_domain in ('Table', 'External table') and downstream_table_domain = 'Table' - QUALIFY ROW_NUMBER() OVER (PARTITION BY downstream_table_name, upstream_table_name ORDER BY query_start_time DESC) = 1""" + QUALIFY ROW_NUMBER() OVER ( + PARTITION BY downstream_table_name, + upstream_table_name, + downstream_table_columns + ORDER BY query_start_time DESC + ) = 1""" @staticmethod def view_dependencies() -> str: @@ -260,6 +265,7 @@ def view_dependencies() -> str: referenced_database, '.', referenced_schema, '.', referenced_object_name ) AS "VIEW_UPSTREAM", + referenced_object_domain as "REFERENCED_OBJECT_DOMAIN", concat( referencing_database, '.', referencing_schema, '.', referencing_object_name @@ -305,6 +311,7 @@ def view_lineage_history(start_time_millis: int, end_time_millis: int) -> str: view_domain AS "VIEW_DOMAIN", view_columns AS "VIEW_COLUMNS", downstream_table_name AS "DOWNSTREAM_TABLE_NAME", + downstream_table_domain AS "DOWNSTREAM_TABLE_DOMAIN", downstream_table_columns AS "DOWNSTREAM_TABLE_COLUMNS" FROM view_lineage_history @@ -312,7 +319,8 @@ def 
view_lineage_history(start_time_millis: int, end_time_millis: int) -> str: view_domain in ('View', 'Materialized view') QUALIFY ROW_NUMBER() OVER ( PARTITION BY view_name, - downstream_table_name + downstream_table_name, + downstream_table_columns ORDER BY query_start_time DESC ) = 1 diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py index 49b16c2cf5bc95..6759839628eb3b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py @@ -52,8 +52,11 @@ class Config: class SnowflakeColumnReference(PermissiveModel): - columnId: int columnName: str + columnId: Optional[int] = None + objectName: Optional[str] = None + objectDomain: Optional[str] = None + objectId: Optional[int] = None class SnowflakeObjectAccessEntry(PermissiveModel): diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index c64cb54c25e920..54cee37b4edf39 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -160,6 +160,10 @@ SourceCapability.LINEAGE_COARSE, "Enabled by default, can be disabled via configuration `include_table_lineage` and `include_view_lineage`", ) +@capability( + SourceCapability.LINEAGE_FINE, + "Enabled by default, can be disabled via configuration `include_table_lineage` and `include_view_lineage`", +) @capability( SourceCapability.USAGE_STATS, "Enabled by default, can be disabled via configuration `include_usage_stats", @@ -354,6 +358,11 @@ def query(query): _report[SourceCapability.LINEAGE_COARSE] = CapabilityReport( capable=True ) + + _report[SourceCapability.LINEAGE_FINE] = CapabilityReport( + capable=True + ) + 
_report[SourceCapability.USAGE_STATS] = CapabilityReport( capable=True ) @@ -378,6 +387,7 @@ def query(query): SourceCapability.DATA_PROFILING: "Either no tables exist or current role does not have permissions to access them", SourceCapability.CONTAINERS: "Current role does not have permissions to use any database", SourceCapability.LINEAGE_COARSE: "Current role does not have permissions to snowflake account usage views", + SourceCapability.LINEAGE_FINE: "Current role does not have permissions to snowflake account usage views", SourceCapability.USAGE_STATS: "Current role does not have permissions to snowflake account usage views", } @@ -389,6 +399,7 @@ def query(query): SourceCapability.DESCRIPTIONS, SourceCapability.DATA_PROFILING, SourceCapability.LINEAGE_COARSE, + SourceCapability.LINEAGE_FINE, SourceCapability.USAGE_STATS, ): failure_message = ( @@ -559,8 +570,8 @@ def _process_view( view.columns = self.get_columns_for_table(conn, view.name, schema_name, db_name) lineage_info = None - if self.config.include_table_lineage: - self.lineage_extractor._get_upstream_lineage_info(view_name) + if self.config.include_view_lineage: + lineage_info = self.lineage_extractor._get_upstream_lineage_info(view_name) yield from self.gen_dataset_workunits(view, schema_name, db_name, lineage_info) def gen_dataset_workunits( From 74d9fa25a6a7eb1aabb2e944beb1a2f26c8e9fd3 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Wed, 28 Sep 2022 20:41:30 -0700 Subject: [PATCH 02/76] feat(ingest): looker - add support for simple column level lineage (#6084) --- .../ingestion/source/looker/looker_common.py | 64 +- .../ingestion/source/looker/lookml_source.py | 66 +- .../integration/lookml/expected_output.json | 540 ++++------------ .../lookml/lookml_mces_api_bigquery.json | 591 +++++------------ .../lookml/lookml_mces_api_hive2.json | 587 +++++------------ .../lookml/lookml_mces_offline.json | 591 +++++------------ ...lookml_mces_offline_platform_instance.json | 593 +++++------------- 
.../lookml_mces_with_external_urls.json | 582 +++++------------ .../lookml/lookml_reachable_views.json | 327 ++++------ 9 files changed, 1124 insertions(+), 2817 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 9591cab937d960..b6e561c781506f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -30,6 +30,8 @@ ) from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetLineageTypeClass, + FineGrainedLineageDownstreamType, + FineGrainedLineageUpstreamType, UpstreamClass, UpstreamLineage, ) @@ -54,6 +56,7 @@ ChangeTypeClass, DatasetPropertiesClass, EnumTypeClass, + FineGrainedLineageClass, GlobalTagsClass, OwnerClass, OwnershipClass, @@ -169,6 +172,10 @@ class LookerCommonConfig( None, description="Reference to your github location. If present, supplies handy links to your lookml on the dataset entity page.", ) + extract_column_level_lineage: bool = Field( + True, + description="When enabled, extracts column-level lineage from Views and Explores", + ) @dataclass @@ -237,6 +244,7 @@ class ViewField: description: str field_type: ViewFieldType is_primary_key: bool = False + upstream_field: Optional[str] = None class LookerUtil: @@ -622,6 +630,7 @@ def from_api( # noqa: C901 is_primary_key=dim_field.primary_key if dim_field.primary_key else False, + upstream_field=dim_field.name, ) ) if explore.fields.measures is not None: @@ -643,6 +652,7 @@ def from_api( # noqa: C901 is_primary_key=measure_field.primary_key if measure_field.primary_key else False, + upstream_field=measure_field.name, ) ) @@ -746,20 +756,52 @@ def _to_metadata_events( # noqa: C901 dataset_props.externalUrl = self._get_url(base_url) dataset_snapshot.aspects.append(dataset_props) + view_name_to_urn_map = {} if self.upstream_views is not None: assert 
self.project_name is not None - upstreams = [ - UpstreamClass( - dataset=LookerViewId( - project_name=self.project_name, - model_name=self.model_name, - view_name=view_name, - ).get_urn(config), - type=DatasetLineageTypeClass.VIEW, + upstreams = [] + fine_grained_lineages = [] + for view_name in sorted(self.upstream_views): + view_urn = LookerViewId( + project_name=self.project_name, + model_name=self.model_name, + view_name=view_name, + ).get_urn(config) + + upstreams.append( + UpstreamClass( + dataset=view_urn, + type=DatasetLineageTypeClass.VIEW, + ) ) - for view_name in sorted(self.upstream_views) - ] - upstream_lineage = UpstreamLineage(upstreams=upstreams) + view_name_to_urn_map[view_name] = view_urn + if config.extract_column_level_lineage: + for field in self.fields or []: + if ( + field.upstream_field + and len(field.upstream_field.split(".")) >= 2 + ): + (view_name, field_path) = field.upstream_field.split(".")[ + 0 + ], ".".join(field.upstream_field.split(".")[1:]) + assert view_name + view_urn = view_name_to_urn_map.get(view_name, "") + if view_urn: + fine_grained_lineages.append( + FineGrainedLineageClass( + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + downstreamType=FineGrainedLineageDownstreamType.FIELD, + upstreams=[ + builder.make_schema_field_urn( + view_urn, field_path + ) + ], + ) + ) + + upstream_lineage = UpstreamLineage( + upstreams=upstreams, fineGrainedLineages=fine_grained_lineages or None + ) dataset_snapshot.aspects.append(upstream_lineage) if self.fields is not None: schema_metadata = LookerUtil._get_schema( diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index b175703890f439..0f6e1bba64bc9b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -20,6 +20,7 @@ from datahub.configuration.common import 
AllowDenyPattern, ConfigurationError from datahub.configuration.github import GitHubInfo from datahub.configuration.source_common import EnvBasedSourceConfigBase +from datahub.emitter.mce_builder import make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( @@ -48,6 +49,7 @@ from datahub.metadata.com.linkedin.pegasus2avro.common import BrowsePaths, Status from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetLineageTypeClass, + FineGrainedLineageDownstreamType, UpstreamClass, UpstreamLineage, ViewProperties, @@ -57,6 +59,8 @@ from datahub.metadata.schema_classes import ( ChangeTypeClass, DatasetPropertiesClass, + FineGrainedLineageClass, + FineGrainedLineageUpstreamTypeClass, SubTypesClass, ) from datahub.utilities.sql_parser import SQLParser @@ -577,7 +581,10 @@ def _get_sql_info(cls, sql: str, sql_parser_path: str) -> SQLInfo: @classmethod def _get_fields( - cls, field_list: List[Dict], type_cls: ViewFieldType + cls, + field_list: List[Dict], + type_cls: ViewFieldType, + extract_column_level_lineage: bool, ) -> List[ViewField]: fields = [] for field_dict in field_list: @@ -586,6 +593,19 @@ def _get_fields( native_type = field_dict.get("type", "string") description = field_dict.get("description", "") label = field_dict.get("label", "") + upstream_field = None + if type_cls == ViewFieldType.DIMENSION and extract_column_level_lineage: + if field_dict.get("sql") is not None: + upstream_field_match = re.match( + r"^.*\${TABLE}\.(.*)$", field_dict["sql"] + ) + if upstream_field_match: + matched_field = upstream_field_match.group(1) + # Remove quotes from field names + matched_field = ( + matched_field.replace('"', "").replace("`", "").lower() + ) + upstream_field = matched_field field = ViewField( name=name, @@ -594,6 +614,7 @@ def _get_fields( description=description, is_primary_key=is_primary_key, field_type=type_cls, 
+ upstream_field=upstream_field, ) fields.append(field) return fields @@ -611,6 +632,7 @@ def from_looker_dict( max_file_snippet_length: int, parse_table_names_from_sql: bool = False, sql_parser_path: str = "datahub.utilities.sql_parser.DefaultSQLParser", + extract_col_level_lineage: bool = False, ) -> Optional["LookerView"]: view_name = looker_view["name"] logger.debug(f"Handling view {view_name} in model {model_name}") @@ -635,13 +657,19 @@ def from_looker_dict( derived_table = looker_view.get("derived_table") dimensions = cls._get_fields( - looker_view.get("dimensions", []), ViewFieldType.DIMENSION + looker_view.get("dimensions", []), + ViewFieldType.DIMENSION, + extract_col_level_lineage, ) dimension_groups = cls._get_fields( - looker_view.get("dimension_groups", []), ViewFieldType.DIMENSION_GROUP + looker_view.get("dimension_groups", []), + ViewFieldType.DIMENSION_GROUP, + extract_col_level_lineage, ) measures = cls._get_fields( - looker_view.get("measures", []), ViewFieldType.MEASURE + looker_view.get("measures", []), + ViewFieldType.MEASURE, + extract_col_level_lineage, ) fields: List[ViewField] = dimensions + dimension_groups + measures @@ -993,15 +1021,40 @@ def _get_upstream_lineage( for sql_table_name in looker_view.sql_table_names: sql_table_name = sql_table_name.replace('"', "").replace("`", "") + upstream_dataset_urn: str = self._construct_datalineage_urn( + sql_table_name, looker_view + ) + fine_grained_lineages: List[FineGrainedLineageClass] = [] + if self.source_config.extract_column_level_lineage: + for field in looker_view.fields: + if field.upstream_field is not None: + fine_grained_lineage = FineGrainedLineageClass( + upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET, + upstreams=[ + make_schema_field_urn( + upstream_dataset_urn, field.upstream_field + ) + ], + downstreamType=FineGrainedLineageDownstreamType.FIELD, + downstreams=[ + make_schema_field_urn( + looker_view.id.get_urn(self.source_config), + field.name, + ) + ], + ) + 
fine_grained_lineages.append(fine_grained_lineage) upstream = UpstreamClass( - dataset=self._construct_datalineage_urn(sql_table_name, looker_view), + dataset=upstream_dataset_urn, type=DatasetLineageTypeClass.VIEW, ) upstreams.append(upstream) if upstreams != []: - return UpstreamLineage(upstreams=upstreams) + return UpstreamLineage( + upstreams=upstreams, fineGrainedLineages=fine_grained_lineages or None + ) else: return None @@ -1224,6 +1277,7 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 self.source_config.max_file_snippet_length, self.source_config.parse_table_names_from_sql, self.source_config.sql_parser, + self.source_config.extract_column_level_lineage, ) except Exception as e: self.reporter.report_warning( diff --git a/metadata-ingestion/tests/integration/lookml/expected_output.json b/metadata-ingestion/tests/integration/lookml/expected_output.json index e445107db3ca75..68a7392bc27d1e 100644 --- a/metadata-ingestion/tests/integration/lookml/expected_output.json +++ b/metadata-ingestion/tests/integration/lookml/expected_output.json @@ -1,6 +1,5 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", @@ -23,15 +22,47 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,..my_table,PROD)", "type": "VIEW" } ], - "fineGrainedLineages": null + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,..my_table,PROD),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + 
"upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,..my_table,PROD),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,..my_table,PROD),is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),is_latest)" + ], + "confidenceScore": 1.0 + } + ] } }, { @@ -41,19 +72,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -63,12 +87,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "The country", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -79,24 +100,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, "description": "City", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -107,24 +121,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": 
null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "is_latest", - "jsonPath": null, "nullable": false, "description": "Is latest data", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.BooleanType": {} @@ -135,24 +142,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "timestamp", - "jsonPath": null, "nullable": false, "description": "Timestamp of measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -163,28 +163,20 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" }, { - "tag": "urn:li:tag:Temporal", - "context": null + "tag": "urn:li:tag:Temporal" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "average_measurement", - "jsonPath": null, "nullable": false, "description": "My measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -195,20 +187,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -216,31 +202,21 @@ "customProperties": { "looker.file.path": "foo.view.lkml" }, - "externalUrl": null, "name": "my_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - 
"proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -249,17 +225,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -268,14 +239,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", @@ -298,15 +265,36 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", "type": "VIEW" } ], - "fineGrainedLineages": null + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + 
"upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),city)" + ], + "confidenceScore": 1.0 + } + ] } }, { @@ -316,19 +304,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -338,12 +319,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "The country", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -354,24 +332,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, "description": "City", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -382,24 +353,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "timestamp", - "jsonPath": null, "nullable": false, "description": "Timestamp of measurement", "label": "", - "created": null, - "lastModified": null, "type": { 
"type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -410,28 +374,20 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" }, { - "tag": "urn:li:tag:Temporal", - "context": null + "tag": "urn:li:tag:Temporal" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "average_measurement", - "jsonPath": null, "nullable": false, "description": "My measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -442,20 +398,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -463,31 +413,21 @@ "customProperties": { "looker.file.path": "bar.view.lkml" }, - "externalUrl": null, "name": "my_derived_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -496,17 +436,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -515,14 +450,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", @@ -545,15 +476,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.looker_schema.include_able,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -561,31 +489,21 @@ "customProperties": { "looker.file.path": "included_view_file.view.lkml" }, - "externalUrl": null, "name": "include_able_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -594,17 +512,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", 
"aspectName": "viewProperties", "aspect": { @@ -613,14 +526,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", @@ -643,15 +552,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.looker_schema.events,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -659,31 +565,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "looker_events", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -692,17 +588,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -711,14 +602,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - 
"registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", @@ -741,15 +628,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.looker_schema.events,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -759,19 +643,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -781,12 +658,9 @@ "fields": [ { "fieldPath": "additional_measure", - "jsonPath": null, "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -797,20 +671,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -818,31 +686,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "extending_looker_events", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { 
"lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -851,17 +709,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -870,14 +723,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", @@ -900,15 +749,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,..autodetect_sql_name_based_on_view_name,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -916,31 +762,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "autodetect_sql_name_based_on_view_name", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - 
"registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -949,17 +785,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -968,14 +799,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", @@ -998,15 +825,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.looker_schema.include_able,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -1014,31 +838,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "test_include_external_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + 
"runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1047,17 +861,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1066,14 +875,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", @@ -1096,15 +901,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,..fragment_derived_view,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -1114,19 +916,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -1135,13 +930,10 @@ }, "fields": [ { - "fieldPath": 
"date", - "jsonPath": null, + "fieldPath": "aliased_platform", "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1152,19 +944,13 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1175,19 +961,13 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { - "fieldPath": "aliased_platform", - "jsonPath": null, + "fieldPath": "date", "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1198,15 +978,10 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -1214,31 +989,21 @@ "customProperties": { "looker.file.path": "nested/fragment_derived.view.lkml" }, - "externalUrl": null, "name": "fragment_derived_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { 
@@ -1247,17 +1012,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1266,14 +1026,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", @@ -1295,31 +1051,21 @@ "customProperties": { "looker.file.path": "liquid.view.lkml" }, - "externalUrl": null, "name": "customer_facts", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1328,17 +1074,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1347,10 +1088,7 
@@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json index 1fbabde47c976f..479a8171e08a55 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json @@ -1,6 +1,5 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", @@ -23,15 +22,47 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.default-db.my_table,PROD)", "type": "VIEW" } ], - "fineGrainedLineages": null + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.default-db.my_table,PROD),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.default-db.my_table,PROD),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.default-db.my_table,PROD),is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),is_latest)" + ], + "confidenceScore": 1.0 + } + ] } }, { @@ -41,19 +72,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -63,12 +87,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "The country", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -79,24 +100,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, "description": "City", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -107,24 +121,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "is_latest", - "jsonPath": null, "nullable": false, "description": "Is latest data", "label": "", - "created": null, - "lastModified": null, "type": { "type": { 
"com.linkedin.pegasus2avro.schema.BooleanType": {} @@ -135,24 +142,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "timestamp", - "jsonPath": null, "nullable": false, "description": "Timestamp of measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -163,28 +163,20 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" }, { - "tag": "urn:li:tag:Temporal", - "context": null + "tag": "urn:li:tag:Temporal" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "average_measurement", - "jsonPath": null, "nullable": false, "description": "My measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -195,20 +187,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -216,31 +202,21 @@ "customProperties": { "looker.file.path": "foo.view.lkml" }, - "externalUrl": null, "name": "my_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -249,17 +225,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -268,14 +239,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", @@ -298,15 +265,36 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", "type": "VIEW" } ], - "fineGrainedLineages": null + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),city)" + ], + "confidenceScore": 1.0 + } + ] } }, { @@ -316,19 +304,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -338,12 +319,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "The country", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -354,24 +332,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, "description": "City", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -382,24 +353,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "timestamp", - "jsonPath": null, "nullable": false, "description": "Timestamp of measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -410,28 +374,20 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" }, { - "tag": 
"urn:li:tag:Temporal", - "context": null + "tag": "urn:li:tag:Temporal" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "average_measurement", - "jsonPath": null, "nullable": false, "description": "My measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -442,20 +398,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -463,31 +413,21 @@ "customProperties": { "looker.file.path": "bar.view.lkml" }, - "externalUrl": null, "name": "my_derived_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -496,17 +436,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -515,14 +450,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": 
"lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", @@ -545,15 +476,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.looker_schema.include_able,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -561,31 +489,21 @@ "customProperties": { "looker.file.path": "included_view_file.view.lkml" }, - "externalUrl": null, "name": "include_able_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -594,17 +512,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -613,14 +526,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - 
"auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", @@ -643,15 +552,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.looker_schema.events,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -659,31 +565,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "looker_events", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -692,17 +588,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -711,14 +602,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", @@ -741,15 +628,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.looker_schema.events,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -759,19 +643,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -781,12 +658,9 @@ "fields": [ { "fieldPath": "additional_measure", - "jsonPath": null, "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -797,20 +671,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -818,31 +686,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "extending_looker_events", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": 
"dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -851,17 +709,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -870,14 +723,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", @@ -900,15 +749,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.default-db.autodetect_sql_name_based_on_view_name,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -916,31 +762,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "autodetect_sql_name_based_on_view_name", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -949,17 +785,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -968,14 +799,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", @@ -998,15 +825,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.looker_schema.include_able,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -1014,31 +838,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "test_include_external_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1047,17 +861,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1066,14 +875,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", @@ -1096,15 +901,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.default-db.fragment_derived_view,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -1114,19 +916,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -1135,13 +930,10 @@ }, "fields": [ { - "fieldPath": "country", - "jsonPath": null, + "fieldPath": "aliased_platform", 
"nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1152,19 +944,13 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { - "fieldPath": "aliased_platform", - "jsonPath": null, + "fieldPath": "country", "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1175,19 +961,13 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "date", - "jsonPath": null, "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1198,15 +978,10 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -1214,31 +989,21 @@ "customProperties": { "looker.file.path": "nested/fragment_derived.view.lkml" }, - "externalUrl": null, "name": "fragment_derived_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1247,17 +1012,12 @@ }, "systemMetadata": { "lastObserved": 
1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1266,14 +1026,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", @@ -1295,31 +1051,21 @@ "customProperties": { "looker.file.path": "liquid.view.lkml" }, - "externalUrl": null, "name": "customer_facts", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1328,17 +1074,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1347,14 +1088,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - 
"runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", @@ -1364,39 +1101,30 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", - "description": "A tag that is applied to all dimension fields.", - "colorHex": null + "description": "A tag that is applied to all dimension fields." } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", @@ -1406,39 +1134,30 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", - "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations.", - "colorHex": null + "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations." 
} } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", @@ -1448,35 +1167,27 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", - "description": "A tag that is applied to all measures (metrics). Measures are typically the columns that you aggregate on", - "colorHex": null + "description": "A tag that is applied to all measures (metrics). Measures are typically the columns that you aggregate on" } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json index e2bb1909c44b57..b0fe368ee2625a 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json @@ -1,6 +1,5 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", @@ -23,15 +22,47 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:hive,default-hive-db.my_table,PROD)", "type": "VIEW" } ], - "fineGrainedLineages": null + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,default-hive-db.my_table,PROD),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,default-hive-db.my_table,PROD),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,default-hive-db.my_table,PROD),is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),is_latest)" + ], + "confidenceScore": 1.0 + } + ] } }, { @@ -41,19 +72,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -63,12 +87,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "The country", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -79,24 +100,17 @@ "globalTags": { "tags": [ { - "tag": 
"urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, "description": "City", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -107,24 +121,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "is_latest", - "jsonPath": null, "nullable": false, "description": "Is latest data", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.BooleanType": {} @@ -135,24 +142,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "timestamp", - "jsonPath": null, "nullable": false, "description": "Timestamp of measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -163,28 +163,20 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" }, { - "tag": "urn:li:tag:Temporal", - "context": null + "tag": "urn:li:tag:Temporal" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "average_measurement", - "jsonPath": null, "nullable": false, "description": "My measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ 
-195,20 +187,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -216,31 +202,21 @@ "customProperties": { "looker.file.path": "foo.view.lkml" }, - "externalUrl": null, "name": "my_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -249,17 +225,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -268,14 +239,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", @@ -298,15 +265,36 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": 
"urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", "type": "VIEW" } ], - "fineGrainedLineages": null + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),city)" + ], + "confidenceScore": 1.0 + } + ] } }, { @@ -316,19 +304,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -338,12 +319,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "The country", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -354,24 +332,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, "description": "City", 
"label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -382,24 +353,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "timestamp", - "jsonPath": null, "nullable": false, "description": "Timestamp of measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -410,28 +374,20 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" }, { - "tag": "urn:li:tag:Temporal", - "context": null + "tag": "urn:li:tag:Temporal" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "average_measurement", - "jsonPath": null, "nullable": false, "description": "My measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -442,20 +398,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -463,31 +413,21 @@ "customProperties": { "looker.file.path": "bar.view.lkml" }, - "externalUrl": null, "name": "my_derived_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - 
"auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -496,17 +436,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -515,14 +450,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", @@ -545,15 +476,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,looker_schema.include_able,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -561,31 +489,21 @@ "customProperties": { "looker.file.path": "included_view_file.view.lkml" }, - "externalUrl": null, "name": "include_able_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -594,17 +512,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -613,14 +526,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", @@ -643,15 +552,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,looker_schema.events,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -659,31 +565,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "looker_events", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": 
"subTypes", "aspect": { @@ -692,17 +588,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -711,14 +602,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", @@ -741,15 +628,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,looker_schema.events,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -759,19 +643,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -781,12 +658,9 @@ "fields": [ { "fieldPath": "additional_measure", - "jsonPath": null, "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -797,20 +671,14 @@ "globalTags": { "tags": [ { - "tag": 
"urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -818,31 +686,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "extending_looker_events", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -851,17 +709,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -870,14 +723,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", @@ -900,15 +749,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - 
"message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,default-hive-db.autodetect_sql_name_based_on_view_name,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -916,31 +762,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "autodetect_sql_name_based_on_view_name", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -949,17 +785,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -968,14 +799,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", @@ -998,15 +825,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": 
"urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,looker_schema.include_able,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -1014,31 +838,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "test_include_external_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1047,17 +861,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1066,14 +875,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", @@ -1096,15 +901,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:hive,default-hive-db.fragment_derived_view,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -1114,19 +916,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -1136,12 +931,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1152,19 +944,13 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "date", - "jsonPath": null, "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1175,19 +961,13 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "aliased_platform", - "jsonPath": null, "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1198,15 +978,10 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -1214,31 +989,21 @@ "customProperties": { "looker.file.path": 
"nested/fragment_derived.view.lkml" }, - "externalUrl": null, "name": "fragment_derived_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1247,17 +1012,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1266,14 +1026,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", @@ -1295,31 +1051,21 @@ "customProperties": { "looker.file.path": "liquid.view.lkml" }, - "externalUrl": null, "name": "customer_facts", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1328,17 +1074,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1347,14 +1088,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", @@ -1364,39 +1101,30 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", - "description": "A tag that is applied to all dimension fields.", - "colorHex": null + "description": "A tag that is applied to all dimension fields." 
} } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", @@ -1406,39 +1134,30 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", - "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations.", - "colorHex": null + "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations." } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", @@ -1448,35 +1167,27 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", - "description": "A tag that is applied to all measures (metrics). Measures are typically the columns that you aggregate on", - "colorHex": null + "description": "A tag that is applied to all measures (metrics). 
Measures are typically the columns that you aggregate on" } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json index 05c6ff45ab4472..25e466dc00c671 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json @@ -1,6 +1,5 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", @@ -23,15 +22,47 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.my_table,PROD)", "type": "VIEW" } ], - "fineGrainedLineages": null + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.my_table,PROD),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.my_table,PROD),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.my_table,PROD),is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),is_latest)" + ], + "confidenceScore": 1.0 + } + ] } }, { @@ -41,19 +72,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -63,12 +87,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "The country", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -79,24 +100,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, "description": "City", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -107,24 +121,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "is_latest", - "jsonPath": null, "nullable": false, "description": "Is latest data", "label": "", - "created": null, - "lastModified": null, "type": { "type": { 
"com.linkedin.pegasus2avro.schema.BooleanType": {} @@ -135,24 +142,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "timestamp", - "jsonPath": null, "nullable": false, "description": "Timestamp of measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -163,28 +163,20 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" }, { - "tag": "urn:li:tag:Temporal", - "context": null + "tag": "urn:li:tag:Temporal" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "average_measurement", - "jsonPath": null, "nullable": false, "description": "My measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -195,20 +187,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -216,31 +202,21 @@ "customProperties": { "looker.file.path": "foo.view.lkml" }, - "externalUrl": null, "name": "my_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -249,17 +225,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -268,14 +239,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", @@ -298,15 +265,36 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", "type": "VIEW" } ], - "fineGrainedLineages": null + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),city)" + ], + "confidenceScore": 1.0 + } + ] } }, { @@ -316,19 +304,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -338,12 +319,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "The country", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -354,24 +332,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, "description": "City", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -382,24 +353,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "timestamp", - "jsonPath": null, "nullable": false, "description": "Timestamp of measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -410,28 +374,20 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" }, { - "tag": 
"urn:li:tag:Temporal", - "context": null + "tag": "urn:li:tag:Temporal" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "average_measurement", - "jsonPath": null, "nullable": false, "description": "My measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -442,20 +398,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -463,31 +413,21 @@ "customProperties": { "looker.file.path": "bar.view.lkml" }, - "externalUrl": null, "name": "my_derived_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -496,17 +436,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -515,14 +450,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": 
"lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", @@ -545,15 +476,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.include_able,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -561,31 +489,21 @@ "customProperties": { "looker.file.path": "included_view_file.view.lkml" }, - "externalUrl": null, "name": "include_able_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -594,17 +512,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -613,14 +526,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - 
"auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", @@ -643,15 +552,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -659,31 +565,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "looker_events", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -692,17 +588,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -711,14 +602,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", @@ -741,15 +628,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -759,19 +643,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -781,12 +658,9 @@ "fields": [ { "fieldPath": "additional_measure", - "jsonPath": null, "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -797,20 +671,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -818,31 +686,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "extending_looker_events", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": 
"dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -851,17 +709,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -870,14 +723,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", @@ -900,15 +749,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.autodetect_sql_name_based_on_view_name,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -916,31 +762,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "autodetect_sql_name_based_on_view_name", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -949,17 +785,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -968,14 +799,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", @@ -998,15 +825,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.include_able,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -1014,31 +838,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "test_include_external_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1047,17 +861,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1066,14 +875,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", @@ -1096,15 +901,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.fragment_derived_view,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -1114,19 +916,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -1135,13 +930,10 @@ }, "fields": [ { - "fieldPath": "date", - "jsonPath": null, + "fieldPath": "aliased_platform", 
"nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1152,19 +944,13 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { - "fieldPath": "aliased_platform", - "jsonPath": null, + "fieldPath": "date", "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1175,19 +961,13 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1198,15 +978,10 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -1214,31 +989,21 @@ "customProperties": { "looker.file.path": "nested/fragment_derived.view.lkml" }, - "externalUrl": null, "name": "fragment_derived_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1247,17 +1012,12 @@ }, "systemMetadata": { "lastObserved": 
1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1266,14 +1026,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", @@ -1295,31 +1051,21 @@ "customProperties": { "looker.file.path": "liquid.view.lkml" }, - "externalUrl": null, "name": "customer_facts", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1328,17 +1074,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1347,14 +1088,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - 
"runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", @@ -1364,39 +1101,30 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", - "description": "A tag that is applied to all dimension fields.", - "colorHex": null + "description": "A tag that is applied to all dimension fields." } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", @@ -1406,39 +1134,30 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", - "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations.", - "colorHex": null + "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations." 
} } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", @@ -1448,35 +1167,27 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", - "description": "A tag that is applied to all measures (metrics). Measures are typically the columns that you aggregate on", - "colorHex": null + "description": "A tag that is applied to all measures (metrics). Measures are typically the columns that you aggregate on" } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json index 895876c737e95d..42fb295754755a 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json @@ -1,6 +1,5 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", @@ -23,15 +22,47 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + 
"actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.default_schema.my_table,DEV)", "type": "VIEW" } ], - "fineGrainedLineages": null + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.default_schema.my_table,DEV),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.default_schema.my_table,DEV),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.default_schema.my_table,DEV),is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),is_latest)" + ], + "confidenceScore": 1.0 + } + ] } }, { @@ -41,19 +72,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -63,12 +87,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "The country", "label": "", - "created": null, - 
"lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -79,24 +100,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, "description": "City", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -107,24 +121,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "is_latest", - "jsonPath": null, "nullable": false, "description": "Is latest data", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.BooleanType": {} @@ -135,24 +142,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "timestamp", - "jsonPath": null, "nullable": false, "description": "Timestamp of measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -163,28 +163,20 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" }, { - "tag": "urn:li:tag:Temporal", - "context": null + "tag": "urn:li:tag:Temporal" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "average_measurement", - "jsonPath": null, "nullable": false, "description": 
"My measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -195,20 +187,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -216,31 +202,21 @@ "customProperties": { "looker.file.path": "foo.view.lkml" }, - "externalUrl": null, "name": "my_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -249,17 +225,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -268,14 +239,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", @@ 
-298,15 +265,36 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", "type": "VIEW" } ], - "fineGrainedLineages": null + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),city)" + ], + "confidenceScore": 1.0 + } + ] } }, { @@ -316,19 +304,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -338,12 +319,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "The country", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -354,24 +332,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": 
null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, "description": "City", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -382,24 +353,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "timestamp", - "jsonPath": null, "nullable": false, "description": "Timestamp of measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -410,28 +374,20 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" }, { - "tag": "urn:li:tag:Temporal", - "context": null + "tag": "urn:li:tag:Temporal" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "average_measurement", - "jsonPath": null, "nullable": false, "description": "My measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -442,20 +398,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -463,31 +413,21 @@ "customProperties": { "looker.file.path": "bar.view.lkml" }, - "externalUrl": null, "name": "my_derived_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 
1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -496,17 +436,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -515,14 +450,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", @@ -545,15 +476,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.looker_schema.include_able,DEV)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -561,31 +489,21 @@ "customProperties": { "looker.file.path": "included_view_file.view.lkml" }, - "externalUrl": null, "name": "include_able_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + 
"runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -594,17 +512,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -613,14 +526,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", @@ -643,15 +552,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.looker_schema.events,DEV)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -659,31 +565,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "looker_events", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -692,17 +588,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -711,14 +602,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", @@ -741,15 +628,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.looker_schema.events,DEV)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -759,19 +643,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -781,12 +658,9 @@ "fields": [ { "fieldPath": "additional_measure", - "jsonPath": null, "nullable": false, "description": "", "label": "", - "created": 
null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -797,20 +671,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -818,31 +686,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "extending_looker_events", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -851,17 +709,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -870,14 +723,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", @@ -900,15 +749,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.default_schema.autodetect_sql_name_based_on_view_name,DEV)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -916,31 +762,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "autodetect_sql_name_based_on_view_name", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -949,17 +785,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -968,14 +799,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", @@ -998,15 +825,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.looker_schema.include_able,DEV)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -1014,31 +838,21 @@ "customProperties": { "looker.file.path": "view_declarations.view.lkml" }, - "externalUrl": null, "name": "test_include_external_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1047,17 +861,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1066,14 +875,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", @@ -1096,15 +901,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.default_schema.fragment_derived_view,DEV)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -1114,19 +916,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -1135,13 +930,10 @@ }, "fields": [ { - "fieldPath": "date", - "jsonPath": null, + "fieldPath": "country", "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1152,19 +944,13 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { - "fieldPath": "aliased_platform", - "jsonPath": null, + "fieldPath": "date", "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1175,19 +961,13 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { - "fieldPath": "country", - "jsonPath": null, + "fieldPath": "aliased_platform", "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { 
"com.linkedin.pegasus2avro.schema.NullType": {} @@ -1198,15 +978,10 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -1214,31 +989,21 @@ "customProperties": { "looker.file.path": "nested/fragment_derived.view.lkml" }, - "externalUrl": null, "name": "fragment_derived_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1247,17 +1012,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1266,14 +1026,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", @@ -1295,31 +1051,21 @@ "customProperties": { "looker.file.path": "liquid.view.lkml" }, - "externalUrl": null, 
"name": "customer_facts", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1328,17 +1074,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1347,14 +1088,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", @@ -1364,39 +1101,30 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", - "description": "A tag that is applied to all dimension fields.", - "colorHex": null + "description": "A tag that is applied to all dimension fields." 
} } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", @@ -1406,39 +1134,30 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", - "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations.", - "colorHex": null + "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations." } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", @@ -1448,35 +1167,27 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", - "description": "A tag that is applied to all measures (metrics). Measures are typically the columns that you aggregate on", - "colorHex": null + "description": "A tag that is applied to all measures (metrics). 
Measures are typically the columns that you aggregate on" } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json index 991b043c7dc65d..ed0a8d6f075063 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json @@ -1,6 +1,5 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", @@ -23,15 +22,47 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.my_table,PROD)", "type": "VIEW" } ], - "fineGrainedLineages": null + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.my_table,PROD),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.my_table,PROD),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" + ], + "confidenceScore": 1.0 + }, + { + 
"upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.my_table,PROD),is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),is_latest)" + ], + "confidenceScore": 1.0 + } + ] } }, { @@ -41,19 +72,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -63,12 +87,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "The country", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -79,24 +100,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, "description": "City", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -107,24 +121,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "is_latest", - "jsonPath": null, "nullable": false, "description": "Is latest data", "label": "", - "created": null, - "lastModified": null, "type": { 
"type": { "com.linkedin.pegasus2avro.schema.BooleanType": {} @@ -135,24 +142,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "timestamp", - "jsonPath": null, "nullable": false, "description": "Timestamp of measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -163,28 +163,20 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" }, { - "tag": "urn:li:tag:Temporal", - "context": null + "tag": "urn:li:tag:Temporal" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "average_measurement", - "jsonPath": null, "nullable": false, "description": "My measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -195,20 +187,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -218,29 +204,20 @@ }, "externalUrl": "https://github.com/datahub/looker-demo/blob/master/foo.view.lkml", "name": "my_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -249,17 +226,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -268,14 +240,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", @@ -298,15 +266,36 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", "type": "VIEW" } ], - "fineGrainedLineages": null + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD),city)" + ], + "confidenceScore": 1.0 + } + ] } }, { @@ -316,19 +305,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -338,12 +320,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "The country", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -354,24 +333,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, "description": "City", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -382,24 +354,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "timestamp", - "jsonPath": null, "nullable": false, "description": "Timestamp of measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -410,28 +375,20 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" }, { - "tag": 
"urn:li:tag:Temporal", - "context": null + "tag": "urn:li:tag:Temporal" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "average_measurement", - "jsonPath": null, "nullable": false, "description": "My measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -442,20 +399,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -465,29 +416,20 @@ }, "externalUrl": "https://github.com/datahub/looker-demo/blob/master/bar.view.lkml", "name": "my_derived_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -496,17 +438,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -515,14 +452,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": 
"lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", @@ -545,15 +478,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.include_able,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -563,29 +493,20 @@ }, "externalUrl": "https://github.com/datahub/looker-demo/blob/master/included_view_file.view.lkml", "name": "include_able_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -594,17 +515,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -613,14 +529,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { 
- "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", @@ -643,15 +555,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -661,29 +570,20 @@ }, "externalUrl": "https://github.com/datahub/looker-demo/blob/master/view_declarations.view.lkml", "name": "looker_events", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -692,17 +592,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -711,14 +606,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", @@ -741,15 +632,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -759,19 +647,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -781,12 +662,9 @@ "fields": [ { "fieldPath": "additional_measure", - "jsonPath": null, "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -797,20 +675,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -820,29 +692,20 @@ }, "externalUrl": "https://github.com/datahub/looker-demo/blob/master/view_declarations.view.lkml", "name": "extending_looker_events", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": 
"dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -851,17 +714,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -870,14 +728,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", @@ -900,15 +754,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.autodetect_sql_name_based_on_view_name,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -918,29 +769,20 @@ }, "externalUrl": "https://github.com/datahub/looker-demo/blob/master/view_declarations.view.lkml", "name": "autodetect_sql_name_based_on_view_name", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -949,17 +791,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -968,14 +805,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", @@ -998,15 +831,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.include_able,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -1016,29 +846,20 @@ }, "externalUrl": "https://github.com/datahub/looker-demo/blob/master/view_declarations.view.lkml", "name": "test_include_external_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1047,17 +868,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1066,14 +882,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", @@ -1096,15 +908,12 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.fragment_derived_view,PROD)", "type": "VIEW" } - ], - "fineGrainedLineages": null + ] } }, { @@ -1114,19 +923,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -1135,13 +937,10 @@ }, "fields": [ { - "fieldPath": "country", - "jsonPath": null, + "fieldPath": "date", 
"nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1152,19 +951,13 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { - "fieldPath": "date", - "jsonPath": null, + "fieldPath": "country", "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1175,19 +968,13 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "aliased_platform", - "jsonPath": null, "nullable": false, "description": "", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} @@ -1198,15 +985,10 @@ "globalTags": { "tags": [] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -1216,29 +998,20 @@ }, "externalUrl": "https://github.com/datahub/looker-demo/blob/master/nested/fragment_derived.view.lkml", "name": "fragment_derived_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1247,17 +1020,12 @@ }, "systemMetadata": { 
"lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1266,14 +1034,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", @@ -1297,29 +1061,20 @@ }, "externalUrl": "https://github.com/datahub/looker-demo/blob/master/liquid.view.lkml", "name": "customer_facts", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1328,17 +1083,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1347,14 +1097,10 @@ }, "systemMetadata": { "lastObserved": 
1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", @@ -1364,39 +1110,30 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", - "description": "A tag that is applied to all dimension fields.", - "colorHex": null + "description": "A tag that is applied to all dimension fields." } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", @@ -1406,39 +1143,30 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", - "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations.", - "colorHex": null + "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations." 
} } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", @@ -1448,35 +1176,27 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", - "description": "A tag that is applied to all measures (metrics). Measures are typically the columns that you aggregate on", - "colorHex": null + "description": "A tag that is applied to all measures (metrics). Measures are typically the columns that you aggregate on" } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json b/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json index bdeb390a58b00c..784a9bfb83c6c2 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json @@ -1,6 +1,5 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", @@ -23,15 +22,47 @@ { "auditStamp": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.default_schema.my_table,DEV)", "type": "VIEW" } ], - "fineGrainedLineages": null + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.default_schema.my_table,DEV),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.default_schema.my_table,DEV),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),city)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.default_schema.my_table,DEV),is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD),is_latest)" + ], + "confidenceScore": 1.0 + } + ] } }, { @@ -41,19 +72,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -63,12 +87,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": null, "nullable": false, "description": "The country", "label": "", - "created": null, - "lastModified": null, "type": { "type": { 
"com.linkedin.pegasus2avro.schema.StringType": {} @@ -79,24 +100,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, "description": "City", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -107,24 +121,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "is_latest", - "jsonPath": null, "nullable": false, "description": "Is latest data", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.BooleanType": {} @@ -135,24 +142,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "timestamp", - "jsonPath": null, "nullable": false, "description": "Timestamp of measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -163,28 +163,20 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" }, { - "tag": "urn:li:tag:Temporal", - "context": null + "tag": "urn:li:tag:Temporal" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "average_measurement", - "jsonPath": null, "nullable": false, "description": "My measurement", "label": "", - "created": 
null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -195,20 +187,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -216,31 +202,21 @@ "customProperties": { "looker.file.path": "foo.view.lkml" }, - "externalUrl": null, "name": "my_view", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -249,17 +225,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -268,14 +239,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view2,PROD)", @@ -298,15 +265,47 @@ { "auditStamp": { "time": 0, - 
"actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,rs_warehouse.default_db.default_schema.my_table,DEV)", "type": "VIEW" } ], - "fineGrainedLineages": null + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,rs_warehouse.default_db.default_schema.my_table,DEV),country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view2,PROD),country)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,rs_warehouse.default_db.default_schema.my_table,DEV),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view2,PROD),city)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,rs_warehouse.default_db.default_schema.my_table,DEV),is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view2,PROD),is_latest)" + ], + "confidenceScore": 1.0 + } + ] } }, { @@ -316,19 +315,12 @@ "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -338,12 +330,9 @@ "fields": [ { "fieldPath": "country", - "jsonPath": 
null, "nullable": false, "description": "The country", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -354,24 +343,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "city", - "jsonPath": null, "nullable": false, "description": "City", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -382,24 +364,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "is_latest", - "jsonPath": null, "nullable": false, "description": "Is latest data", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.BooleanType": {} @@ -410,24 +385,17 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "timestamp", - "jsonPath": null, "nullable": false, "description": "Timestamp of measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -438,28 +406,20 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Dimension", - "context": null + "tag": "urn:li:tag:Dimension" }, { - "tag": "urn:li:tag:Temporal", - "context": null + "tag": "urn:li:tag:Temporal" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false }, { 
"fieldPath": "average_measurement", - "jsonPath": null, "nullable": false, "description": "My measurement", "label": "", - "created": null, - "lastModified": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -470,20 +430,14 @@ "globalTags": { "tags": [ { - "tag": "urn:li:tag:Measure", - "context": null + "tag": "urn:li:tag:Measure" } ] }, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null + "isPartOfKey": false } ], - "primaryKeys": [], - "foreignKeysSpecs": null, - "foreignKeys": null + "primaryKeys": [] } }, { @@ -491,31 +445,21 @@ "customProperties": { "looker.file.path": "foo2.view.lkml" }, - "externalUrl": null, "name": "my_view2", - "qualifiedName": null, - "description": null, - "uri": null, "tags": [] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view2,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -524,17 +468,12 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view2,PROD)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -543,14 +482,10 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { 
"urn": "urn:li:tag:Dimension", @@ -560,39 +495,30 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", - "description": "A tag that is applied to all dimension fields.", - "colorHex": null + "description": "A tag that is applied to all dimension fields." } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", @@ -602,39 +528,30 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", - "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations.", - "colorHex": null + "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations." 
} } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", @@ -644,35 +561,27 @@ "owners": [ { "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER", - "source": null + "type": "DATAOWNER" } ], "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "message": null + "actor": "urn:li:corpuser:unknown" } } }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", - "description": "A tag that is applied to all measures (metrics). Measures are typically the columns that you aggregate on", - "colorHex": null + "description": "A tag that is applied to all measures (metrics). Measures are typically the columns that you aggregate on" } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "lookml-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "lookml-test" } } ] \ No newline at end of file From f83eb6f75956dc5e07a67d447d22286be3697687 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Thu, 29 Sep 2022 06:23:45 +0100 Subject: [PATCH 03/76] fix(elastic-setup) Fixing env var logic (#6079) --- docker/elasticsearch-setup/create-indices.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docker/elasticsearch-setup/create-indices.sh b/docker/elasticsearch-setup/create-indices.sh index 23b5282d09fdc9..2aadb7fb648839 100755 --- a/docker/elasticsearch-setup/create-indices.sh +++ b/docker/elasticsearch-setup/create-indices.sh @@ -4,6 +4,7 @@ set -e : ${DATAHUB_ANALYTICS_ENABLED:=true} : ${USE_AWS_ELASTICSEARCH:=false} +: ${ELASTICSEARCH_INSECURE:=false} if [[ $ELASTICSEARCH_USE_SSL == true ]]; then ELASTICSEARCH_PROTOCOL=https @@ -23,8 
+24,11 @@ if [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then ELASTICSEARCH_AUTH_HEADER="Accept: */*" fi -if [[ $ELASTICSEARCH_INSECURE ]]; then +if [[ $ELASTICSEARCH_INSECURE == true ]]; then + echo -e "Going to use default elastic insecure mode" ELASTICSEARCH_INSECURE="-k " +else + unset ELASTICSEARCH_INSECURE fi function create_datahub_usage_event_datastream() { From 164bc1aa53f92e5f20d65b87cd4e70c078576395 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Thu, 29 Sep 2022 11:14:32 +0100 Subject: [PATCH 04/76] Revert "chore(setup): change defaults for partitions (#6074)" (#6086) --- docker/kafka-setup/kafka-setup.sh | 2 +- metadata-service/factories/src/main/resources/application.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/kafka-setup/kafka-setup.sh b/docker/kafka-setup/kafka-setup.sh index d8cc4a251cb151..e6dcf6e199febd 100755 --- a/docker/kafka-setup/kafka-setup.sh +++ b/docker/kafka-setup/kafka-setup.sh @@ -1,5 +1,5 @@ #!/bin/bash -: ${PARTITIONS:=6} +: ${PARTITIONS:=1} : ${REPLICATION_FACTOR:=1} : ${KAFKA_PROPERTIES_SECURITY_PROTOCOL:=PLAINTEXT} diff --git a/metadata-service/factories/src/main/resources/application.yml b/metadata-service/factories/src/main/resources/application.yml index 901c185e561af8..e136a186028506 100644 --- a/metadata-service/factories/src/main/resources/application.yml +++ b/metadata-service/factories/src/main/resources/application.yml @@ -147,7 +147,7 @@ elasticsearch: retryInterval: ${ES_BULK_RETRY_INTERVAL:1} index: prefix: ${INDEX_PREFIX:} - numShards: ${ELASTICSEARCH_NUM_SHARDS_PER_INDEX:6} + numShards: ${ELASTICSEARCH_NUM_SHARDS_PER_INDEX:1} numReplicas: ${ELASTICSEARCH_NUM_REPLICAS_PER_INDEX:1} numRetries: ${ELASTICSEARCH_INDEX_BUILDER_NUM_RETRIES:3} maxArrayLength: ${SEARCH_DOCUMENT_MAX_ARRAY_LENGTH:1000} From 9e7bd1a84f7c8a7cbeb3d88989a725ce349e99ee Mon Sep 17 00:00:00 2001 From: Patrick Marx <6949483+codesorcery@users.noreply.github.com> Date: Thu, 29 Sep 2022 13:13:45 +0200 Subject: [PATCH 05/76] 
fix(mae-consumer): fix regression on base64 encoding (#6061) Pull request #5827 introduced a regression by removing coreutils from the mae-consumer Dockerfile (coreutils was added in #3723). This broke the base64 call in the startup script s.th. the Elasticsearch auth header will not be correctly set when username and password are provided. To make sure that the startup script fails on these errors in the future, set "-euo pipefail" which lets the bash script fail on errors and unset variables. Also refactor the startup script to make it more stable and readable. Co-authored-by: Pedro Silva --- docker/datahub-mae-consumer/Dockerfile | 2 +- docker/datahub-mae-consumer/start.sh | 61 ++++++++++---------------- 2 files changed, 24 insertions(+), 39 deletions(-) diff --git a/docker/datahub-mae-consumer/Dockerfile b/docker/datahub-mae-consumer/Dockerfile index c86738e9ff3162..96a34d22a7e638 100644 --- a/docker/datahub-mae-consumer/Dockerfile +++ b/docker/datahub-mae-consumer/Dockerfile @@ -14,7 +14,7 @@ RUN apk --no-cache --update-cache --available upgrade \ else \ echo >&2 "Unsupported architecture $(arch)" ; exit 1; \ fi \ - && apk --no-cache add tar curl bash \ + && apk --no-cache add tar curl bash coreutils \ && apk --no-cache add openjdk11-jre --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \ && wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.4.1/opentelemetry-javaagent-all.jar \ && wget --no-verbose https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.16.1/jmx_prometheus_javaagent-0.16.1.jar -O jmx_prometheus_javaagent.jar \ diff --git a/docker/datahub-mae-consumer/start.sh b/docker/datahub-mae-consumer/start.sh index e8b9d182ca24c0..a6c5c96842b856 100755 --- a/docker/datahub-mae-consumer/start.sh +++ b/docker/datahub-mae-consumer/start.sh @@ -1,61 +1,46 @@ #!/bin/bash +set -euo pipefail # Add default URI (http) scheme if needed -if ! 
echo $NEO4J_HOST | grep -q "://" ; then - NEO4J_HOST="http://$NEO4J_HOST" +if [[ -n ${NEO4J_HOST:-} ]] && [[ ${NEO4J_HOST} != *"://"* ]]; then + NEO4J_HOST="http://$NEO4J_HOST" fi -if [[ ! -z $ELASTICSEARCH_USERNAME ]] && [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then +if [[ -n ${ELASTICSEARCH_USERNAME:-} ]] && [[ -z ${ELASTICSEARCH_AUTH_HEADER:-} ]]; then AUTH_TOKEN=$(echo -ne "$ELASTICSEARCH_USERNAME:$ELASTICSEARCH_PASSWORD" | base64 --wrap 0) ELASTICSEARCH_AUTH_HEADER="Authorization:Basic $AUTH_TOKEN" fi # Add default header if needed -if [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then - ELASTICSEARCH_AUTH_HEADER="Accept: */*" -fi +: "${ELASTICSEARCH_AUTH_HEADER="Accept: */*"}" -if [[ $ELASTICSEARCH_USE_SSL == true ]]; then +if [[ ${ELASTICSEARCH_USE_SSL:-false} == true ]]; then ELASTICSEARCH_PROTOCOL=https else ELASTICSEARCH_PROTOCOL=http fi -WAIT_FOR_KAFKA="" -if [[ $SKIP_KAFKA_CHECK != true ]]; then - WAIT_FOR_KAFKA=" -wait tcp://$(echo $KAFKA_BOOTSTRAP_SERVER | sed 's/,/ -wait tcp:\/\//g') " +dockerize_args=("-timeout" "240s") +if [[ ${SKIP_KAFKA_CHECK:-false} != true ]]; then + IFS=',' read -ra KAFKAS <<< "$KAFKA_BOOTSTRAP_SERVER" + for i in "${KAFKAS[@]}"; do + dockerize_args+=("-wait" "tcp://$i") + done fi - -WAIT_FOR_ELASTICSEARCH="" -if [[ $SKIP_ELASTICSEARCH_CHECK != true ]]; then - WAIT_FOR_ELASTICSEARCH=" -wait $ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT -wait-http-header \"$ELASTICSEARCH_AUTH_HEADER\"" +if [[ ${SKIP_ELASTICSEARCH_CHECK:-false} != true ]]; then + dockerize_args+=("-wait" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT" "-wait-http-header" "$ELASTICSEARCH_AUTH_HEADER") fi - -WAIT_FOR_NEO4J="" -if [[ $GRAPH_SERVICE_IMPL != elasticsearch ]] && [[ $SKIP_NEO4J_CHECK != true ]]; then - WAIT_FOR_NEO4J=" -wait $NEO4J_HOST " +if [[ ${GRAPH_SERVICE_IMPL:-} != elasticsearch ]] && [[ ${SKIP_NEO4J_CHECK:-false} != true ]]; then + dockerize_args+=("-wait" "$NEO4J_HOST") fi -OTEL_AGENT="" -if [[ $ENABLE_OTEL == true 
]]; then - OTEL_AGENT="-javaagent:opentelemetry-javaagent-all.jar " +JDK_JAVA_OPTIONS="${JDK_JAVA_OPTIONS:-}${JAVA_OPTS:+ JAVA_OPTS}${JMX_OPTS:+ JMX_OPTS}" +if [[ ${ENABLE_OTEL:-false} == true ]]; then + JDK_JAVA_OPTIONS="$JDK_JAVA_OPTIONS -javaagent:opentelemetry-javaagent-all.jar" fi - -PROMETHEUS_AGENT="" -if [[ $ENABLE_PROMETHEUS == true ]]; then - PROMETHEUS_AGENT="-javaagent:jmx_prometheus_javaagent.jar=4318:/datahub/datahub-mae-consumer/scripts/prometheus-config.yaml " +if [[ ${ENABLE_PROMETHEUS:-false} == true ]]; then + JDK_JAVA_OPTIONS="$JDK_JAVA_OPTIONS -javaagent:jmx_prometheus_javaagent.jar=4318:/datahub/datahub-mae-consumer/scripts/prometheus-config.yaml" fi -COMMON=" - $WAIT_FOR_KAFKA \ - $WAIT_FOR_NEO4J \ - -timeout 240s \ - java $JAVA_OPTS $JMX_OPTS $OTEL_AGENT $PROMETHEUS_AGENT -jar /datahub/datahub-mae-consumer/bin/mae-consumer-job.jar -" -if [[ $SKIP_ELASTICSEARCH_CHECK != true ]]; then - exec dockerize \ - -wait $ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT -wait-http-header "$ELASTICSEARCH_AUTH_HEADER" \ - $COMMON -else - exec dockerize $COMMON -fi +export JDK_JAVA_OPTIONS +exec dockerize "${dockerize_args[@]}" java -jar /datahub/datahub-mae-consumer/bin/mae-consumer-job.jar From 596d48448ece0a4f3d374b95802ded78f3f71d61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Kub=C3=ADn?= Date: Thu, 29 Sep 2022 15:10:51 +0200 Subject: [PATCH 06/76] fix(elasticsearch) Analytics indices creation on AWS ES (#5502) * refactor(elasticsearch-setup-job): create-indices.sh readability The script contains many copy-pasting and is not easy to follow. Add comments, extract commonly used operations into functions, unify approaches. * fix(elasticsearch-setup-job): AWS indices creation Fix the issue where Amazon OpenSearch (AWS ES) indices are incorrectly initialised and the Analytics screen shows errors only. 
* feat(elasticsearch-setup-job): configuration hint mention USE_AWS_ELASTICSEARCH env value if it seems it's set the wrong way * fix(elasticsearch-setup-job): silent curl * fix(elasticsearch-setup-job): better USE_AWS_ELASTICSEARCH hint * docs(elasticsearch-setup-job): index dropping explained - more comments - more defensive approach - index file renamed * fix(elasticsearch-setup-job): script fixes * merge(elasticsearch-setup-job): merging in PR #5937 * merge(elasticsearch-setup-job): merging in PR #5963 * merge(elasticsearch-setup-job): merging in PR #5975 Co-authored-by: Pedro Silva --- docker/elasticsearch-setup/create-indices.sh | 181 +++++++++++------- .../index/usage-event/aws_es_index.json | 7 + 2 files changed, 117 insertions(+), 71 deletions(-) mode change 100755 => 100644 docker/elasticsearch-setup/create-indices.sh create mode 100644 metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_index.json diff --git a/docker/elasticsearch-setup/create-indices.sh b/docker/elasticsearch-setup/create-indices.sh old mode 100755 new mode 100644 index 2aadb7fb648839..62dd80afc584e5 --- a/docker/elasticsearch-setup/create-indices.sh +++ b/docker/elasticsearch-setup/create-indices.sh @@ -6,98 +6,137 @@ set -e : ${USE_AWS_ELASTICSEARCH:=false} : ${ELASTICSEARCH_INSECURE:=false} +# protocol: http or https? if [[ $ELASTICSEARCH_USE_SSL == true ]]; then ELASTICSEARCH_PROTOCOL=https else ELASTICSEARCH_PROTOCOL=http fi -echo -e "Going to use protocol: $ELASTICSEARCH_PROTOCOL" +echo -e "going to use protocol: $ELASTICSEARCH_PROTOCOL" -if [[ ! 
-z $ELASTICSEARCH_USERNAME ]] && [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then - AUTH_TOKEN=$(echo -ne "$ELASTICSEARCH_USERNAME:$ELASTICSEARCH_PASSWORD" | base64 --wrap 0) - ELASTICSEARCH_AUTH_HEADER="Authorization:Basic $AUTH_TOKEN" -fi +# Elasticsearch URL to be suffixed with a resource address +ELASTICSEARCH_URL="$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT" -# Add default header if needed +# set auth header if none is given if [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then - echo -e "Going to use default elastic headers" - ELASTICSEARCH_AUTH_HEADER="Accept: */*" + if [[ ! -z $ELASTICSEARCH_USERNAME ]]; then + # no auth header given, but username is defined -> use it to create the auth header + AUTH_TOKEN=$(echo -ne "$ELASTICSEARCH_USERNAME:$ELASTICSEARCH_PASSWORD" | base64 --wrap 0) + ELASTICSEARCH_AUTH_HEADER="Authorization:Basic $AUTH_TOKEN" + echo -e "going to use elastic headers based on username and password" + else + # no auth header or username given -> use default auth header + ELASTICSEARCH_AUTH_HEADER="Accept: */*" + echo -e "going to use default elastic headers" + fi fi +# will be using this for all curl communication with Elasticsearch: +CURL_ARGS=( + --silent + --header "$ELASTICSEARCH_AUTH_HEADER" +) +# ... 
also optionally use --insecure if [[ $ELASTICSEARCH_INSECURE == true ]]; then - echo -e "Going to use default elastic insecure mode" - ELASTICSEARCH_INSECURE="-k " -else - unset ELASTICSEARCH_INSECURE + CURL_ARGS+=(--insecure) fi -function create_datahub_usage_event_datastream() { - if [[ -z "$INDEX_PREFIX" ]]; then - PREFIX='' - else - PREFIX="${INDEX_PREFIX}_" - fi - echo -e "Create datahub_usage_event if needed against Elasticsearch at $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT" - echo -e "Going to use index prefix:$PREFIX:" - POLICY_RESPONSE_CODE=$(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_ilm/policy/${PREFIX}datahub_usage_event_policy") - echo -e "Policy GET response code is $POLICY_RESPONSE_CODE" - POLICY_NAME="${PREFIX}datahub_usage_event_policy" - if [ $POLICY_RESPONSE_CODE -eq 404 ]; then - echo -e "\ncreating $POLICY_NAME" - sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/policy.json | tee -a /tmp/policy.json - curl -s -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_ilm/policy/$POLICY_NAME" --header "Content-Type: application/json" --data "@/tmp/policy.json" - elif [ $POLICY_RESPONSE_CODE -eq 200 ]; then - echo -e "\n${POLICY_NAME} exists" - elif [ $POLICY_RESPONSE_CODE -eq 403 ]; then - echo -e "Forbidden so exiting" - exit 1 - else - echo -e "Got response code $POLICY_RESPONSE_CODE while creating policy so exiting." 
- exit 1 - fi - - TEMPLATE_RESPONSE_CODE=$(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_index_template/${PREFIX}datahub_usage_event_index_template") - echo -e "Template GET response code is $TEMPLATE_RESPONSE_CODE" - TEMPLATE_NAME="${PREFIX}datahub_usage_event_index_template" - if [ $TEMPLATE_RESPONSE_CODE -eq 404 ]; then - echo -e "\ncreating $TEMPLATE_NAME" - sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/index_template.json | tee -a /tmp/index_template.json - curl -s -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_index_template/$TEMPLATE_NAME" --header "Content-Type: application/json" --data "@/tmp/index_template.json" - elif [ $TEMPLATE_RESPONSE_CODE -eq 200 ]; then - echo -e "\n$TEMPLATE_NAME exists" - elif [ $TEMPLATE_RESPONSE_CODE -eq 403 ]; then - echo -e "Forbidden so exiting" +# index prefix used throughout the script +if [[ -z "$INDEX_PREFIX" ]]; then + PREFIX='' + echo -e "not using any prefix" +else + PREFIX="${INDEX_PREFIX}_" + echo -e "going to use prefix: '$PREFIX'" +fi + +# path where index definitions are stored +INDEX_DEFINITIONS_ROOT=/index/usage-event + + +# check Elasticsearch for given index/resource (first argument) +# if it doesn't exist (http code 404), use the given file (second argument) to create it +function create_if_not_exists { + RESOURCE_ADDRESS="$1" + RESOURCE_DEFINITION_NAME="$2" + + # query ES to see if the resource already exists + RESOURCE_STATUS=$(curl "${CURL_ARGS[@]}" -o /dev/null -w "%{http_code}\n" "$ELASTICSEARCH_URL/$RESOURCE_ADDRESS") + echo -e "\n>>> GET $RESOURCE_ADDRESS response code is $RESOURCE_STATUS" + + if [ $RESOURCE_STATUS -eq 200 ]; then + # resource already exists -> nothing to do + echo -e ">>> $RESOURCE_ADDRESS already exists ✓" + + elif [ $RESOURCE_STATUS -eq 404 ]; then + # resource doesn't 
exist -> need to create it + echo -e ">>> creating $RESOURCE_ADDRESS because it doesn't exist ..." + # use the file at given path as definition, but first replace all occurences of `PREFIX` + # placeholder within the file with the actual prefix value + TMP_SOURCE_PATH="/tmp/$RESOURCE_DEFINITION_NAME" + sed -e "s/PREFIX/$PREFIX/g" "$INDEX_DEFINITIONS_ROOT/$RESOURCE_DEFINITION_NAME" | tee -a "$TMP_SOURCE_PATH" + curl "${CURL_ARGS[@]}" -XPUT "$ELASTICSEARCH_URL/$RESOURCE_ADDRESS" -H 'Content-Type: application/json' --data "@$TMP_SOURCE_PATH" + + elif [ $RESOURCE_STATUS -eq 403 ]; then + # probably authorization fail + echo -e ">>> forbidden access to $RESOURCE_ADDRESS ! -> exiting" exit 1 + else - echo -e "Got response code $TEMPLATE_RESPONSE_CODE while creating template so exiting." + # when `USE_AWS_ELASTICSEARCH` was forgotten to be set to `true` when running against AWS ES OSS, + # this script will use wrong paths (e.g. `_ilm/policy/` instead of AWS-compatible `_opendistro/_ism/policies/`) + # and the ES endpoint will return `401 Unauthorized` or `405 Method Not Allowed` + # let's use this as chance to point that wrong config might be used! + if [ $RESOURCE_STATUS -eq 401 ] || [ $RESOURCE_STATUS -eq 405 ]; then + if [[ $USE_AWS_ELASTICSEARCH == false ]] && [[ $ELASTICSEARCH_URL == *"amazonaws"* ]]; then + echo "... looks like AWS OpenSearch is used; please set USE_AWS_ELASTICSEARCH env value to true" + fi + fi + + echo -e ">>> failed to GET $RESOURCE_ADDRESS ! -> exiting" exit 1 fi } +# create indices for ES (non-AWS) +function create_datahub_usage_event_datastream() { + # non-AWS env requires creation of two resources for Datahub usage events: + # 1. ILM policy + create_if_not_exists "_ilm/policy/${PREFIX}datahub_usage_event_policy" policy.json + # 2. 
index template + create_if_not_exists "_index_template/${PREFIX}datahub_usage_event_index_template" index_template.json +} + +# create indices for ES OSS (AWS) function create_datahub_usage_event_aws_elasticsearch() { - if [[ -z "$INDEX_PREFIX" ]]; then - PREFIX='' - else - PREFIX="${INDEX_PREFIX}_" - fi + # AWS env requires creation of three resources for Datahub usage events: + # 1. ISM policy + create_if_not_exists "_opendistro/_ism/policies/${PREFIX}datahub_usage_event_policy" aws_es_ism_policy.json - if [ $(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_opendistro/_ism/policies/${PREFIX}datahub_usage_event_policy") -eq 404 ] - then - echo -e "\ncreating datahub_usage_event_policy" - sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/aws_es_ism_policy.json | tee -a /tmp/aws_es_ism_policy.json - curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_opendistro/_ism/policies/${PREFIX}datahub_usage_event_policy" -H 'Content-Type: application/json' --data @/tmp/aws_es_ism_policy.json - else - echo -e "\ndatahub_usage_event_policy exists" - fi - if [ $(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_template/${PREFIX}datahub_usage_event_index_template") -eq 404 ] - then - echo -e "\ncreating datahub_usage_event_index_template" - sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/aws_es_index_template.json | tee -a /tmp/aws_es_index_template.json - curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_template/${PREFIX}datahub_usage_event_index_template" -H 'Content-Type: application/json' --data @/tmp/aws_es_index_template.json - curl -XPUT --header 
"$ELASTICSEARCH_AUTH_HEADER" "${ELASTICSEARCH_INSECURE}$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/${PREFIX}datahub_usage_event-000001" -H 'Content-Type: application/json' --data "{\"aliases\":{\"${PREFIX}datahub_usage_event\":{\"is_write_index\":true}}}" - else - echo -e "\ndatahub_usage_event_index_template exists" + # 2. index template + create_if_not_exists "_template/${PREFIX}datahub_usage_event_index_template" aws_es_index_template.json + + # 3. event index datahub_usage_event-000001 + # (note that AWS *rollover* indices need to use `^.*-\d+$` naming pattern) + # -> https://aws.amazon.com/premiumsupport/knowledge-center/opensearch-failed-rollover-index/ + INDEX_SUFFIX="000001" + # ... but first check whether `datahub_usage_event` wasn't already autocreated by GMS before `datahub_usage_event-000001` + # (as is common case when this script was initially run without properly setting `USE_AWS_ELASTICSEARCH` to `true`) + # -> https://github.com/datahub-project/datahub/issues/5376 + USAGE_EVENT_STATUS=$(curl "${CURL_ARGS[@]}" -o /dev/null -w "%{http_code}\n" "$ELASTICSEARCH_URL/${PREFIX}datahub_usage_event") + if [ $USAGE_EVENT_STATUS -eq 200 ]; then + USAGE_EVENT_DEFINITION=$(curl "${CURL_ARGS[@]}" "$ELASTICSEARCH_URL/${PREFIX}datahub_usage_event") + # the definition is expected to contain "datahub_usage_event-000001" string + if [[ $USAGE_EVENT_DEFINITION != *"datahub_usage_event-$INDEX_SUFFIX"* ]]; then + # ... if it doesn't, we need to drop it + echo -e "\n>>> deleting invalid datahub_usage_event ..." + curl "${CURL_ARGS[@]}" -XDELETE "$ELASTICSEARCH_URL/${PREFIX}datahub_usage_event" + # ... and then recreate it below + fi fi + + # ... 
now we are safe to create the index + create_if_not_exists "${PREFIX}datahub_usage_event-$INDEX_SUFFIX" aws_es_index.json } if [[ $DATAHUB_ANALYTICS_ENABLED == true ]]; then @@ -119,4 +158,4 @@ else elif [ $DATAHUB_USAGE_EVENT_INDEX_RESPONSE_CODE -eq 403 ]; then echo -e "Forbidden so exiting" fi -fi +fi \ No newline at end of file diff --git a/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_index.json b/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_index.json new file mode 100644 index 00000000000000..e3c66b61d3268b --- /dev/null +++ b/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_index.json @@ -0,0 +1,7 @@ +{ + "aliases": { + "PREFIXdatahub_usage_event": { + "is_write_index": true + } + } +} From 05b9d4ef2f362f4c12da4433479c919a34081a8b Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 29 Sep 2022 13:30:09 +0000 Subject: [PATCH 07/76] fix(doc): note that Athena doesn't support lineage (#6081) --- metadata-ingestion/src/datahub/ingestion/source/sql/athena.py | 1 - 1 file changed, 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index d456268bdb19dc..351a066e8f602f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -89,7 +89,6 @@ def get_sql_alchemy_url(self): "Optionally enabled via configuration. 
Profiling uses sql queries on whole table which can be expensive operation.", ) @capability(SourceCapability.DESCRIPTIONS, "Enabled by default") -@capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration") class AthenaSource(SQLAlchemySource): """ This plugin supports extracting the following metadata from Athena From 71f5ec7f19930f4bc52015591e05791773a48aaf Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 29 Sep 2022 13:33:49 +0000 Subject: [PATCH 08/76] fix(ingest): mssql - alias for mssql-odbc source (#6080) --- .../src/datahub/ingestion/source/source_registry.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/source_registry.py b/metadata-ingestion/src/datahub/ingestion/source/source_registry.py index 08c2ca4e2f8fdc..185a8be11a580e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/source_registry.py +++ b/metadata-ingestion/src/datahub/ingestion/source/source_registry.py @@ -17,3 +17,11 @@ UserWarning("source type snowflake-beta is deprecated, use snowflake instead") ), ) + +# The MSSQL source has two possible sets of dependencies. We alias +# the second to the first so that we maintain the 1:1 mapping between +# source type and pip extra. 
+source_registry.register_alias( + "mssql-odbc", + "mssql", +) From a6930562fd8c67f7aa38d2d544ce9cdeef75ca57 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Thu, 29 Sep 2022 16:20:17 +0200 Subject: [PATCH 09/76] fix(ingest): presto-on-hive - Setting display name properly (#6065) --- .../ingestion/source/sql/presto_on_hive.py | 2 ++ .../presto_on_hive_mces_golden_1.json | 23 +++++++++++++------ .../presto_on_hive_mces_golden_2.json | 22 ++++++++++++------ 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py index 1bad0e0a1be740..874dc72ac2a00f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py @@ -462,6 +462,7 @@ def loop_tables( properties["partitioned_columns"] = par_columns dataset_properties = DatasetPropertiesClass( + name=key.table, description=columns[-1]["description"], customProperties=properties, ) @@ -627,6 +628,7 @@ def loop_views( "is_view": "True", } dataset_properties = DatasetPropertiesClass( + name=dataset.dataset_name.split(".")[-1], description=None, customProperties=properties, ) diff --git a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_1.json b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_1.json index 1b0c2609e5590d..f0d21ea8d2c577 100644 --- a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_1.json +++ b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_1.json @@ -176,10 +176,11 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_date": "2022-09-20", + "create_date": "2022-09-29", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" }, + "name": "map_test", "tags": 
[] } } @@ -341,10 +342,11 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_date": "2022-09-20", + "create_date": "2022-09-29", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" }, + "name": "union_test", "tags": [] } } @@ -498,10 +500,11 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_date": "2022-09-20", + "create_date": "2022-09-29", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" }, + "name": "nested_struct_test", "tags": [] } } @@ -639,10 +642,11 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_date": "2022-09-20", + "create_date": "2022-09-29", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" }, + "name": "array_struct_test", "description": "This table has array of structs", "tags": [] } @@ -775,10 +779,11 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_date": "2022-09-20", + "create_date": "2022-09-29", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" }, + "name": "struct_test", "tags": [] } } @@ -880,10 +885,11 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_date": "2022-09-20", + "create_date": "2022-09-29", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" }, + "name": "_test_table_underscore", "tags": [] } } @@ -998,11 +1004,12 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_date": "2022-09-20", + "create_date": "2022-09-29", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", "partitioned_columns": "baz" }, 
+ "name": "pokes", "tags": [] } } @@ -1093,6 +1100,7 @@ "customProperties": { "is_view": "True" }, + "name": "array_struct_test_presto_view", "tags": [] } }, @@ -1251,6 +1259,7 @@ "customProperties": { "is_view": "True" }, + "name": "array_struct_test_view", "tags": [] } }, diff --git a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_2.json b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_2.json index c2403ae18bf753..5125aa2c1929c6 100644 --- a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_2.json +++ b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_2.json @@ -176,10 +176,11 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_date": "2022-09-20", + "create_date": "2022-09-29", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" }, + "name": "map_test", "tags": [] } } @@ -341,10 +342,11 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_date": "2022-09-20", + "create_date": "2022-09-29", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" }, + "name": "union_test", "tags": [] } } @@ -498,10 +500,11 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_date": "2022-09-20", + "create_date": "2022-09-29", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" }, + "name": "nested_struct_test", "tags": [] } } @@ -639,10 +642,11 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_date": "2022-09-20", + "create_date": "2022-09-29", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" }, + "name": "array_struct_test", "description": "This table has 
array of structs", "tags": [] } @@ -775,10 +779,11 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_date": "2022-09-20", + "create_date": "2022-09-29", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" }, + "name": "struct_test", "tags": [] } } @@ -880,10 +885,11 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_date": "2022-09-20", + "create_date": "2022-09-29", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" }, + "name": "_test_table_underscore", "tags": [] } } @@ -998,11 +1004,12 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "create_date": "2022-09-20", + "create_date": "2022-09-29", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", "partitioned_columns": "baz" }, + "name": "pokes", "tags": [] } } @@ -1106,6 +1113,7 @@ "customProperties": { "is_view": "True" }, + "name": "array_struct_test_presto_view", "tags": [] } }, From 8163e1cbe93b531c23583ed43dcd85da7ae0d986 Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Thu, 29 Sep 2022 08:38:19 -0700 Subject: [PATCH 10/76] fix(ui): fix schema infinite rerender (#6082) --- .../app/entity/shared/tabs/Dataset/Schema/SchemaTable.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTable.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTable.tsx index 404fa47d34ed3f..d920926adbfe2d 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTable.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTable.tsx @@ -48,6 +48,9 @@ export type Props = { expandedRowsFromFilter?: Set; filterText?: string; }; + +const EMPTY_SET: Set = new Set(); + export default function 
SchemaTable({ rows, schemaMetadata, @@ -56,7 +59,7 @@ export default function SchemaTable({ editMode = true, schemaFieldBlameList, showSchemaAuditView, - expandedRowsFromFilter = new Set(), + expandedRowsFromFilter = EMPTY_SET, filterText = '', }: Props): JSX.Element { const hasUsageStats = useMemo(() => (usageStats?.aggregations?.fields?.length || 0) > 0, [usageStats]); From 4792ac56b795ef107a87313c4dd1a4f3b514e575 Mon Sep 17 00:00:00 2001 From: Peter Szalai Date: Thu, 29 Sep 2022 21:36:17 +0200 Subject: [PATCH 11/76] feat(monitoring): track graphql errors in metrics (#6087) * feat(monitoring): track graphql errors in metrics * fix(statuscode): use status code * address linting * address linting --- .../exception/DataHubGraphQLError.java | 4 ++++ .../datahub/graphql/GraphQLController.java | 20 ++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/exception/DataHubGraphQLError.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/exception/DataHubGraphQLError.java index d7e530d9dbe4d5..15c539a608cc05 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/exception/DataHubGraphQLError.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/exception/DataHubGraphQLError.java @@ -38,6 +38,10 @@ private Map buildExtensions(DataHubGraphQLErrorCode errorCode) { return extensions; } + public int getErrorCode() { + return errorCode.getCode(); + } + @Override public String getMessage() { return message; diff --git a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java index 49e2b501b85917..c7f8ffbae1d683 100644 --- a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java +++ 
b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java @@ -9,6 +9,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.datahub.graphql.GraphQLEngine; +import com.linkedin.datahub.graphql.exception.DataHubGraphQLError; import com.linkedin.metadata.utils.metrics.MetricUtils; import graphql.ExecutionResult; import java.util.Collections; @@ -126,9 +127,25 @@ void getGraphQL(HttpServletRequest request, HttpServletResponse response) { throw new UnsupportedOperationException("GraphQL gets not supported."); } + private void observeErrors(ExecutionResult executionResult) { + executionResult.getErrors().forEach(graphQLError -> { + if (graphQLError instanceof DataHubGraphQLError) { + DataHubGraphQLError dhGraphQLError = (DataHubGraphQLError) graphQLError; + int errorCode = dhGraphQLError.getErrorCode(); + MetricUtils.get().counter(MetricRegistry.name(this.getClass(), "errorCode", Integer.toString(errorCode))).inc(); + } else { + MetricUtils.get().counter(MetricRegistry.name(this.getClass(), "errorType", graphQLError.getErrorType().toString())).inc(); + } + }); + if (executionResult.getErrors().size() != 0) { + MetricUtils.get().counter(MetricRegistry.name(this.getClass(), "error")).inc(); + } + } + @SuppressWarnings("unchecked") private void submitMetrics(ExecutionResult executionResult) { try { + observeErrors(executionResult); Object tracingInstrumentation = executionResult.getExtensions().get("tracing"); if (tracingInstrumentation instanceof Map) { Map tracingMap = (Map) tracingInstrumentation; @@ -137,11 +154,12 @@ private void submitMetrics(ExecutionResult executionResult) { // Extract top level resolver, parent is top level query. Assumes single query per call. 
List> resolvers = (List>) executionData.get("resolvers"); Optional> - parentResolver = resolvers.stream().filter(resolver -> resolver.get("parentType").equals("Query")).findFirst(); + parentResolver = resolvers.stream().filter(resolver -> resolver.get("parentType").equals("Query")).findFirst(); String fieldName = parentResolver.isPresent() ? (String) parentResolver.get().get("fieldName") : "UNKNOWN"; MetricUtils.get().histogram(MetricRegistry.name(this.getClass(), fieldName)).update(totalDuration); } } catch (Exception e) { + MetricUtils.get().counter(MetricRegistry.name(this.getClass(), "submitMetrics", "exception")).inc(); log.error("Unable to submit metrics for GraphQL call.", e); } } From 7359d922fcdf7043351c7e3a781d38eace97e935 Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Thu, 29 Sep 2022 17:25:38 -0700 Subject: [PATCH 12/76] feat(advanced search): Add component to show all advanced search filters & add new filter (#6058) * adding select value modal * expanding gql model and adding advanced search filter component * adding advanced search filters component * removing console logs * improve readability * responding to comments --- .../search/AdvancedSearchAddFilterSelect.tsx | 51 +++++++ ...ncedSearchFilterOverallUnionTypeSelect.tsx | 44 +++++++ .../src/app/search/AdvancedSearchFilters.tsx | 124 ++++++++++++++++++ 3 files changed, 219 insertions(+) create mode 100644 datahub-web-react/src/app/search/AdvancedSearchAddFilterSelect.tsx create mode 100644 datahub-web-react/src/app/search/AdvancedSearchFilterOverallUnionTypeSelect.tsx create mode 100644 datahub-web-react/src/app/search/AdvancedSearchFilters.tsx diff --git a/datahub-web-react/src/app/search/AdvancedSearchAddFilterSelect.tsx b/datahub-web-react/src/app/search/AdvancedSearchAddFilterSelect.tsx new file mode 100644 index 00000000000000..5130ecc3b628b0 --- /dev/null +++ b/datahub-web-react/src/app/search/AdvancedSearchAddFilterSelect.tsx @@ -0,0 +1,51 @@ +import { Select } from 'antd'; +import * as 
React from 'react'; +import styled from 'styled-components'; +import { PlusOutlined } from '@ant-design/icons'; + +import { FacetFilterInput } from '../../types.generated'; +import { FIELD_TO_LABEL } from './utils/constants'; + +const StyledPlus = styled(PlusOutlined)` + margin-right: 6px; +`; + +interface Props { + selectedFilters: Array; + onFilterFieldSelect: (value) => void; +} + +const { Option } = Select; + +export const AdvancedSearchAddFilterSelect = ({ selectedFilters, onFilterFieldSelect }: Props) => { + return ( + + ); +}; diff --git a/datahub-web-react/src/app/search/AdvancedSearchFilterOverallUnionTypeSelect.tsx b/datahub-web-react/src/app/search/AdvancedSearchFilterOverallUnionTypeSelect.tsx new file mode 100644 index 00000000000000..d74a4c2bc3229b --- /dev/null +++ b/datahub-web-react/src/app/search/AdvancedSearchFilterOverallUnionTypeSelect.tsx @@ -0,0 +1,44 @@ +import { Select } from 'antd'; +import React from 'react'; +import styled from 'styled-components/macro'; + +import { ANTD_GRAY } from '../entity/shared/constants'; +import { UnionType } from './utils/constants'; + +type Props = { + unionType: UnionType; + onUpdate: (newValue: UnionType) => void; +}; + +const { Option } = Select; + +const StyledSelect = styled(Select)` + border-radius: 5px; + background: ${ANTD_GRAY[4]}; + :hover { + background: ${ANTD_GRAY[4.5]}; + } +`; + +export const AdvancedSearchFilterOverallUnionTypeSelect = ({ unionType, onUpdate }: Props) => { + return ( + <> + { + if ((newValue as any) !== unionType) { + onUpdate(newValue as any); + } + }} + size="small" + dropdownMatchSelectWidth={false} + > + + + + + ); +}; diff --git a/datahub-web-react/src/app/search/AdvancedSearchFilters.tsx b/datahub-web-react/src/app/search/AdvancedSearchFilters.tsx new file mode 100644 index 00000000000000..dedbec352ffc0d --- /dev/null +++ b/datahub-web-react/src/app/search/AdvancedSearchFilters.tsx @@ -0,0 +1,124 @@ +import * as React from 'react'; +import { useState } from 'react'; 
+import styled from 'styled-components'; + +import { FacetFilterInput, FacetMetadata, SearchCondition } from '../../types.generated'; +import { ANTD_GRAY } from '../entity/shared/constants'; +import { AdvancedSearchFilter } from './AdvancedSearchFilter'; +import { AdvancedSearchFilterOverallUnionTypeSelect } from './AdvancedSearchFilterOverallUnionTypeSelect'; +import { AdvancedFilterSelectValueModal } from './AdvancedFilterSelectValueModal'; +import { FIELDS_THAT_USE_CONTAINS_OPERATOR, UnionType } from './utils/constants'; +import { AdvancedSearchAddFilterSelect } from './AdvancedSearchAddFilterSelect'; + +export const SearchFilterWrapper = styled.div` + min-height: 100%; + overflow: auto; + margin-top: 6px; + margin-left: 12px; + margin-right: 12px; + + &::-webkit-scrollbar { + height: 12px; + width: 1px; + background: #f2f2f2; + } + &::-webkit-scrollbar-thumb { + background: #cccccc; + -webkit-border-radius: 1ex; + -webkit-box-shadow: 0px 1px 2px rgba(0, 0, 0, 0.75); + } +`; + +const AnyAllSection = styled.div` + padding: 6px; + color: ${ANTD_GRAY[8]}; +`; + +const EmptyStateSection = styled.div` + border-radius: 5px; + background-color: ${ANTD_GRAY[2]}; + padding: 22px; + margin-top: 10px; +`; + +interface Props { + selectedFilters: Array; + facets: Array; + onFilterSelect: (newFilters: Array) => void; + onChangeUnionType: (unionType: UnionType) => void; + unionType?: UnionType; +} + +export const AdvancedSearchFilters = ({ + unionType = UnionType.AND, + facets, + selectedFilters, + onFilterSelect, + onChangeUnionType, +}: Props) => { + const [filterField, setFilterField] = useState(null); + + const onFilterFieldSelect = (value) => { + setFilterField(value.value); + }; + + const onSelectValueFromModal = (values) => { + if (!filterField) return; + + const newFilter: FacetFilterInput = { + field: filterField, + values: values as string[], + value: '', // TODO(Gabe): remove once we refactor the model + condition: 
FIELDS_THAT_USE_CONTAINS_OPERATOR.includes(filterField) + ? SearchCondition.Contain + : SearchCondition.Equal, + }; + onFilterSelect([...selectedFilters, newFilter]); + }; + + return ( + + + {selectedFilters?.length >= 2 && ( + + Show results that match{' '} + onChangeUnionType(newValue)} + /> + + )} + {selectedFilters.map((filter) => ( + facet.field === filter.field) || facets[0]} + filter={filter} + onClose={() => { + onFilterSelect(selectedFilters.filter((f) => f !== filter)); + }} + onUpdate={(newValue) => { + onFilterSelect( + selectedFilters.map((f) => { + if (f === filter) { + return newValue; + } + return f; + }), + ); + }} + /> + ))} + {filterField && ( + facet.field === filterField) || null} + onCloseModal={() => setFilterField(null)} + filterField={filterField} + onSelect={onSelectValueFromModal} + /> + )} + {selectedFilters?.length === 0 && No filters applied, add one above.} + + ); +}; From 446b10dcc3d696610dc17234d40392651423643a Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 30 Sep 2022 03:46:03 +0000 Subject: [PATCH 13/76] fix(ingest): bump `lkml` version (#6091) --- metadata-ingestion/setup.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index aea7ac9f2a15a3..4c547cee1be993 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -267,9 +267,16 @@ def get_long_description(): "kafka-connect": sql_common | {"requests", "JPype1"}, "ldap": {"python-ldap>=2.4"}, "looker": looker_common, - # lkml>=1.1.2 is required to support the sql_preamble expression in LookML "lookml": looker_common - | {"lkml>=1.1.2", "sql-metadata==2.2.2", "sqllineage==1.3.6", "GitPython>2"}, + | { + # This version of lkml contains a fix for parsing lists in + # LookML files with spaces between an item and the following comma. + # See https://github.com/joshtemple/lkml/issues/73. 
+ "lkml>=1.3.0b5", + "sql-metadata==2.2.2", + "sqllineage==1.3.6", + "GitPython>2", + }, "metabase": {"requests", "sqllineage==1.3.6"}, "mode": {"requests", "sqllineage==1.3.6", "tenacity>=8.0.1"}, "mongodb": {"pymongo[srv]>=3.11", "packaging"}, From 3d8d8c6f8e221693f761fa637319ab12aa51cff0 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Fri, 30 Sep 2022 01:03:22 -0700 Subject: [PATCH 14/76] fix(ingest): lookml - extract column correctly (#6093) --- .../src/datahub/ingestion/source/looker/lookml_source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index 0f6e1bba64bc9b..4803a75626585c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -597,7 +597,7 @@ def _get_fields( if type_cls == ViewFieldType.DIMENSION and extract_column_level_lineage: if field_dict.get("sql") is not None: upstream_field_match = re.match( - r"^.*\${TABLE}\.(.*)$", field_dict["sql"] + r"^.*\${TABLE}\.(\w+)", field_dict["sql"] ) if upstream_field_match: matched_field = upstream_field_match.group(1) From 79575b28152ef3e30d1d6a3a2e86b2b196df9226 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 30 Sep 2022 16:11:39 +0530 Subject: [PATCH 15/76] feat(retention): change default policy, add API to apply retention (#6088) --- .github/pr-labeler-config.yml | 1 + .../metadata/entity/EntityService.java | 23 +++ .../metadata/entity/RetentionService.java | 8 + .../cassandra/CassandraRetentionService.java | 8 + .../entity/ebean/EbeanRetentionService.java | 147 ++++++++++++++---- .../retention/BulkApplyRetentionArgs.java | 12 ++ .../retention/BulkApplyRetentionResult.java | 16 ++ ...com.linkedin.entity.entities.restspec.json | 24 +++ ...com.linkedin.entity.entities.snapshot.json | 36 ++++- 
...m.linkedin.platform.platform.snapshot.json | 12 +- .../resources/entity/EntityResource.java | 14 ++ .../src/main/resources/boot/retention.yaml | 12 +- 12 files changed, 273 insertions(+), 40 deletions(-) create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionArgs.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionResult.java diff --git a/.github/pr-labeler-config.yml b/.github/pr-labeler-config.yml index 75dcd272c21602..0689e13fa159c3 100644 --- a/.github/pr-labeler-config.yml +++ b/.github/pr-labeler-config.yml @@ -9,6 +9,7 @@ product: - 'datahub-web-react/**/*' - 'datahub-frontend/**/*' - 'datahub-graphql-core/**/*' +- 'metadata-io/**/*' docs: - 'docs/**/*' \ No newline at end of file diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java index fe163cd0c78b29..9f0e9550537324 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -41,6 +41,8 @@ import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; +import com.linkedin.metadata.entity.retention.BulkApplyRetentionArgs; +import com.linkedin.metadata.entity.retention.BulkApplyRetentionResult; import com.linkedin.metadata.entity.validation.EntityRegistryUrnValidator; import com.linkedin.metadata.entity.validation.RecordTemplateValidator; import com.linkedin.metadata.entity.validation.ValidationUtils; @@ -986,6 +988,27 @@ private UpdateAspectResult patchAspect(final Patch patch, final SystemMetadata s return result; } + public String batchApplyRetention(Integer start, Integer count, Integer attemptWithVersion, String aspectName, + String urn) { + 
BulkApplyRetentionArgs args = new BulkApplyRetentionArgs(); + if (start == null) { + start = 0; + } + args.start = start; + if (count == null) { + count = 100; + } + args.count = count; + if (attemptWithVersion == null) { + attemptWithVersion = 21; + } + args.attemptWithVersion = attemptWithVersion; + args.aspectName = aspectName; + args.urn = urn; + BulkApplyRetentionResult result = _retentionService.batchApplyRetentionEntities(args); + return result.toString(); + } + private boolean emitChangeLog(@Nullable RecordTemplate oldAspect, @Nullable SystemMetadata oldSystemMetadata, RecordTemplate newAspect, SystemMetadata newSystemMetadata, MetadataChangeProposal mcp, Urn entityUrn, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/RetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/RetentionService.java index 89e11373d3e6b6..14ec3a621cbfcc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/RetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/RetentionService.java @@ -7,6 +7,8 @@ import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.entity.retention.BulkApplyRetentionArgs; +import com.linkedin.metadata.entity.retention.BulkApplyRetentionResult; import com.linkedin.metadata.key.DataHubRetentionKey; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; @@ -184,6 +186,12 @@ public abstract void applyRetention(@Nonnull Urn urn, @Nonnull String aspectName */ public abstract void batchApplyRetention(@Nullable String entityName, @Nullable String aspectName); + /** + * Batch apply retention to all records within the start, end count + */ + public abstract BulkApplyRetentionResult batchApplyRetentionEntities(@Nonnull BulkApplyRetentionArgs args); + + @Value public static class RetentionContext { Optional maxVersion; diff 
--git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java index 90fd6f0297ce5a..319da182d9b7a7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java @@ -14,6 +14,8 @@ import com.linkedin.metadata.entity.RetentionService; import com.linkedin.metadata.entity.EntityAspectIdentifier; import com.linkedin.metadata.entity.EntityAspect; +import com.linkedin.metadata.entity.retention.BulkApplyRetentionArgs; +import com.linkedin.metadata.entity.retention.BulkApplyRetentionResult; import com.linkedin.retention.DataHubRetentionConfig; import com.linkedin.retention.Retention; import com.linkedin.retention.TimeBasedRetention; @@ -112,6 +114,12 @@ public void batchApplyRetention(@Nullable String entityName, @Nullable String as log.info("Finished applying retention to all records"); } + @Override + public BulkApplyRetentionResult batchApplyRetentionEntities(@Nonnull BulkApplyRetentionArgs args) { + log.error("batchApplyRetentionEntities not implemented for cassandra"); + return null; + } + private void applyVersionBasedRetention( @Nonnull final Urn urn, @Nonnull final String aspectName, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java index 81101eac3de7b1..b507beda7758e9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java @@ -4,6 +4,8 @@ import com.datahub.util.RecordUtils; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.RetentionService; +import 
com.linkedin.metadata.entity.retention.BulkApplyRetentionArgs; +import com.linkedin.metadata.entity.retention.BulkApplyRetentionResult; import com.linkedin.retention.DataHubRetentionConfig; import com.linkedin.retention.Retention; import com.linkedin.retention.TimeBasedRetention; @@ -12,6 +14,7 @@ import io.ebean.Expression; import io.ebean.ExpressionList; import io.ebean.PagedList; +import io.ebean.Query; import io.ebean.Transaction; import io.ebeaninternal.server.expression.Op; import io.ebeaninternal.server.expression.SimpleExpression; @@ -111,11 +114,50 @@ private Expression getTimeBasedRetentionQuery(@Nonnull final TimeBasedRetention new Timestamp(_clock.millis() - retention.getMaxAgeInSeconds() * 1000)); } + private void applyRetention( + PagedList rows, + Map retentionPolicyMap, + BulkApplyRetentionResult applyRetentionResult + ) { + try (Transaction transaction = _server.beginTransaction()) { + transaction.setBatchMode(true); + transaction.setBatchSize(_batchSize); + for (EbeanAspectV2 row : rows.getList()) { + // Only run for cases where there's multiple versions of the aspect + if (row.getVersion() == 0) { + continue; + } + // 1. 
Extract an Entity type from the entity Urn + Urn urn; + try { + urn = Urn.createFromString(row.getUrn()); + } catch (Exception e) { + log.error("Failed to serialize urn {}", row.getUrn(), e); + continue; + } + final String aspectNameFromRecord = row.getAspect(); + log.debug("Handling urn {} aspect {}", row.getUrn(), row.getAspect()); + // Get the retention policies to apply from the local retention policy map + Optional retentionPolicy = getRetentionKeys(urn.getEntityType(), aspectNameFromRecord).stream() + .map(key -> retentionPolicyMap.get(key.toString())) + .filter(Objects::nonNull) + .findFirst() + .map(DataHubRetentionConfig::getRetention); + retentionPolicy.ifPresent(retention -> applyRetention(urn, aspectNameFromRecord, retention, + Optional.of(new RetentionContext(Optional.of(row.getVersion()))))); + if (applyRetentionResult != null) { + applyRetentionResult.rowsHandled += 1; + } + } + transaction.commit(); + } + } + @Override @WithSpan public void batchApplyRetention(@Nullable String entityName, @Nullable String aspectName) { log.debug("Applying retention to all records"); - int numCandidates = queryCandidates(entityName, aspectName).findCount(); + int numCandidates = queryCandidates(null, entityName, aspectName).findCount(); log.info("Found {} urn, aspect pair with more than 1 version", numCandidates); Map retentionPolicyMap = getAllRetentionPolicies(); @@ -123,40 +165,77 @@ public void batchApplyRetention(@Nullable String entityName, @Nullable String as while (start < numCandidates) { log.info("Applying retention to pairs {} through {}", start, start + _batchSize); PagedList rows = getPagedAspects(entityName, aspectName, start, _batchSize); + applyRetention(rows, retentionPolicyMap, null); + start += _batchSize; + } - try (Transaction transaction = _server.beginTransaction()) { - transaction.setBatchMode(true); - transaction.setBatchSize(_batchSize); - for (EbeanAspectV2 row : rows.getList()) { - // Only run for cases where there's multiple versions of 
the aspect - if (row.getVersion() == 0) { - continue; - } - // 1. Extract an Entity type from the entity Urn - Urn urn; - try { - urn = Urn.createFromString(row.getUrn()); - } catch (Exception e) { - log.error("Failed to serialize urn {}", row.getUrn(), e); - continue; - } - final String aspectNameFromRecord = row.getAspect(); - // Get the retention policies to apply from the local retention policy map - Optional retentionPolicy = getRetentionKeys(urn.getEntityType(), aspectNameFromRecord).stream() - .map(key -> retentionPolicyMap.get(key.toString())) - .filter(Objects::nonNull) - .findFirst() - .map(DataHubRetentionConfig::getRetention); - retentionPolicy.ifPresent(retention -> applyRetention(urn, aspectNameFromRecord, retention, - Optional.of(new RetentionContext(Optional.of(row.getVersion()))))); - } - transaction.commit(); + log.info("Finished applying retention to all records"); + } + + @Override + public BulkApplyRetentionResult batchApplyRetentionEntities(@Nonnull BulkApplyRetentionArgs args) { + long startTime = System.currentTimeMillis(); + + BulkApplyRetentionResult result = new BulkApplyRetentionResult(); + result.argStart = args.start; + result.argCount = args.count; + result.argAttemptWithVersion = args.attemptWithVersion; + result.argAspectName = args.aspectName; + result.argUrn = args.urn; + + Map retentionPolicyMap = getAllRetentionPolicies(); + result.timeRetentionPolicyMapMs = System.currentTimeMillis() - startTime; + startTime = System.currentTimeMillis(); + + //only supports version based retention for batch apply + //find urn, aspect pair where distinct versions > 20 to apply retention policy + Query query = _server.find(EbeanAspectV2.class) + .setDistinct(true) + .select(String.format( + "%s, %s, count(%s)", EbeanAspectV2.URN_COLUMN, EbeanAspectV2.ASPECT_COLUMN, EbeanAspectV2.VERSION_COLUMN) + ); + ExpressionList exp = null; + if (args.urn != null || args.aspectName != null) { + exp = query.where(); + if (args.aspectName != null) { + exp = 
exp.eq(EbeanAspectV2.ASPECT_COLUMN, args.aspectName); } + if (args.urn != null) { + exp = exp.eq(EbeanAspectV2.URN_COLUMN, args.urn); + } + } + if (exp == null) { + exp = query.having(); + } else { + exp = exp.having(); + } - start += _batchSize; + PagedList rows = exp + .gt(String.format("count(%s)", EbeanAspectV2.VERSION_COLUMN), args.attemptWithVersion) + .setFirstRow(args.start) + .setMaxRows(args.count) + .findPagedList(); + result.timeRowMs = System.currentTimeMillis() - startTime; + + for (EbeanAspectV2 row : rows.getList()) { + startTime = System.currentTimeMillis(); + log.debug("For {},{} version count is {}", row.getUrn(), row.getAspect(), row.getVersion()); + try { + Urn.createFromString(row.getUrn()); + } catch (Exception e) { + log.error("Failed to serialize urn {}", row.getUrn(), e); + continue; + } + PagedList rowsToChange = queryCandidates(row.getUrn(), null, row.getAspect()) + .setFirstRow(args.start) + .setMaxRows(args.count) + .findPagedList(); + + applyRetention(rowsToChange, retentionPolicyMap, result); + result.timeApplyRetentionMs += System.currentTimeMillis() - startTime; } - log.info("Finished applying retention to all records"); + return result; } private Map getAllRetentionPolicies() { @@ -172,12 +251,16 @@ private Map getAllRetentionPolicies() { row -> RecordUtils.toRecordTemplate(DataHubRetentionConfig.class, row.getMetadata()))); } - private ExpressionList queryCandidates(@Nullable String entityName, @Nullable String aspectName) { + private ExpressionList queryCandidates(@Nullable String urn, + @Nullable String entityName, @Nullable String aspectName) { ExpressionList query = _server.find(EbeanAspectV2.class) .setDistinct(true) .select(String.format("%s, %s, max(%s)", EbeanAspectV2.URN_COLUMN, EbeanAspectV2.ASPECT_COLUMN, EbeanAspectV2.VERSION_COLUMN)) .where(); + if (urn != null) { + query.eq(EbeanAspectV2.URN_COLUMN, urn); + } if (entityName != null) { query.like(EbeanAspectV2.URN_COLUMN, String.format("urn:li:%s%%", entityName)); } 
@@ -189,7 +272,7 @@ private ExpressionList queryCandidates(@Nullable String entityNam private PagedList getPagedAspects(@Nullable String entityName, @Nullable String aspectName, final int start, final int pageSize) { - return queryCandidates(entityName, aspectName).orderBy( + return queryCandidates(null, entityName, aspectName).orderBy( EbeanAspectV2.URN_COLUMN + ", " + EbeanAspectV2.ASPECT_COLUMN) .setFirstRow(start) .setMaxRows(pageSize) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionArgs.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionArgs.java new file mode 100644 index 00000000000000..0d9126026b9c8f --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionArgs.java @@ -0,0 +1,12 @@ +package com.linkedin.metadata.entity.retention; + +import lombok.Data; + +@Data +public class BulkApplyRetentionArgs { + public Integer start; + public Integer count; + public Integer attemptWithVersion; + public String aspectName; + public String urn; +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionResult.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionResult.java new file mode 100644 index 00000000000000..ef032496c8451e --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionResult.java @@ -0,0 +1,16 @@ +package com.linkedin.metadata.entity.retention; + +import lombok.Data; + +@Data +public class BulkApplyRetentionResult { + public long argStart; + public long argCount; + public long argAttemptWithVersion; + public String argUrn; + public String argAspectName; + public long rowsHandled = 0; + public long timeRetentionPolicyMapMs; + public long timeRowMs; + public long timeApplyRetentionMs = 0; +} diff --git a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entities.restspec.json 
b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entities.restspec.json index 90c186622ff166..1e599c1221a51e 100644 --- a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entities.restspec.json +++ b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entities.restspec.json @@ -27,6 +27,30 @@ } ] } ], "actions" : [ { + "name" : "applyRetention", + "parameters" : [ { + "name" : "start", + "type" : "int", + "optional" : true + }, { + "name" : "count", + "type" : "int", + "optional" : true + }, { + "name" : "attemptWithVersion", + "type" : "int", + "optional" : true + }, { + "name" : "aspectName", + "type" : "string", + "optional" : true + }, { + "name" : "urn", + "type" : "string", + "optional" : true + } ], + "returns" : "string" + }, { "name" : "autocomplete", "parameters" : [ { "name" : "entity", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index fb4cf8de0c083f..bf1af17fdbec91 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -1620,6 +1620,11 @@ "name" : "name", "type" : "string", "doc" : "Name of the data platform", + "Searchable" : { + "boostScore" : 10.0, + "enableAutocomplete" : false, + "fieldType" : "TEXT_PARTIAL" + }, "validate" : { "strlen" : { "max" : 15 @@ -1629,7 +1634,12 @@ "name" : "displayName", "type" : "string", "doc" : "The name that will be used for displaying a platform type.", - "optional" : true + "optional" : true, + "Searchable" : { + "boostScore" : 10.0, + "enableAutocomplete" : true, + "fieldType" : "TEXT_PARTIAL" + } }, { "name" : "type", "type" : { @@ -5646,6 +5656,30 @@ } ] } ], "actions" : [ { + "name" : "applyRetention", + "parameters" : [ { + "name" : "start", + "type" : "int", + "optional" : true + }, { + 
"name" : "count", + "type" : "int", + "optional" : true + }, { + "name" : "attemptWithVersion", + "type" : "int", + "optional" : true + }, { + "name" : "aspectName", + "type" : "string", + "optional" : true + }, { + "name" : "urn", + "type" : "string", + "optional" : true + } ], + "returns" : "string" + }, { "name" : "autocomplete", "parameters" : [ { "name" : "entity", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index 7f94911b919029..30715f3d2446b1 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -1620,6 +1620,11 @@ "name" : "name", "type" : "string", "doc" : "Name of the data platform", + "Searchable" : { + "boostScore" : 10.0, + "enableAutocomplete" : false, + "fieldType" : "TEXT_PARTIAL" + }, "validate" : { "strlen" : { "max" : 15 @@ -1629,7 +1634,12 @@ "name" : "displayName", "type" : "string", "doc" : "The name that will be used for displaying a platform type.", - "optional" : true + "optional" : true, + "Searchable" : { + "boostScore" : 10.0, + "enableAutocomplete" : true, + "fieldType" : "TEXT_PARTIAL" + } }, { "name" : "type", "type" : { diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index 25aea69130b6b8..90eec1000a969c 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -93,6 +93,7 @@ public class EntityResource extends CollectionResourceTaskTemplate 
listUrns(@ActionParam(PARAM_ENTITY) @Nonnull String return RestliUtil.toTask(() -> _entityService.listUrns(entityName, start, count), "listUrns"); } + @Action(name = ACTION_APPLY_RETENTION) + @Nonnull + @WithSpan + public Task applyRetention(@ActionParam(PARAM_START) @Optional @Nullable Integer start, + @ActionParam(PARAM_COUNT) @Optional @Nullable Integer count, + @ActionParam("attemptWithVersion") @Optional @Nullable Integer attemptWithVersion, + @ActionParam(PARAM_ASPECT_NAME) @Optional @Nullable String aspectName, + @ActionParam(PARAM_URN) @Optional @Nullable String urn + ) { + return RestliUtil.toTask(() -> _entityService.batchApplyRetention( + start, count, attemptWithVersion, aspectName, urn), ACTION_APPLY_RETENTION); + } + @Action(name = ACTION_FILTER) @Nonnull @WithSpan diff --git a/metadata-service/war/src/main/resources/boot/retention.yaml b/metadata-service/war/src/main/resources/boot/retention.yaml index df22026de813cc..630d2ce7ad0e67 100644 --- a/metadata-service/war/src/main/resources/boot/retention.yaml +++ b/metadata-service/war/src/main/resources/boot/retention.yaml @@ -4,12 +4,12 @@ retention: version: maxVersions: 5 -#- entity: "*" -# aspect: "*" -# config: -# retention: -# version: -# maxVersions: 20 +- entity: "*" + aspect: "*" + config: + retention: + version: + maxVersions: 20 #- entity: dataset # aspect: datasetProperties # config: From 266216311ebb87e583defcc6f07578b677b3b357 Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Fri, 30 Sep 2022 13:52:28 -0700 Subject: [PATCH 16/76] fix(lineage): fix missed casing in lineage registry (#6078) * fix backwards case sensitive lineage * update tests * make EdgeInfo equals and hashcode case resilient * updating lint Co-authored-by: John Joyce --- .../models/registry/LineageRegistry.java | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/LineageRegistry.java 
b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/LineageRegistry.java index ab57d19bf3ca32..2dc2ef9d2bd0c8 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/LineageRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/LineageRegistry.java @@ -132,5 +132,27 @@ public static class EdgeInfo { String type; RelationshipDirection direction; String opposingEntityType; + + @Override + public boolean equals(Object o) { + if (o == this) { + return true; + } + + if (o instanceof EdgeInfo) { + return ((EdgeInfo) o).type.equalsIgnoreCase(this.type) + && ((EdgeInfo) o).direction.equals(this.direction) + && ((EdgeInfo) o).opposingEntityType.equalsIgnoreCase(this.opposingEntityType); + } + return false; + } + + @Override + public int hashCode() { + return ((this.type == null ? 0 : this.type.toLowerCase().hashCode()) + ^ (this.direction == null ? 0 : this.direction.hashCode()) + ^ (this.opposingEntityType == null ? 0 : this.opposingEntityType.toLowerCase().hashCode())); + } } + } From 05f5c123bca8710a39ef6e729bcf8b9f46540ff5 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Sat, 1 Oct 2022 14:39:04 +0200 Subject: [PATCH 17/76] fix(ingest): bigquery-beta - Lowering a bit memory footprint of bigquery usage (#6095) * Lowering a bit memory footprint of bigquery usage * Filtering out not seen tables from usage generation --- .../ingestion/source/bigquery_v2/bigquery.py | 24 ++++++++++-- .../ingestion/source/bigquery_v2/usage.py | 38 ++++++++++++------- 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 2dd1ad84e488c1..e54faea2e7130f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -445,9 +445,6 @@ def get_workunits(self) -> 
Iterable[WorkUnit]: logger.info(f"Processing project: {project_id.id}") yield from self._process_project(conn, project_id) - if self.config.include_usage_statistics: - yield from self.usage_extractor.get_workunits() - if self.config.profiling.enabled: yield from self.profiler.get_workunits(self.db_tables) @@ -493,7 +490,26 @@ def _process_project( if self.config.include_usage_statistics: logger.info(f"Generate usage for {project_id}") - yield from self.usage_extractor.generate_usage_for_project(project_id) + tables: Dict[str, List[str]] = {} + + for dataset in self.db_tables[project_id]: + tables[dataset] = [ + table.name for table in self.db_tables[project_id][dataset] + ] + + for dataset in self.db_views[project_id]: + if not tables[dataset]: + tables[dataset] = [ + table.name for table in self.db_views[project_id][dataset] + ] + else: + tables[dataset].extend( + [table.name for table in self.db_views[project_id][dataset]] + ) + + yield from self.usage_extractor.generate_usage_for_project( + project_id, tables + ) def _process_schema( self, conn: bigquery.Client, project_id: str, bigquery_dataset: BigqueryDataset diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py index cfd25bc5319b3d..119d8fa49b171d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py @@ -150,15 +150,10 @@ class BigQueryUsageExtractor: * Aggregation of these statistics into buckets, by day or hour granularity :::note - 1. This source only does usage statistics. To get the tables, views, and schemas in your BigQuery project, use the `bigquery` plugin. - 2. Depending on the compliance policies setup for the bigquery instance, sometimes logging.read permission is not sufficient. In that case, use either admin or private log viewer permission. + 1. 
Depending on the compliance policies setup for the bigquery instance, sometimes logging.read permission is not sufficient. In that case, use either admin or private log viewer permission. ::: """ - aggregated_info: Dict[ - datetime, Dict[BigQueryTableRef, AggregatedDataset] - ] = collections.defaultdict(dict) - def __init__(self, config: BigQueryV2Config, report: BigQueryV2Report): self.config: BigQueryV2Config = config self.report: BigQueryV2Report = report @@ -173,7 +168,13 @@ def _is_table_allowed(self, table_ref: Optional[BigQueryTableRef]) -> bool: and self.config.table_pattern.allowed(table_ref.table_identifier.table) ) - def generate_usage_for_project(self, project_id: str) -> Iterable[MetadataWorkUnit]: + def generate_usage_for_project( + self, project_id: str, tables: Dict[str, List[str]] + ) -> Iterable[MetadataWorkUnit]: + aggregated_info: Dict[ + datetime, Dict[BigQueryTableRef, AggregatedDataset] + ] = collections.defaultdict(dict) + parsed_bigquery_log_events: Iterable[ Union[ReadEvent, QueryEvent, MetadataWorkUnit] ] @@ -221,24 +222,26 @@ def generate_usage_for_project(self, project_id: str) -> Iterable[MetadataWorkUn yield operational_wu self.report.num_operational_stats_workunits_emitted += 1 if event.read_event: - self.aggregated_info = self._aggregate_enriched_read_events( - self.aggregated_info, event + aggregated_info = self._aggregate_enriched_read_events( + aggregated_info, event, tables ) num_aggregated += 1 logger.info(f"Total number of events aggregated = {num_aggregated}.") bucket_level_stats: str = "\n\t" + "\n\t".join( [ f'bucket:{db.strftime("%m-%d-%Y:%H:%M:%S")}, size={len(ads)}' - for db, ads in self.aggregated_info.items() + for db, ads in aggregated_info.items() ] ) logger.debug( - f"Number of buckets created = {len(self.aggregated_info)}. Per-bucket details:{bucket_level_stats}" + f"Number of buckets created = {len(aggregated_info)}. 
Per-bucket details:{bucket_level_stats}" ) self.report.usage_extraction_sec[project_id] = round( timer.elapsed_seconds(), 2 ) + + yield from self.get_workunits(aggregated_info) except Exception as e: self.report.usage_failed_extraction.append(project_id) logger.error( @@ -746,6 +749,7 @@ def _aggregate_enriched_read_events( self, datasets: Dict[datetime, Dict[BigQueryTableRef, AggregatedDataset]], event: AuditEvent, + tables: Dict[str, List[str]], ) -> Dict[datetime, Dict[BigQueryTableRef, AggregatedDataset]]: if not event.read_event: return datasets @@ -756,6 +760,12 @@ def _aggregate_enriched_read_events( resource: Optional[BigQueryTableRef] = None try: resource = event.read_event.resource.get_sanitized_table_ref() + if ( + resource.table_identifier.get_table_display_name() + not in tables[resource.table_identifier.dataset] + ): + logger.debug(f"Skipping non existing {resource} from usage") + return datasets except Exception as e: self.report.report_warning( str(event.read_event.resource), f"Failed to clean up resource, {e}" @@ -787,9 +797,11 @@ def _aggregate_enriched_read_events( return datasets - def get_workunits(self): + def get_workunits( + self, aggregated_info: Dict[datetime, Dict[BigQueryTableRef, AggregatedDataset]] + ) -> Iterable[MetadataWorkUnit]: self.report.num_usage_workunits_emitted = 0 - for time_bucket in self.aggregated_info.values(): + for time_bucket in aggregated_info.values(): for aggregate in time_bucket.values(): wu = self._make_usage_stat(aggregate) self.report.report_workunit(wu) From 48b46971b8f40fd68c83d0afe1a6ca5f005dfa7d Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Sun, 2 Oct 2022 16:46:34 -0700 Subject: [PATCH 18/76] feat(ingest): remove hardcoded env variable default for cli version (#6075) --- .github/workflows/build-and-test.yml | 2 + .../ingest/source/builder/NameSourceStep.tsx | 2 +- .../datahub-gms/env/docker-without-neo4j.env | 3 +- docker/datahub-gms/env/docker.env | 1 - ...ocker-compose-without-neo4j.quickstart.yml 
| 1 - .../quickstart/docker-compose.quickstart.yml | 1 - gradle/versioning/versioning.gradle | 86 +++++++++++++++++++ metadata-service/factories/build.gradle | 5 ++ .../src/main/resources/application.yml | 2 +- 9 files changed, 96 insertions(+), 7 deletions(-) create mode 100644 gradle/versioning/versioning.gradle diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 5f96dceb5b5518..3a4b889a60caba 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -25,6 +25,8 @@ jobs: timeout-minutes: 60 steps: - uses: actions/checkout@v2 + with: + fetch-depth: 0 - name: Set up JDK 11 uses: actions/setup-java@v1 with: diff --git a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx index 138136c64695d6..d7c1a2e4bd54ed 100644 --- a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx @@ -98,7 +98,7 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) Advanced: Provide a custom CLI version to use for ingestion. 
setVersion(event.target.value)} /> diff --git a/docker/datahub-gms/env/docker-without-neo4j.env b/docker/datahub-gms/env/docker-without-neo4j.env index 45b8e8c5eda54b..6a085266d5c88a 100644 --- a/docker/datahub-gms/env/docker-without-neo4j.env +++ b/docker/datahub-gms/env/docker-without-neo4j.env @@ -16,7 +16,6 @@ MAE_CONSUMER_ENABLED=true MCE_CONSUMER_ENABLED=true PE_CONSUMER_ENABLED=true UI_INGESTION_ENABLED=true -UI_INGESTION_DEFAULT_CLI_VERSION=0.8.42 ENTITY_SERVICE_ENABLE_RETENTION=true # Uncomment to disable persistence of client-side analytics events @@ -46,4 +45,4 @@ ENTITY_SERVICE_ENABLE_RETENTION=true # Uncomment to run a one-time upgrade to migrate legacy default browse path format to latest format # More details can be found at https://datahubproject.io/docs/advanced/browse-paths-upgrade -# UPGRADE_DEFAULT_BROWSE_PATHS_ENABLED=true \ No newline at end of file +# UPGRADE_DEFAULT_BROWSE_PATHS_ENABLED=true diff --git a/docker/datahub-gms/env/docker.env b/docker/datahub-gms/env/docker.env index 1b859aa59b144a..7c0297a3cd8ba9 100644 --- a/docker/datahub-gms/env/docker.env +++ b/docker/datahub-gms/env/docker.env @@ -20,7 +20,6 @@ MAE_CONSUMER_ENABLED=true MCE_CONSUMER_ENABLED=true PE_CONSUMER_ENABLED=true UI_INGESTION_ENABLED=true -UI_INGESTION_DEFAULT_CLI_VERSION=0.8.42 # Uncomment to enable Metadata Service Authentication # METADATA_SERVICE_AUTH_ENABLED=true diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 05ca6576787b67..b007b59d52841c 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -82,7 +82,6 @@ services: - MCE_CONSUMER_ENABLED=true - PE_CONSUMER_ENABLED=true - UI_INGESTION_ENABLED=true - - UI_INGESTION_DEFAULT_CLI_VERSION=0.8.42 - ENTITY_SERVICE_ENABLE_RETENTION=true hostname: datahub-gms image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} 
diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index dc35e0c935c2ff..a7018a176f81ce 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -89,7 +89,6 @@ services: - MCE_CONSUMER_ENABLED=true - PE_CONSUMER_ENABLED=true - UI_INGESTION_ENABLED=true - - UI_INGESTION_DEFAULT_CLI_VERSION=0.8.42 hostname: datahub-gms image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} ports: diff --git a/gradle/versioning/versioning.gradle b/gradle/versioning/versioning.gradle new file mode 100644 index 00000000000000..50fd3e844bd409 --- /dev/null +++ b/gradle/versioning/versioning.gradle @@ -0,0 +1,86 @@ +/** + Applies a consistent versioning scheme to all projects using this script + +Uses git tags to mint versions by default. +git tags can be of a few forms: +- short sha (typical for a PR or a commit) (e.g. 38960ae) +- versioned tags (typical for a release) (e.g. v0.8.45, v0.8.45.1, v0.8.45rc1, v0.8.45.1rc4) + +Produces the following variables and supports token replacement +- version: server version amenable for creating jars +- fullVersion: full version string +- cliMajorVersion: cli version amenable for binding to server as a default + 0.8.44 or 0.8.44-1 (for clean tags) or 0.8.45-SNAPSHOT (for unclean repositories) + + All inference can be overridden by passing in the releaseVersion property + e.g. 
-PreleaseVersion=0.2.3.4 will set the jar version to 0.2.3-4 + + **/ + + +import org.apache.tools.ant.filters.ReplaceTokens + +def detailedVersionString = "0.0.0-unknown-SNAPSHOT" +def cliMajorVersion = "0.8.42" // base default cli major version +def snapshotVersion = false +if (project.hasProperty("releaseVersion")) { + version = releaseVersion + detailedVersionString = releaseVersion +} else { + try { + // apply this plugin in a try-catch block so that we can handle cases without .git directory + apply plugin: "com.palantir.git-version" + def details = versionDetails() + detailedVersionString = gitVersion() + version = details.lastTag + version = version.startsWith("v")? version.substring(1): version + def suffix = details.isCleanTag? "": "-SNAPSHOT" + snapshotVersion = ! details.isCleanTag + } + catch (Exception e) { + e.printStackTrace() + // last fall back + version = detailedVersionString + } +} + +// trim version if it is of size 4 to size 3 +def versionParts = version.tokenize(".") +if (versionParts.size() > 3) { + // at-least 4 part version + // we check if the 4th part is a .0 in which case we want to create a release + if ((versionParts.size() == 4) && (versionParts[3] == '0')) { + versionParts = versionParts[0..2] + } + version = versionParts[0..2].join('.') + if (versionParts.size() > 3) { + version = version + "-" + versionParts[3..versionParts.size()-1].join('-') + } + cliMajorVersion = version +} else if (versionParts.size() == 3) { + cliMajorVersion = version +} + +if (snapshotVersion) { + if (versionParts[versionParts.size()-1].isInteger()) { + def base_version = versionParts[0..versionParts.size()-2].join('.') + version = base_version + '.' + (versionParts[versionParts.size()-1].toInteger()+1).toString() + "-SNAPSHOT" + cliMajorVersion = base_version + "." 
+ versionParts[versionParts.size()-1] + } else { + // we are unable to part the last token as an integer, so we just append SNAPSHOT to this version + version = versionParts[0..versionParts.size()-1].join('.') + '-SNAPSHOT' + cliMajorVersion = versionParts[0..versionParts.size()-1].join('.') + } +} + + +processResources { + filter(ReplaceTokens, tokens:[fullVersion: detailedVersionString]) + filter(ReplaceTokens, tokens:[cliMajorVersion: cliMajorVersion]) +} + +task printVersionDetails() { + println("fullVersion=" + detailedVersionString) + println("cliMajorVersion=" + cliMajorVersion) + println("version=" + version) +} diff --git a/metadata-service/factories/build.gradle b/metadata-service/factories/build.gradle index 453e8727c48bbf..6ad9fc6b89b169 100644 --- a/metadata-service/factories/build.gradle +++ b/metadata-service/factories/build.gradle @@ -1,4 +1,5 @@ apply plugin: 'java' +apply from: "../../gradle/versioning/versioning.gradle" dependencies { compile project(':metadata-io') @@ -33,3 +34,7 @@ dependencies { testCompile externalDependency.testng } + +processResources.configure { + finalizedBy printVersionDetails // always print version details +} diff --git a/metadata-service/factories/src/main/resources/application.yml b/metadata-service/factories/src/main/resources/application.yml index e136a186028506..a54a16d59d408a 100644 --- a/metadata-service/factories/src/main/resources/application.yml +++ b/metadata-service/factories/src/main/resources/application.yml @@ -44,7 +44,7 @@ authorization: ingestion: enabled: ${UI_INGESTION_ENABLED:true} - defaultCliVersion: '${UI_INGESTION_DEFAULT_CLI_VERSION:0.8.42}' + defaultCliVersion: '${UI_INGESTION_DEFAULT_CLI_VERSION:@cliMajorVersion@}' telemetry: enabledCli: ${CLI_TELEMETRY_ENABLED:true} From d590ae8f2f6787f8773d8aeed36c98c7af8e552b Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Sun, 2 Oct 2022 16:47:21 -0700 Subject: [PATCH 19/76] docs: add information about mapping ports for datahub-gms (#6092) --- 
docs/quickstart.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/quickstart.md b/docs/quickstart.md index f8fc64e8e65f25..83efe2fc1f262d 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -96,6 +96,7 @@ By default the quickstart deploy will require the following ports to be free on In case the default ports conflict with software you are already running on your machine, you can override these ports by passing additional flags to the `datahub docker quickstart` command. e.g. To override the MySQL port with 53306 (instead of the default 3306), you can say: `datahub docker quickstart --mysql-port 53306`. Use `datahub docker quickstart --help` to see all the supported options. + For the metadata service container (datahub-gms), you need to use an environment variable, `DATAHUB_MAPPED_GMS_PORT`. So for instance to use the port 58080, you would say `DATAHUB_MAPPED_GMS_PORT=58080 datahub docker quickstart` From bab5acc5e37ba4ce6738bc6af2885c4b48738532 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Sun, 2 Oct 2022 19:35:10 -0700 Subject: [PATCH 20/76] chore(deps): upgrade graphql-java deps to 19.0 (#6099) --- build.gradle | 4 +++- datahub-graphql-core/build.gradle | 3 ++- .../java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/build.gradle b/build.gradle index 55a4eca1a11148..6965c8e7732613 100644 --- a/build.gradle +++ b/build.gradle @@ -4,6 +4,7 @@ buildscript { ext.mavenVersion = '3.6.3' ext.springVersion = '5.3.20' ext.springBootVersion = '2.5.12' + ext.graphQLJavaVersion = '19.0' apply from: './repositories.gradle' buildscript.repositories.addAll(project.repositories) dependencies { @@ -66,7 +67,8 @@ project.ext.externalDependency = [ 'elasticSearchRest': 'org.elasticsearch.client:elasticsearch-rest-high-level-client:7.9.3', 'elasticSearchTransport': 'org.elasticsearch.client:transport:7.9.3', 'findbugsAnnotations': 'com.google.code.findbugs:annotations:3.0.1', - 
'graphqlJava': 'com.graphql-java:graphql-java:16.1', + 'graphqlJava': 'com.graphql-java:graphql-java:' + graphQLJavaVersion, + 'graphqlJavaScalars': 'com.graphql-java:graphql-java-extended-scalars:' + graphQLJavaVersion, 'gson': 'com.google.code.gson:gson:2.8.9', 'guice': 'com.google.inject:guice:4.2.2', 'guava': 'com.google.guava:guava:27.0.1-jre', diff --git a/datahub-graphql-core/build.gradle b/datahub-graphql-core/build.gradle index 62d77f03bbb5c6..aa13ce05d7d59e 100644 --- a/datahub-graphql-core/build.gradle +++ b/datahub-graphql-core/build.gradle @@ -9,7 +9,8 @@ dependencies { compile project(':metadata-io') compile project(':metadata-utils') - compile externalDependency.graphqlJava + implementation externalDependency.graphqlJava + implementation externalDependency.graphqlJavaScalars compile externalDependency.antlr4Runtime compile externalDependency.antlr4 compile externalDependency.guava diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index c292c0cd3acc34..db9f999a2bbe32 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -282,7 +282,7 @@ import static com.linkedin.datahub.graphql.Constants.*; import static com.linkedin.metadata.Constants.*; -import static graphql.Scalars.*; +import static graphql.scalars.ExtendedScalars.*; /** From e688dd554a5d4da8d1d5f9474fe683ee133b127e Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Sun, 2 Oct 2022 23:16:45 -0700 Subject: [PATCH 21/76] chore(deps): upgrade neo4j to 4.4.x (#6101) --- build.gradle | 5 ++-- docker/docker-compose.yml | 2 +- .../quickstart/docker-compose.quickstart.yml | 2 +- .../graph/neo4j/Neo4jTestServerBuilder.java | 23 +++++++++---------- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/build.gradle 
b/build.gradle index 6965c8e7732613..02bebf5ebf14bc 100644 --- a/build.gradle +++ b/build.gradle @@ -4,6 +4,7 @@ buildscript { ext.mavenVersion = '3.6.3' ext.springVersion = '5.3.20' ext.springBootVersion = '2.5.12' + ext.neo4jVersion = '4.4.9' ext.graphQLJavaVersion = '19.0' apply from: './repositories.gradle' buildscript.repositories.addAll(project.repositories) @@ -111,8 +112,8 @@ project.ext.externalDependency = [ 'mockServer': 'org.mock-server:mockserver-netty:5.11.2', 'mockServerClient': 'org.mock-server:mockserver-client-java:5.11.2', 'mysqlConnector': 'mysql:mysql-connector-java:8.0.20', - 'neo4jHarness': 'org.neo4j.test:neo4j-harness:3.4.11', - 'neo4jJavaDriver': 'org.neo4j.driver:neo4j-java-driver:4.0.1', + 'neo4jHarness': 'org.neo4j.test:neo4j-harness:' + neo4jVersion, + 'neo4jJavaDriver': 'org.neo4j.driver:neo4j-java-driver:' + neo4jVersion, 'opentelemetryApi': 'io.opentelemetry:opentelemetry-api:1.0.0', 'opentelemetryAnnotations': 'io.opentelemetry:opentelemetry-extension-annotations:1.0.0', 'opentracingJdbc':'io.opentracing.contrib:opentracing-jdbc:0.2.15', diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 58ee367126c506..7592fd5e3cd78a 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -69,7 +69,7 @@ services: retries: 4 neo4j: - image: neo4j:4.0.6 + image: neo4j:4.4.9-community env_file: neo4j/env/docker.env hostname: neo4j container_name: neo4j diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index a7018a176f81ce..6c0772206a2be7 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -170,7 +170,7 @@ services: - NEO4J_dbms_default__database=graph.db - NEO4J_dbms_allow__upgrade=true hostname: neo4j - image: neo4j:4.0.6 + image: neo4j:4.4.9-community ports: - ${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474}:7474 - ${DATAHUB_MAPPED_NEO4J_BOLT_PORT:-7687}:7687 diff --git 
a/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jTestServerBuilder.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jTestServerBuilder.java index a95768994a7382..4d6d15255b9222 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jTestServerBuilder.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jTestServerBuilder.java @@ -3,31 +3,30 @@ import java.io.File; import java.net.URI; import org.neo4j.graphdb.GraphDatabaseService; -import org.neo4j.harness.ServerControls; -import org.neo4j.harness.TestServerBuilder; -import org.neo4j.harness.TestServerBuilders; - +import org.neo4j.harness.Neo4j; +import org.neo4j.harness.Neo4jBuilder; +import org.neo4j.harness.internal.InProcessNeo4jBuilder; public class Neo4jTestServerBuilder { - private final TestServerBuilder builder; - private ServerControls controls; + private final Neo4jBuilder builder; + private Neo4j controls; - private Neo4jTestServerBuilder(TestServerBuilder builder) { + private Neo4jTestServerBuilder(Neo4jBuilder builder) { this.builder = builder; } public Neo4jTestServerBuilder() { - this(TestServerBuilders.newInProcessBuilder()); + this(new InProcessNeo4jBuilder()); } public Neo4jTestServerBuilder(File workingDirectory) { - this(TestServerBuilders.newInProcessBuilder(workingDirectory)); + this(new InProcessNeo4jBuilder(workingDirectory.toPath())); } - public ServerControls newServer() { + public Neo4j newServer() { if (controls == null) { - controls = builder.newServer(); + controls = builder.build(); } return controls; } @@ -61,6 +60,6 @@ public URI httpsURI() { } public GraphDatabaseService getGraphDatabaseService() { - return controls.graph(); + return controls.defaultDatabaseService(); } } From 5fb875a32be06c831bda690912e18644e52768d0 Mon Sep 17 00:00:00 2001 From: Peter Szalai Date: Mon, 3 Oct 2022 09:35:46 +0200 Subject: [PATCH 22/76] feat(docs): Improve documentation on Search (#5889) --- docs/how/search.md | 141 
++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 127 insertions(+), 14 deletions(-) diff --git a/docs/how/search.md b/docs/how/search.md index 6cef5cf8893118..73d4ec8f1c2c32 100644 --- a/docs/how/search.md +++ b/docs/how/search.md @@ -1,43 +1,145 @@ -# Search Guide +import FeatureAvailability from '@site/src/components/FeatureAvailability'; -## Introduction +# About DataHub Search -The search bar is one of the means of finding data in Datahub. In this document, we discuss more effective ways of finding information beyond doing a standard keyword search. This is because keyword searches can return results from almost any part of an entity. + -### Search in Specific Fields + + + + +The **search bar** is an important mechanism for discovering data assets in DataHub. From the search bar, you can find Datasets, Columns, Dashboards, Charts, Data Pipelines, and more. Simply type in a term and press 'enter'. + +

+ +

+ +**Advanced queries** and the **filter sidebar** help fine-tune queries. For programmatic users, DataHub provides a **GraphQL API** as well. + +## Search Setup, Prerequisites, and Permissions + +Search is available for all users. Although Search works out of the box, the more relevant data you ingest, the better the results are. + +## Using Search + +Searching is as easy as typing in relevant business terms and pressing 'enter' to view matching data assets. + +By default, search terms will match against different aspects of a data asset. This includes asset names, descriptions, tags, terms, owners, and even specific attributes like the names of columns in a table. + + +### Filters + +The filters sidebar sits on the left-hand side of search results, and lets users find assets by drilling down. You can quickly filter by Data Platform (e.g. Snowflake), Tags, Glossary Terms, Domain, Owners, and more with a single click. + +

+ +

+ +### Results + +Search results appear ranked by their relevance. In self-hosted DataHub, ranking is based on how closely the query matched textual fields of an asset and its metadata. In Managed DataHub, ranking is based on a combination of textual relevance, usage (queries / views), and change frequency. + +With better metadata come better results. Learn more about ingesting technical metadata in the [metadata ingestion](../../metadata-ingestion/README.md) guide. + +### Advanced queries + +The search bar supports advanced queries with pattern matching, logical expressions and filtering by specific field matches. + +The following examples are in the format of +X: *typical question* : ```what to key in search bar```. [sample url](https://example.com) Wildcard characters can be added to the search terms as well. These examples are non exhaustive and using Datasets as a reference. -I want to: -1. *Find a dataset with the word **mask** in the name* : +If you want to: +1. Find a dataset with the word **mask** in the name: ```name: *mask*``` [Sample results](https://demo.datahubproject.io/search?page=1&query=name%3A%20%2Amask%2A) This will return entities with **mask** in the name. Names tends to be connected by other symbols, hence the wildcard symbols before and after the word. -2. *Find a dataset with a property, **encoding*** +2. Find a dataset with a property, **encoding** ```customProperties: encoding*``` [Sample results](https://demo.datahubproject.io/search?page=1&query=customProperties%3A%20encoding%2A) Dataset Properties are indexed in ElasticSearch the manner of key=value. Hence if you know the precise key-value pair, you can search using ```key=value```. However, if you only know the key, you can use wildcards to replace the value and that is what is being done here. -3. *Find a dataset with a column name, **latitude*** +3. 
Find a dataset with a column name, **latitude** ```fieldPaths: latitude``` [Sample results](https://demo.datahubproject.io/search?page=1&query=fieldPaths%3A%20latitude) fieldPaths is the name of the attribute that holds the column name in Datasets. -4. *Find a dataset with the term **latitude** in the field description* +4. Find a dataset with the term **latitude** in the field description ```editedFieldDescriptions: latitude OR fieldDescriptions: latitude``` [Sample results](https://demo.datahubproject.io/search?page=1&query=editedFieldDescriptions%3A%20latitude%20OR%20fieldDescriptions%3A%20latitude) Datasets has 2 attributes that contains field description. fieldDescription comes from the SchemaMetadata aspect, while editedFieldDescriptions comes from the EditableSchemaMetadata aspect. EditableSchemaMetadata holds information that comes from UI edits, while SchemaMetadata holds data from ingestion of the dataset. -5. *Find a dataset with the term **logical** in the dataset description* +5. Find a dataset with the term **logical** in the dataset description ```editedDescription: *logical* OR description: *logical*``` [Sample results](https://demo.datahubproject.io/search?page=1&query=editedDescription%3A%20%2Alogical%2A%20OR%20description%3A%20%2Alogical%2A) Similar to field descriptions, dataset descriptions can be found in 2 aspects, hence the need to search 2 attributes. -6. *Find a dataset which reside in one of the browsing folders, for instance, the **hive** folder* +6. Find a dataset which reside in one of the browsing folders, for instance, the **hive** folder ```browsePaths: *hive*``` [Sample results](https://demo.datahubproject.io/search?page=1&query=browsePaths%3A%20%2Ahive%2A) BrowsePath is stored as a complete string, for instance ```/datasets/prod/hive/SampleKafkaDataset```, hence the need for wildcards on both ends of the term to return a result. -## Where to find more information? + + +### Videos + +**What can you do with DataHub?** + +

+ +

+ + +### GraphQL + +* [searchAcrossEntities](https://datahubproject.io/docs/graphql/queries/#searchacrossentities) +* You can try out the API on the demo instance's public GraphQL interface: [here](https://demo.datahubproject.io/api/graphiql) + +The same GraphQL API that powers the Search UI can be used +for integrations and programmatic use-cases. + +``` +# Example query +{ + searchAcrossEntities( + input: {types: [], query: "*", start: 0, count: 10, filters: [{field: "fieldTags", value: "urn:li:tag:Dimension"}]} + ) { + start + count + total + searchResults { + entity { + type + ... on Dataset { + urn + type + platform { + name + } + name + } + } + } + } +} +``` + + +### DataHub Blog +* [Using DataHub for Search & Discovery](https://blog.datahubproject.io/using-datahub-for-search-discovery-fa309089be22) + +## FAQ and Troubleshooting + +**How are the results ordered?** + +The order of the search results is based on the weight that DataHub gives them according to our search algorithm. The current algorithm in OSS DataHub is based on a text-match score from Elasticsearch. + +**Where to find more information?** + The sample queries here are non exhaustive. [The link here](https://demo.datahubproject.io/tag/urn:li:tag:Searchable) shows the current list of indexed fields for each entity inside Datahub. Click on the fields inside each entity and see which field has the tag ```Searchable```. However, it does not tell you the specific attribute name to use for specialized searches. One way to do so is to inspect the ElasticSearch indices, for example: ```curl http://localhost:9200/_cat/indices``` returns all the ES indices in the ElasticSearch container. @@ -121,5 +223,16 @@ example information of a dataset: }, ``` + + +*Need more help? 
Join the conversation in [Slack](http://slack.datahubproject.io)!* +### Related Features +* [Metadata ingestion framework](../../metadata-ingestion/README.md) From bfb903cfb874db2c19a53e0eb2d3c1c69725dba8 Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Mon, 3 Oct 2022 19:56:19 -0500 Subject: [PATCH 23/76] feat(ingest): add async option to ingest proposal endpoint (#6097) * feat(ingest): add async option to ingest proposal endpoint * small tweak to validate before write to K, also keep existing path for timeseries aspects * avoid double convert Co-authored-by: Shirshanka Das --- .../resolvers/mutate/MutationUtils.java | 2 +- .../mutate/UpdateUserSettingResolver.java | 2 +- .../resolvers/mutate/util/DeleteUtils.java | 2 +- .../mutate/util/DeprecationUtils.java | 2 +- .../resolvers/mutate/util/DomainUtils.java | 2 +- .../resolvers/mutate/util/LabelUtils.java | 2 +- .../resolvers/mutate/util/OwnerUtils.java | 2 +- .../graphql/types/chart/ChartType.java | 2 +- .../types/dashboard/DashboardType.java | 2 +- .../graphql/types/dataflow/DataFlowType.java | 2 +- .../graphql/types/datajob/DataJobType.java | 2 +- .../graphql/types/dataset/DatasetType.java | 4 +- .../graphql/types/notebook/NotebookType.java | 2 +- .../datahub/graphql/types/tag/TagType.java | 2 +- .../linkedin/datahub/graphql/TestUtils.java | 25 ++++++++++ .../BatchUpdateSoftDeletedResolverTest.java | 30 +++-------- .../BatchUpdateDeprecationResolverTest.java | 30 +++-------- .../domain/BatchSetDomainResolverTest.java | 44 ++++------------ .../glossary/AddRelatedTermsResolverTest.java | 42 +++------------- .../RemoveRelatedTermsResolverTest.java | 25 ++-------- .../glossary/UpdateNameResolverTest.java | 21 ++------ .../UpdateParentNodeResolverTest.java | 24 +++------ .../mutate/MutableTypeBatchResolverTest.java | 2 +- .../mutate/UpdateUserSettingResolverTest.java | 6 +-- .../owner/AddOwnersResolverTest.java | 25 +++------- .../owner/BatchAddOwnersResolverTest.java | 24 +++------ 
.../owner/BatchRemoveOwnersResolverTest.java | 21 ++------ .../resolvers/tag/AddTagsResolverTest.java | 24 +++------ .../tag/BatchAddTagsResolverTest.java | 28 +++-------- .../tag/BatchRemoveTagsResolverTest.java | 20 +++----- .../resolvers/term/AddTermsResolverTest.java | 12 ++--- .../term/BatchAddTermsResolverTest.java | 25 +++------- .../term/BatchRemoveTermsResolverTest.java | 21 ++------ .../dao/producer/KafkaEventProducer.java | 38 ++++++++++++++ .../com/linkedin/metadata/EventUtils.java | 39 ++++++++++++--- .../src/datahub/cli/cli_utils.py | 4 +- .../metadata/client/JavaEntityClient.java | 14 +++--- .../metadata/entity/DeleteEntityService.java | 4 +- .../metadata/entity/EntityService.java | 50 +++++++++++-------- .../metadata/entity/RetentionService.java | 4 +- .../metadata/event/EventProducer.java | 11 ++++ .../metadata/entity/EntityServiceTest.java | 50 ++++++++++++++++++- .../MetadataChangeProposalsProcessor.java | 2 +- .../token/StatefulTokenService.java | 4 +- .../linkedin/metadata/boot/UpgradeStep.java | 4 +- .../boot/steps/IngestPoliciesStep.java | 4 +- .../metadata/boot/steps/IngestRolesStep.java | 4 +- .../boot/steps/RemoveClientIdAspectStep.java | 2 +- .../boot/steps/RestoreDbtSiblingsIndices.java | 2 +- .../steps/UpgradeDefaultBrowsePathsStep.java | 3 +- .../steps/RestoreGlossaryIndicesTest.java | 9 ++-- .../UpgradeDefaultBrowsePathsStepTest.java | 12 +++-- .../openapi/util/MappingUtil.java | 4 +- .../com.linkedin.entity.aspects.restspec.json | 4 ++ .../com.linkedin.entity.aspects.snapshot.json | 4 ++ .../linkedin/entity/client/EntityClient.java | 26 ++++++++-- .../entity/client/RestliEntityClient.java | 3 +- .../resources/entity/AspectResource.java | 18 +++++-- .../entity/BatchIngestionRunResource.java | 3 +- 59 files changed, 396 insertions(+), 405 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutationUtils.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutationUtils.java index ab7f645887f903..3cc64aed7f5621 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutationUtils.java @@ -30,7 +30,7 @@ public static void persistAspect(Urn urn, String aspectName, RecordTemplate aspe proposal.setAspectName(aspectName); proposal.setAspect(GenericRecordUtils.serializeAspect(aspect)); proposal.setChangeType(ChangeType.UPSERT); - entityService.ingestProposal(proposal, getAuditStamp(actor)); + entityService.ingestProposal(proposal, getAuditStamp(actor), false); } public static MetadataChangeProposal buildMetadataChangeProposal(Urn urn, String aspectName, RecordTemplate aspect, Urn actor, EntityService entityService) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolver.java index 47678973a515db..86a8415da3d39a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolver.java @@ -51,7 +51,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw MetadataChangeProposal proposal = buildMetadataChangeProposal(actor, CORP_USER_SETTINGS_ASPECT_NAME, newSettings, actor, _entityService); - _entityService.ingestProposal(proposal, getAuditStamp(actor)); + _entityService.ingestProposal(proposal, getAuditStamp(actor), false); return true; } catch (Exception e) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeleteUtils.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeleteUtils.java index 75c54277e2b9d7..1828b6eb83f637 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeleteUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeleteUtils.java @@ -74,7 +74,7 @@ private static MetadataChangeProposal buildSoftDeleteProposal( private static void ingestChangeProposals(List changes, EntityService entityService, Urn actor) { // TODO: Replace this with a batch ingest proposals endpoint. for (MetadataChangeProposal change : changes) { - entityService.ingestProposal(change, getAuditStamp(actor)); + entityService.ingestProposal(change, getAuditStamp(actor), false); } } } \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeprecationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeprecationUtils.java index 48af0b401084e4..3a12dd8b6eb75c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeprecationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeprecationUtils.java @@ -89,7 +89,7 @@ private static MetadataChangeProposal buildUpdateDeprecationProposal( private static void ingestChangeProposals(List changes, EntityService entityService, Urn actor) { // TODO: Replace this with a batch ingest proposals endpoint. 
for (MetadataChangeProposal change : changes) { - entityService.ingestProposal(change, getAuditStamp(actor)); + entityService.ingestProposal(change, getAuditStamp(actor), false); } } } \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java index addd0bbd2b9f10..e0e964b02fa23a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java @@ -88,7 +88,7 @@ public static void validateDomain(Urn domainUrn, EntityService entityService) { private static void ingestChangeProposals(List changes, EntityService entityService, Urn actor) { // TODO: Replace this with a batch ingest proposals endpoint. for (MetadataChangeProposal change : changes) { - entityService.ingestProposal(change, getAuditStamp(actor)); + entityService.ingestProposal(change, getAuditStamp(actor), false); } } } \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java index 62fe5531ffb6dc..7f9c44e29e2ab0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java @@ -556,7 +556,7 @@ private static GlossaryTermAssociationArray removeTermsIfExists(GlossaryTerms te private static void ingestChangeProposals(List changes, EntityService entityService, Urn actor) { // TODO: Replace this with a batch ingest proposals endpoint. 
for (MetadataChangeProposal change : changes) { - entityService.ingestProposal(change, getAuditStamp(actor)); + entityService.ingestProposal(change, getAuditStamp(actor), false); } } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java index 4c13367f5da0ed..f3510283cf60ae 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java @@ -215,7 +215,7 @@ public static Boolean validateRemoveInput( private static void ingestChangeProposals(List changes, EntityService entityService, Urn actor) { // TODO: Replace this with a batch ingest proposals endpoint. for (MetadataChangeProposal change : changes) { - entityService.ingestProposal(change, getAuditStamp(actor)); + entityService.ingestProposal(change, getAuditStamp(actor), false); } } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/ChartType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/ChartType.java index 87be15ef21f172..1c041db9f8b5df 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/ChartType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/ChartType.java @@ -203,7 +203,7 @@ public Chart update(@Nonnull String urn, @Nonnull ChartUpdateInput input, @Nonnu proposals.forEach(proposal -> proposal.setEntityUrn(UrnUtils.getUrn(urn))); try { - _entityClient.batchIngestProposals(proposals, context.getAuthentication()); + _entityClient.batchIngestProposals(proposals, context.getAuthentication(), false); } catch (RemoteInvocationException e) { throw new RuntimeException(String.format("Failed to write entity with urn %s", urn), e); } diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/DashboardType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/DashboardType.java index 62566043adef86..c4ef925e0d71c8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/DashboardType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/DashboardType.java @@ -192,7 +192,7 @@ public Dashboard update(@Nonnull String urn, @Nonnull DashboardUpdateInput input proposals.forEach(proposal -> proposal.setEntityUrn(UrnUtils.getUrn(urn))); try { - _entityClient.batchIngestProposals(proposals, context.getAuthentication()); + _entityClient.batchIngestProposals(proposals, context.getAuthentication(), false); } catch (RemoteInvocationException e) { throw new RuntimeException(String.format("Failed to write entity with urn %s", urn), e); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java index 57e03f16061c88..92c3fe90685824 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java @@ -179,7 +179,7 @@ public DataFlow update(@Nonnull String urn, @Nonnull DataFlowUpdateInput input, proposals.forEach(proposal -> proposal.setEntityUrn(UrnUtils.getUrn(urn))); try { - _entityClient.batchIngestProposals(proposals, context.getAuthentication()); + _entityClient.batchIngestProposals(proposals, context.getAuthentication(), false); } catch (RemoteInvocationException e) { throw new RuntimeException(String.format("Failed to write entity with urn %s", urn), e); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java index 8a5d74faf93497..1200eecec7d532 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java @@ -179,7 +179,7 @@ public DataJob update(@Nonnull String urn, @Nonnull DataJobUpdateInput input, @N proposals.forEach(proposal -> proposal.setEntityUrn(UrnUtils.getUrn(urn))); try { - _entityClient.batchIngestProposals(proposals, context.getAuthentication()); + _entityClient.batchIngestProposals(proposals, context.getAuthentication(), false); } catch (RemoteInvocationException e) { throw new RuntimeException(String.format("Failed to write entity with urn %s", urn), e); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java index 87b96f91aeda6e..6bb47da3a4dc1f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java @@ -208,7 +208,7 @@ public List batchUpdate(@Nonnull BatchDatasetUpdateInput[] input, @Nonn final List urns = Arrays.stream(input).map(BatchDatasetUpdateInput::getUrn).collect(Collectors.toList()); try { - _entityClient.batchIngestProposals(proposals, context.getAuthentication()); + _entityClient.batchIngestProposals(proposals, context.getAuthentication(), false); } catch (RemoteInvocationException e) { throw new RuntimeException(String.format("Failed to write entity with urn %s", urns), e); } @@ -224,7 +224,7 @@ public Dataset update(@Nonnull String urn, @Nonnull DatasetUpdateInput input, @N proposals.forEach(proposal -> proposal.setEntityUrn(UrnUtils.getUrn(urn))); try { - 
_entityClient.batchIngestProposals(proposals, context.getAuthentication()); + _entityClient.batchIngestProposals(proposals, context.getAuthentication(), false); } catch (RemoteInvocationException e) { throw new RuntimeException(String.format("Failed to write entity with urn %s", urn), e); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/notebook/NotebookType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/notebook/NotebookType.java index ba715d990e8b0c..a841439c3b926f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/notebook/NotebookType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/notebook/NotebookType.java @@ -175,7 +175,7 @@ public Notebook update(@Nonnull String urn, @Nonnull NotebookUpdateInput input, proposals.forEach(proposal -> proposal.setEntityUrn(UrnUtils.getUrn(urn))); try { - _entityClient.batchIngestProposals(proposals, context.getAuthentication()); + _entityClient.batchIngestProposals(proposals, context.getAuthentication(), false); } catch (RemoteInvocationException e) { throw new RuntimeException(String.format("Failed to write entity with urn %s", urn), e); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/TagType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/TagType.java index 41ae275f4242bf..9aace619650066 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/TagType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/TagType.java @@ -132,7 +132,7 @@ public Tag update(@Nonnull String urn, @Nonnull TagUpdateInput input, @Nonnull Q final Collection proposals = TagUpdateInputMapper.map(input, actor); proposals.forEach(proposal -> proposal.setEntityUrn(UrnUtils.getUrn(urn))); try { - _entityClient.batchIngestProposals(proposals, context.getAuthentication()); + 
_entityClient.batchIngestProposals(proposals, context.getAuthentication(), false); } catch (RemoteInvocationException e) { throw new RuntimeException(String.format("Failed to write entity with urn %s", urn), e); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java index ef0cc566c575ec..e93f48336e8a68 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java @@ -3,6 +3,9 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.Authorizer; +import com.linkedin.common.AuditStamp; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.mxe.MetadataChangeProposal; import org.mockito.Mockito; @@ -36,5 +39,27 @@ public static QueryContext getMockDenyContext() { return mockContext; } + public static void verifyIngestProposal(EntityService mockService, int numberOfInvocations, MetadataChangeProposal proposal) { + Mockito.verify(mockService, Mockito.times(numberOfInvocations)).ingestProposal( + Mockito.eq(proposal), + Mockito.any(AuditStamp.class), + Mockito.eq(false) + ); + } + + public static void verifyIngestProposal(EntityService mockService, int numberOfInvocations) { + Mockito.verify(mockService, Mockito.times(numberOfInvocations)).ingestProposal( + Mockito.any(MetadataChangeProposal.class), + Mockito.any(AuditStamp.class), + Mockito.eq(false) + ); + } + + public static void verifyNoIngestProposal(EntityService mockService) { + Mockito.verify(mockService, Mockito.times(0)).ingestProposal( + Mockito.any(), + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); + } + private TestUtils() { } } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/delete/BatchUpdateSoftDeletedResolverTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/delete/BatchUpdateSoftDeletedResolverTest.java index 2fe927100d55cb..12cbf21b136663 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/delete/BatchUpdateSoftDeletedResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/delete/BatchUpdateSoftDeletedResolverTest.java @@ -66,10 +66,7 @@ public void testGetSuccessNoExistingStatus() throws Exception { proposal1.setAspect(GenericRecordUtils.serializeAspect(newStatus)); proposal1.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal1), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal1); final MetadataChangeProposal proposal2 = new MetadataChangeProposal(); proposal2.setEntityUrn(Urn.createFromString(TEST_ENTITY_URN_2)); @@ -78,10 +75,7 @@ public void testGetSuccessNoExistingStatus() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(newStatus)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal2); } @Test @@ -124,10 +118,7 @@ public void testGetSuccessExistingStatus() throws Exception { proposal1.setAspect(GenericRecordUtils.serializeAspect(newStatus)); proposal1.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal1), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal1); final MetadataChangeProposal proposal2 = new MetadataChangeProposal(); proposal2.setEntityUrn(Urn.createFromString(TEST_ENTITY_URN_2)); @@ -136,10 +127,7 @@ public void testGetSuccessExistingStatus() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(newStatus)); 
proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal2); } @Test @@ -171,9 +159,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -191,9 +177,7 @@ public void testGetUnauthorized() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -202,7 +186,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); BatchUpdateSoftDeletedResolver resolver = new BatchUpdateSoftDeletedResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java index 49c24770333c73..36909eb075d9bb 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java @@ -73,10 +73,7 @@ public void testGetSuccessNoExistingDeprecation() throws 
Exception { proposal1.setAspect(GenericRecordUtils.serializeAspect(newDeprecation)); proposal1.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal1), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal1); final MetadataChangeProposal proposal2 = new MetadataChangeProposal(); proposal2.setEntityUrn(Urn.createFromString(TEST_ENTITY_URN_2)); @@ -85,10 +82,7 @@ public void testGetSuccessNoExistingDeprecation() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(newDeprecation)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal2); } @Test @@ -140,10 +134,7 @@ public void testGetSuccessExistingDeprecation() throws Exception { proposal1.setAspect(GenericRecordUtils.serializeAspect(newDeprecation)); proposal1.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal1), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal1); final MetadataChangeProposal proposal2 = new MetadataChangeProposal(); proposal2.setEntityUrn(Urn.createFromString(TEST_ENTITY_URN_2)); @@ -152,10 +143,7 @@ public void testGetSuccessExistingDeprecation() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(newDeprecation)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal2); } @Test @@ -188,9 +176,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, 
Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -209,9 +195,7 @@ public void testGetUnauthorized() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -220,7 +204,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); BatchUpdateDeprecationResolver resolver = new BatchUpdateDeprecationResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java index 756e085593c26c..fe3bfb3dec66b2 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java @@ -77,10 +77,7 @@ public void testGetSuccessNoExistingDomains() throws Exception { proposal1.setAspect(GenericRecordUtils.serializeAspect(newDomains)); proposal1.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal1), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal1); final MetadataChangeProposal proposal2 = new MetadataChangeProposal(); proposal2.setEntityUrn(Urn.createFromString(TEST_ENTITY_URN_2)); @@ -89,10 +86,7 @@ public void testGetSuccessNoExistingDomains() throws Exception { 
proposal2.setAspect(GenericRecordUtils.serializeAspect(newDomains)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal2); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_DOMAIN_2_URN)) @@ -147,10 +141,7 @@ public void testGetSuccessExistingDomains() throws Exception { proposal1.setAspect(GenericRecordUtils.serializeAspect(newDomains)); proposal1.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal1), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal1); final MetadataChangeProposal proposal2 = new MetadataChangeProposal(); proposal2.setEntityUrn(Urn.createFromString(TEST_ENTITY_URN_2)); @@ -159,10 +150,7 @@ public void testGetSuccessExistingDomains() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(newDomains)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal2); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_DOMAIN_2_URN)) @@ -215,10 +203,7 @@ public void testGetSuccessUnsetDomains() throws Exception { proposal1.setAspect(GenericRecordUtils.serializeAspect(newDomains)); proposal1.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal1), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal1); final MetadataChangeProposal proposal2 = new MetadataChangeProposal(); proposal2.setEntityUrn(Urn.createFromString(TEST_ENTITY_URN_2)); @@ -227,10 +212,7 @@ public void testGetSuccessUnsetDomains() throws Exception { 
proposal2.setAspect(GenericRecordUtils.serializeAspect(newDomains)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal2); } @Test @@ -258,9 +240,7 @@ public void testGetFailureDomainDoesNotExist() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -294,9 +274,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -315,9 +293,7 @@ public void testGetUnauthorized() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -326,7 +302,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolverTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolverTest.java index 451faf9bc8e382..6bbf4f47975607 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolverTest.java @@ -1,7 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.glossary; import com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; @@ -9,7 +8,6 @@ import com.linkedin.datahub.graphql.generated.TermRelationshipType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.ExecutionException; @@ -58,10 +56,7 @@ public void testGetSuccessIsRelatedNonExistent() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_ENTITY_URN)) ); @@ -93,10 +88,7 @@ public void testGetSuccessHasRelatedNonExistent() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_ENTITY_URN)) ); @@ -125,10 +117,7 @@ 
public void testGetFailAddSelfAsRelatedTerm() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(ExecutionException.class, () -> resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) - ); + verifyNoIngestProposal(mockService); } @Test @@ -148,10 +137,7 @@ public void testGetFailAddNonTermAsRelatedTerm() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(ExecutionException.class, () -> resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) - ); + verifyNoIngestProposal(mockService); } @Test @@ -172,10 +158,7 @@ public void testGetFailAddNonExistentTermAsRelatedTerm() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(ExecutionException.class, () -> resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) - ); + verifyNoIngestProposal(mockService); } @Test @@ -196,10 +179,7 @@ public void testGetFailAddToNonExistentUrn() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(ExecutionException.class, () -> resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) - ); + verifyNoIngestProposal(mockService); } @Test @@ -220,10 +200,7 @@ public void testGetFailAddToNonTerm() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(ExecutionException.class, () -> resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - 
Mockito.any(AuditStamp.class) - ); + verifyNoIngestProposal(mockService); } @Test @@ -246,10 +223,7 @@ public void testFailNoPermissions() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(ExecutionException.class, () -> resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) - ); + verifyNoIngestProposal(mockService); } } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolverTest.java index 6a704c2b61c127..dd54d7f9835c1d 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolverTest.java @@ -1,7 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.glossary; import com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; import com.linkedin.common.GlossaryTermUrnArray; import com.linkedin.common.urn.GlossaryTermUrn; import com.linkedin.common.urn.Urn; @@ -12,7 +11,6 @@ import com.linkedin.glossary.GlossaryRelatedTerms; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetchingEnvironment; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -20,8 +18,7 @@ import java.util.Arrays; import java.util.concurrent.ExecutionException; -import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; -import static com.linkedin.datahub.graphql.TestUtils.getMockDenyContext; +import static com.linkedin.datahub.graphql.TestUtils.*; import static org.testng.Assert.assertThrows; import static 
org.testng.Assert.assertTrue; @@ -57,10 +54,7 @@ public void testGetSuccessIsA() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_ENTITY_URN)) ); @@ -92,10 +86,7 @@ public void testGetSuccessHasA() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_ENTITY_URN)) ); @@ -123,10 +114,7 @@ public void testFailAspectDoesNotExist() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(ExecutionException.class, () -> resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) - ); + verifyNoIngestProposal(mockService); } @Test @@ -155,10 +143,7 @@ public void testFailNoPermissions() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(ExecutionException.class, () -> resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) - ); + verifyNoIngestProposal(mockService); Mockito.verify(mockService, Mockito.times(0)).exists( Mockito.eq(Urn.createFromString(TEST_ENTITY_URN)) ); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateNameResolverTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateNameResolverTest.java index e3edfe0efe1342..1c037ea04ef25c 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateNameResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateNameResolverTest.java @@ -21,7 +21,7 @@ import java.util.concurrent.CompletionException; -import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; +import static com.linkedin.datahub.graphql.TestUtils.*; import static org.testng.Assert.assertThrows; import static org.testng.Assert.assertTrue; @@ -71,10 +71,7 @@ public void testGetSuccess() throws Exception { final MetadataChangeProposal proposal = setupTests(mockEnv, mockService); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal), - Mockito.any() - ); + verifyIngestProposal(mockService, 1, proposal); } @Test @@ -108,10 +105,7 @@ public void testGetSuccessForNode() throws Exception { UpdateNameResolver resolver = new UpdateNameResolver(mockService); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal), - Mockito.any() - ); + verifyIngestProposal(mockService, 1, proposal); } @Test @@ -145,10 +139,7 @@ public void testGetSuccessForDomain() throws Exception { UpdateNameResolver resolver = new UpdateNameResolver(mockService); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal), - Mockito.any() - ); + verifyIngestProposal(mockService, 1, proposal); } @Test @@ -162,8 +153,6 @@ public void testGetFailureEntityDoesNotExist() throws Exception { setupTests(mockEnv, mockService); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - 
Mockito.any(), - Mockito.any()); + verifyNoIngestProposal(mockService); } } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateParentNodeResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateParentNodeResolverTest.java index 1cba0a86b97632..b9161996e8e354 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateParentNodeResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/UpdateParentNodeResolverTest.java @@ -20,7 +20,7 @@ import java.net.URISyntaxException; -import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; +import static com.linkedin.datahub.graphql.TestUtils.*; import static org.testng.Assert.assertThrows; import static org.testng.Assert.assertTrue; @@ -72,10 +72,7 @@ public void testGetSuccess() throws Exception { final MetadataChangeProposal proposal = setupTests(mockEnv, mockService); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal), - Mockito.any() - ); + verifyIngestProposal(mockService, 1, proposal); } @Test @@ -111,10 +108,7 @@ public void testGetSuccessForNode() throws Exception { UpdateParentNodeResolver resolver = new UpdateParentNodeResolver(mockService); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal), - Mockito.any() - ); + verifyIngestProposal(mockService, 1, proposal); } @Test @@ -129,9 +123,7 @@ public void testGetFailureEntityDoesNotExist() throws Exception { setupTests(mockEnv, mockService); assertThrows(IllegalArgumentException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any()); + verifyNoIngestProposal(mockService); } @Test @@ -146,9 +138,7 @@ public void 
testGetFailureNodeDoesNotExist() throws Exception { setupTests(mockEnv, mockService); assertThrows(IllegalArgumentException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any()); + verifyNoIngestProposal(mockService); } @Test @@ -163,8 +153,6 @@ public void testGetFailureParentIsNotNode() throws Exception { setupTests(mockEnv, mockService); assertThrows(URISyntaxException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any()); + verifyNoIngestProposal(mockService); } } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeBatchResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeBatchResolverTest.java index 04ed7720333151..61dd6c678e6e0f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeBatchResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeBatchResolverTest.java @@ -144,7 +144,7 @@ public void testGetSuccess() throws Exception { List result = resolver.get(mockEnv).join(); ArgumentCaptor> changeProposalCaptor = ArgumentCaptor.forClass((Class) Collection.class); - Mockito.verify(mockClient, Mockito.times(1)).batchIngestProposals(changeProposalCaptor.capture(), Mockito.any()); + Mockito.verify(mockClient, Mockito.times(1)).batchIngestProposals(changeProposalCaptor.capture(), Mockito.any(), Mockito.eq(false)); Mockito.verify(mockClient, Mockito.times(1)).batchGetV2( Mockito.eq(Constants.DATASET_ENTITY_NAME), Mockito.eq(ImmutableSet.of(datasetUrn1, datasetUrn2)), diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolverTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolverTest.java index d21e8a8e31d565..605f1e4142e187 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolverTest.java @@ -1,6 +1,5 @@ package com.linkedin.datahub.graphql.resolvers.mutate; -import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.UpdateUserSettingInput; @@ -47,9 +46,6 @@ public void testWriteCorpUserSettings() throws Exception { proposal.setAspect(GenericRecordUtils.serializeAspect(newSettings)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal); } } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java index 16a8e27b7559ab..d4bec4adb81a08 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java @@ -12,7 +12,6 @@ import com.linkedin.datahub.graphql.resolvers.mutate.AddOwnersResolver; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletionException; import org.mockito.Mockito; @@ -56,10 +55,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { 
assertTrue(resolver.get(mockEnv).get()); // Unable to easily validate exact payload due to the injected timestamp - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_OWNER_1_URN)) @@ -98,10 +94,7 @@ public void testGetSuccessExistingOwners() throws Exception { assertTrue(resolver.get(mockEnv).get()); // Unable to easily validate exact payload due to the injected timestamp - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_OWNER_1_URN)) @@ -136,9 +129,7 @@ public void testGetFailureOwnerDoesNotExist() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -165,9 +156,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -185,9 +174,7 @@ public void testGetUnauthorized() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + 
verifyNoIngestProposal(mockService); } @Test @@ -196,7 +183,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); AddOwnersResolver resolver = new AddOwnersResolver(Mockito.mock(EntityService.class)); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java index 43121fa592fc92..3a846c8f27c715 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java @@ -74,10 +74,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(2)).ingestProposal( - Mockito.any(), // Ownership has a dynamically generated timestamp - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 2); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_OWNER_URN_1)) @@ -133,10 +130,7 @@ public void testGetSuccessExistingOwners() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(2)).ingestProposal( - Mockito.any(), // Ownership has a dynamically generated timestamp - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 2); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_OWNER_URN_1)) @@ -180,9 +174,7 @@ public void testGetFailureOwnerDoesNotExist() throws Exception { 
Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -224,9 +216,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -253,9 +243,7 @@ public void testGetUnauthorized() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -264,7 +252,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java index ac4e0a7cdbef63..6dad703929e0cf 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java @@ -14,7 +14,6 @@ import 
com.linkedin.datahub.graphql.resolvers.mutate.BatchRemoveOwnersResolver; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletionException; import org.mockito.Mockito; @@ -67,10 +66,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(2)).ingestProposal( - Mockito.any(), // Ownership has a dynamically generated timestamp - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 2); } @Test @@ -116,10 +112,7 @@ public void testGetSuccessExistingOwners() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(2)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 2); } @Test @@ -154,9 +147,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -176,9 +167,7 @@ public void testGetUnauthorized() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -187,7 +176,7 @@ public void testGetEntityClientException() throws Exception { 
Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java index 1b1ead881574d5..e0769668be0232 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java @@ -70,10 +70,7 @@ public void testGetSuccessNoExistingTags() throws Exception { proposal.setAspect(GenericRecordUtils.serializeAspect(newTags)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_TAG_1_URN)) @@ -127,10 +124,7 @@ public void testGetSuccessExistingTags() throws Exception { proposal.setAspect(GenericRecordUtils.serializeAspect(newTags)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_TAG_1_URN)) @@ -166,9 +160,7 @@ public void testGetFailureTagDoesNotExist() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - 
Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -196,9 +188,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -217,9 +207,7 @@ public void testGetUnauthorized() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -228,7 +216,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.eq(false)); AddTagsResolver resolver = new AddTagsResolver(Mockito.mock(EntityService.class)); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchAddTagsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchAddTagsResolverTest.java index 0eb361138002d3..4991f1b59a0f9f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchAddTagsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchAddTagsResolverTest.java @@ -83,10 +83,7 @@ public void testGetSuccessNoExistingTags() throws Exception { proposal1.setAspect(GenericRecordUtils.serializeAspect(newTags)); proposal1.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal1), - Mockito.any(AuditStamp.class) - ); 
+ verifyIngestProposal(mockService, 1, proposal1); final MetadataChangeProposal proposal2 = new MetadataChangeProposal(); proposal2.setEntityUrn(Urn.createFromString(TEST_ENTITY_URN_2)); @@ -95,10 +92,7 @@ public void testGetSuccessNoExistingTags() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(newTags)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal2); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_TAG_1_URN)) @@ -162,10 +156,7 @@ public void testGetSuccessExistingTags() throws Exception { proposal1.setAspect(GenericRecordUtils.serializeAspect(newTags)); proposal1.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal1), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal1); final MetadataChangeProposal proposal2 = new MetadataChangeProposal(); proposal2.setEntityUrn(Urn.createFromString(TEST_ENTITY_URN_2)); @@ -174,10 +165,7 @@ public void testGetSuccessExistingTags() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(newTags)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal2); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_TAG_1_URN)) @@ -217,7 +205,7 @@ public void testGetFailureTagDoesNotExist() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); } @Test @@ -256,7 +244,7 @@ public void 
testGetFailureResourceDoesNotExist() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); } @Test @@ -280,7 +268,7 @@ public void testGetUnauthorized() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); } @Test @@ -289,7 +277,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); BatchAddTagsResolver resolver = new BatchAddTagsResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchRemoveTagsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchRemoveTagsResolverTest.java index 124927ff0ae7bc..7d3d87636142a4 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchRemoveTagsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchRemoveTagsResolverTest.java @@ -81,7 +81,7 @@ public void testGetSuccessNoExistingTags() throws Exception { Mockito.verify(mockService, Mockito.times(1)).ingestProposal( Mockito.eq(proposal1), - Mockito.any(AuditStamp.class) + Mockito.any(AuditStamp.class), Mockito.eq(false) ); final MetadataChangeProposal proposal2 = new MetadataChangeProposal(); @@ -91,10 +91,7 @@ public void testGetSuccessNoExistingTags() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(emptyTags)); proposal2.setChangeType(ChangeType.UPSERT); - 
Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal2); } @Test @@ -154,7 +151,7 @@ public void testGetSuccessExistingTags() throws Exception { Mockito.verify(mockService, Mockito.times(1)).ingestProposal( Mockito.eq(proposal1), - Mockito.any(AuditStamp.class) + Mockito.any(AuditStamp.class), Mockito.eq(false) ); final MetadataChangeProposal proposal2 = new MetadataChangeProposal(); @@ -164,10 +161,7 @@ public void testGetSuccessExistingTags() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(emptyTags)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(mockService, Mockito.times(1)).ingestProposal( - Mockito.eq(proposal2), - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 1, proposal2); } @Test @@ -206,7 +200,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); } @Test @@ -230,7 +224,7 @@ public void testGetUnauthorized() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); } @Test @@ -239,7 +233,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); BatchRemoveTagsResolver resolver = new BatchRemoveTagsResolver(mockService); diff --git 
a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/AddTermsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/AddTermsResolverTest.java index 2ac8842d9590e1..c9ec92001f89bc 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/AddTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/AddTermsResolverTest.java @@ -59,7 +59,7 @@ public void testGetSuccessNoExistingTerms() throws Exception { // Unable to easily validate exact payload due to the injected timestamp Mockito.verify(mockService, Mockito.times(1)).ingestProposal( Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) + Mockito.any(AuditStamp.class), Mockito.eq(false) ); Mockito.verify(mockService, Mockito.times(1)).exists( @@ -105,7 +105,7 @@ public void testGetSuccessExistingTerms() throws Exception { // Unable to easily validate exact payload due to the injected timestamp Mockito.verify(mockService, Mockito.times(1)).ingestProposal( Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) + Mockito.any(AuditStamp.class), Mockito.eq(false) ); Mockito.verify(mockService, Mockito.times(1)).exists( @@ -144,7 +144,7 @@ public void testGetFailureTermDoesNotExist() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); } @Test @@ -174,7 +174,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); } @Test @@ -195,7 +195,7 @@ public void 
testGetUnauthorized() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); } @Test @@ -204,7 +204,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); AddTermsResolver resolver = new AddTermsResolver(Mockito.mock(EntityService.class)); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchAddTermsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchAddTermsResolverTest.java index 78655daf13776d..dfe1394635c4ed 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchAddTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchAddTermsResolverTest.java @@ -14,7 +14,6 @@ import com.linkedin.datahub.graphql.resolvers.mutate.BatchAddTermsResolver; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletionException; import org.mockito.Mockito; @@ -67,10 +66,7 @@ public void testGetSuccessNoExistingTerms() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(2)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), // glossary terms contains a dynamically generated audit stamp - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 2); Mockito.verify(mockService, Mockito.times(1)).exists( 
Mockito.eq(Urn.createFromString(TEST_GLOSSARY_TERM_1_URN)) @@ -122,10 +118,7 @@ public void testGetSuccessExistingTerms() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(2)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), // glossary terms contains a dynamically generated audit stamp - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 2); Mockito.verify(mockService, Mockito.times(1)).exists( Mockito.eq(Urn.createFromString(TEST_GLOSSARY_TERM_1_URN)) @@ -162,9 +155,7 @@ public void testGetFailureTagDoesNotExist() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -200,9 +191,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -223,9 +212,7 @@ public void testGetUnauthorized() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -234,7 +221,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), 
Mockito.anyBoolean()); BatchAddTermsResolver resolver = new BatchAddTermsResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchRemoveTermsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchRemoveTermsResolverTest.java index cc5d825ac5ee56..dcc8659c1baf3b 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchRemoveTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchRemoveTermsResolverTest.java @@ -14,7 +14,6 @@ import com.linkedin.datahub.graphql.resolvers.mutate.BatchRemoveTermsResolver; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletionException; import org.mockito.Mockito; @@ -67,10 +66,7 @@ public void testGetSuccessNoExistingTerms() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(2)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), // Glossary terms contains dynamically generated audit stamp - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 2); } @Test @@ -119,10 +115,7 @@ public void testGetSuccessExistingTerms() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertTrue(resolver.get(mockEnv).get()); - Mockito.verify(mockService, Mockito.times(2)).ingestProposal( - Mockito.any(MetadataChangeProposal.class), // Glossary terms contains dynamically generated audit stamp - Mockito.any(AuditStamp.class) - ); + verifyIngestProposal(mockService, 2); } @Test @@ -159,9 +152,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { 
Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -183,9 +174,7 @@ public void testGetUnauthorized() throws Exception { Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockService, Mockito.times(0)).ingestProposal( - Mockito.any(), - Mockito.any(AuditStamp.class)); + verifyNoIngestProposal(mockService); } @Test @@ -194,7 +183,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( Mockito.any(), - Mockito.any(AuditStamp.class)); + Mockito.any(AuditStamp.class), Mockito.anyBoolean()); BatchRemoveTermsResolver resolver = new BatchRemoveTermsResolver(mockService); diff --git a/metadata-dao-impl/kafka-producer/src/main/java/com/linkedin/metadata/dao/producer/KafkaEventProducer.java b/metadata-dao-impl/kafka-producer/src/main/java/com/linkedin/metadata/dao/producer/KafkaEventProducer.java index d781d2b325a0cb..2af6d78b41612c 100644 --- a/metadata-dao-impl/kafka-producer/src/main/java/com/linkedin/metadata/dao/producer/KafkaEventProducer.java +++ b/metadata-dao-impl/kafka-producer/src/main/java/com/linkedin/metadata/dao/producer/KafkaEventProducer.java @@ -10,6 +10,7 @@ import com.linkedin.mxe.MetadataAuditEvent; import com.linkedin.mxe.MetadataAuditOperation; import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.PlatformEvent; import com.linkedin.mxe.SystemMetadata; import com.linkedin.mxe.TopicConvention; @@ -155,6 +156,43 @@ record = EventUtils.pegasusToAvroMCL(metadataChangeLog); } } + @Override + @WithSpan + public void produceMetadataChangeProposal(@Nonnull final 
MetadataChangeProposal metadataChangeProposal) { + GenericRecord record; + + Urn urn = metadataChangeProposal.getEntityUrn(); + if (urn == null) { + throw new IllegalArgumentException("Urn for proposal cannot be null."); + } + try { + log.debug(String.format("Converting Pegasus snapshot to Avro snapshot urn %s\nMetadataChangeProposal: %s", + urn, + metadataChangeProposal)); + record = EventUtils.pegasusToAvroMCP(metadataChangeProposal); + } catch (IOException e) { + log.error(String.format("Failed to convert Pegasus MCP to Avro: %s", metadataChangeProposal), e); + throw new ModelConversionException("Failed to convert Pegasus MCP to Avro", e); + } + + String topic = _topicConvention.getMetadataChangeProposalTopicName(); + if (_callback.isPresent()) { + _producer.send(new ProducerRecord(topic, urn.toString(), record), _callback.get()); + } else { + _producer.send(new ProducerRecord(topic, urn.toString(), record), (metadata, e) -> { + if (e != null) { + log.error(String.format("Failed to emit MCP for entity with urn %s", urn), e); + } else { + log.debug(String.format("Successfully emitted MCP for entity with urn %s at offset %s, partition %s, topic %s", + urn, + metadata.offset(), + metadata.partition(), + metadata.topic())); + } + }); + } + } + @Override public void producePlatformEvent(@Nonnull String name, @Nullable String key, @Nonnull PlatformEvent event) { GenericRecord record; diff --git a/metadata-events/mxe-utils-avro-1.7/src/main/java/com/linkedin/metadata/EventUtils.java b/metadata-events/mxe-utils-avro-1.7/src/main/java/com/linkedin/metadata/EventUtils.java index 582c519392a47c..dca9ef3865454a 100644 --- a/metadata-events/mxe-utils-avro-1.7/src/main/java/com/linkedin/metadata/EventUtils.java +++ b/metadata-events/mxe-utils-avro-1.7/src/main/java/com/linkedin/metadata/EventUtils.java @@ -171,7 +171,7 @@ public static PlatformEvent avroToPegasusPE(@Nonnull GenericRecord record) throw public static GenericRecord pegasusToAvroMAE(@Nonnull MetadataAuditEvent 
event) throws IOException { GenericRecord original = DataTranslator.dataMapToGenericRecord(event.data(), event.schema(), ORIGINAL_MAE_AVRO_SCHEMA); - return renameSchemaNamespace(original, ORIGINAL_MAE_AVRO_SCHEMA, RENAMED_MAE_AVRO_SCHEMA); + return renameSchemaNamespace(original, RENAMED_MAE_AVRO_SCHEMA); } /** @@ -185,7 +185,21 @@ public static GenericRecord pegasusToAvroMAE(@Nonnull MetadataAuditEvent event) public static GenericRecord pegasusToAvroMCL(@Nonnull MetadataChangeLog event) throws IOException { GenericRecord original = DataTranslator.dataMapToGenericRecord(event.data(), event.schema(), ORIGINAL_MCL_AVRO_SCHEMA); - return renameSchemaNamespace(original, ORIGINAL_MCL_AVRO_SCHEMA, RENAMED_MCL_AVRO_SCHEMA); + return renameSchemaNamespace(original, RENAMED_MCL_AVRO_SCHEMA); + } + + /** + * Converts a Pegasus MCP into the equivalent Avro model as a {@link GenericRecord}. + * + * @param event the Pegasus {@link MetadataChangeProposal} model + * @return the Avro model with com.linkedin.pegasus2avro.mxe namespace + * @throws IOException if the conversion fails + */ + @Nonnull + public static GenericRecord pegasusToAvroMCP(@Nonnull MetadataChangeProposal event) throws IOException { + GenericRecord original = + DataTranslator.dataMapToGenericRecord(event.data(), event.schema(), ORIGINAL_MCP_AVRO_SCHEMA); + return renameSchemaNamespace(original, RENAMED_MCP_AVRO_SCHEMA); } /** @@ -199,7 +213,7 @@ public static GenericRecord pegasusToAvroMCL(@Nonnull MetadataChangeLog event) t public static GenericRecord pegasusToAvroMCE(@Nonnull MetadataChangeEvent event) throws IOException { GenericRecord original = DataTranslator.dataMapToGenericRecord(event.data(), event.schema(), ORIGINAL_MCE_AVRO_SCHEMA); - return renameSchemaNamespace(original, ORIGINAL_MCE_AVRO_SCHEMA, RENAMED_MCE_AVRO_SCHEMA); + return renameSchemaNamespace(original, RENAMED_MCE_AVRO_SCHEMA); } /** @@ -232,7 +246,7 @@ public static GenericRecord pegasusToAvroFailedMCE(@Nonnull FailedMetadataChange
GenericRecord original = DataTranslator.dataMapToGenericRecord(failedMetadataChangeEvent.data(), failedMetadataChangeEvent.schema(), ORIGINAL_FAILED_MCE_AVRO_SCHEMA); - return renameSchemaNamespace(original, ORIGINAL_FAILED_MCE_AVRO_SCHEMA, RENAMED_FAILED_MCE_AVRO_SCHEMA); + return renameSchemaNamespace(original, RENAMED_FAILED_MCE_AVRO_SCHEMA); } /** @@ -248,7 +262,7 @@ public static GenericRecord pegasusToAvroFailedMCP( GenericRecord original = DataTranslator.dataMapToGenericRecord(failedMetadataChangeProposal.data(), failedMetadataChangeProposal.schema(), ORIGINAL_FMCL_AVRO_SCHEMA); - return renameSchemaNamespace(original, ORIGINAL_FMCL_AVRO_SCHEMA, RENAMED_FMCP_AVRO_SCHEMA); + return renameSchemaNamespace(original, RENAMED_FMCP_AVRO_SCHEMA); } /** @@ -262,13 +276,16 @@ public static GenericRecord pegasusToAvroFailedMCP( public static GenericRecord pegasusToAvroPE(@Nonnull PlatformEvent event) throws IOException { GenericRecord original = DataTranslator.dataMapToGenericRecord(event.data(), event.schema(), ORIGINAL_PE_AVRO_SCHEMA); - return renameSchemaNamespace(original, ORIGINAL_PE_AVRO_SCHEMA, RENAMED_PE_AVRO_SCHEMA); + return renameSchemaNamespace(original, RENAMED_PE_AVRO_SCHEMA); } /** * Converts original MXE into a renamed namespace + * Does a double convert that should not be necessary since we're already converting prior to calling this method + * in most spots */ @Nonnull + @Deprecated private static GenericRecord renameSchemaNamespace(@Nonnull GenericRecord original, @Nonnull Schema originalSchema, @Nonnull Schema newSchema) throws IOException { @@ -279,6 +296,16 @@ private static GenericRecord renameSchemaNamespace(@Nonnull GenericRecord origin return changeSchema(record, newSchema, newSchema); } + /** + * Converts original MXE into a renamed namespace + */ + @Nonnull + private static GenericRecord renameSchemaNamespace(@Nonnull GenericRecord original, @Nonnull Schema newSchema) + throws IOException { + + return changeSchema(original, newSchema, 
newSchema); + } + /** * Changes the schema of a {@link GenericRecord} to a compatible schema * diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index bfb79fae1c26fd..5bd8841e6755a8 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -555,6 +555,7 @@ def post_entity( aspect_name: str, aspect_value: Dict, cached_session_host: Optional[Tuple[Session, str]] = None, + is_async: Optional[str] = "false", ) -> int: session, gms_host = cached_session_host or get_session_and_host() endpoint: str = "/aspects/?action=ingestProposal" @@ -569,7 +570,8 @@ def post_entity( "contentType": "application/json", "value": json.dumps(aspect_value), }, - } + }, + "async": is_async, } payload = json.dumps(proposal) url = gms_host + endpoint diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index ae7740021a1504..4324b77880b919 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -8,6 +8,7 @@ import com.linkedin.common.AuditStamp; import com.linkedin.common.VersionedUrn; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.StringArray; @@ -437,20 +438,17 @@ public List getTimeseriesAspectValues(@Nonnull String urn, @Non } // TODO: Factor out ingest logic into a util that can be accessed by the java client and the resource - @SneakyThrows @Override - public String ingestProposal( - @Nonnull final MetadataChangeProposal metadataChangeProposal, - @Nonnull final Authentication authentication) throws RemoteInvocationException { - + public String ingestProposal(@Nonnull final 
MetadataChangeProposal metadataChangeProposal, + @Nonnull final Authentication authentication, final boolean async) throws RemoteInvocationException { String actorUrnStr = authentication.getActor() != null ? authentication.getActor().toUrnStr() : Constants.UNKNOWN_ACTOR; final AuditStamp auditStamp = - new AuditStamp().setTime(_clock.millis()).setActor(Urn.createFromString(actorUrnStr)); + new AuditStamp().setTime(_clock.millis()).setActor(UrnUtils.getUrn(actorUrnStr)); final List additionalChanges = AspectUtils.getAdditionalChanges(metadataChangeProposal, _entityService); - Urn urn = _entityService.ingestProposal(metadataChangeProposal, auditStamp).getUrn(); - additionalChanges.forEach(proposal -> _entityService.ingestProposal(proposal, auditStamp)); + Urn urn = _entityService.ingestProposal(metadataChangeProposal, auditStamp, async).getUrn(); + additionalChanges.forEach(proposal -> _entityService.ingestProposal(proposal, auditStamp, async)); tryIndexRunId(urn, metadataChangeProposal.getSystemMetadata()); return urn.toString(); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java index ceced5dd83ca94..5f61e2ebe509b1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java @@ -248,7 +248,7 @@ private void deleteAspect(Urn urn, String aspectName, RecordTemplate prevAspect) proposal.setAspectName(aspectName); final AuditStamp auditStamp = new AuditStamp().setActor(UrnUtils.getUrn(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis()); - final EntityService.IngestProposalResult ingestProposalResult = _entityService.ingestProposal(proposal, auditStamp); + final EntityService.IngestProposalResult ingestProposalResult = _entityService.ingestProposal(proposal, auditStamp, false); if (!ingestProposalResult.isDidUpdate()) { 
log.error("Failed to ingest aspect with references removed. Before {}, after: null, please check MCP processor" @@ -276,7 +276,7 @@ private void updateAspect(Urn urn, String aspectName, RecordTemplate prevAspect, proposal.setAspect(GenericRecordUtils.serializeAspect(newAspect)); final AuditStamp auditStamp = new AuditStamp().setActor(UrnUtils.getUrn(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis()); - final EntityService.IngestProposalResult ingestProposalResult = _entityService.ingestProposal(proposal, auditStamp); + final EntityService.IngestProposalResult ingestProposalResult = _entityService.ingestProposal(proposal, auditStamp, false); if (!ingestProposalResult.isDidUpdate()) { log.error("Failed to ingest aspect with references removed. Before {}, after: {}, please check MCP processor" diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java index 9f0e9550537324..0fbbd559e77389 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -830,10 +830,11 @@ private boolean isValidChangeType(ChangeType changeType, AspectSpec aspectSpec) * * @param mcp the proposal to ingest * @param auditStamp an audit stamp representing the time and actor proposing the change + * @param async a flag to control whether we commit to primary store or just write to proposal log before returning * @return an {@link IngestProposalResult} containing the results */ public IngestProposalResult ingestProposal(@Nonnull MetadataChangeProposal mcp, - AuditStamp auditStamp) { + AuditStamp auditStamp, final boolean async) { log.debug("entity type = {}", mcp.getEntityType()); EntitySpec entitySpec = getEntityRegistry().getEntitySpec(mcp.getEntityType()); @@ -841,7 +842,6 @@ public IngestProposalResult ingestProposal(@Nonnull MetadataChangeProposal mcp, Urn entityUrn = 
EntityKeyUtils.getUrnFromProposal(mcp, entitySpec.getKeyAspectSpec()); - AspectSpec aspectSpec = validateAspect(mcp, entitySpec); log.debug("aspect spec = {}", aspectSpec); @@ -850,7 +850,6 @@ public IngestProposalResult ingestProposal(@Nonnull MetadataChangeProposal mcp, throw new UnsupportedOperationException("ChangeType not supported: " + mcp.getChangeType() + " for aspect " + mcp.getAspectName()); } - SystemMetadata systemMetadata = generateSystemMetadataIfEmpty(mcp.getSystemMetadata()); systemMetadata.setRegistryName(aspectSpec.getRegistryName()); systemMetadata.setRegistryVersion(aspectSpec.getRegistryVersion().toString()); @@ -861,29 +860,38 @@ public IngestProposalResult ingestProposal(@Nonnull MetadataChangeProposal mcp, SystemMetadata newSystemMetadata = null; if (!aspectSpec.isTimeseries()) { - UpdateAspectResult result = null; - switch (mcp.getChangeType()) { - case UPSERT: - result = performUpsert(mcp, aspectSpec, systemMetadata, entityUrn, auditStamp); - break; - case PATCH: - result = performPatch(mcp, aspectSpec, systemMetadata, entityUrn, auditStamp); - break; - default: - // Should never reach since we throw error above - throw new UnsupportedOperationException("ChangeType not supported: " + mcp.getChangeType()); + if (!async) { + // When async mode is turned off, we write to primary store for non timeseries aspects + UpdateAspectResult result = null; + switch (mcp.getChangeType()) { + case UPSERT: + result = performUpsert(mcp, aspectSpec, systemMetadata, entityUrn, auditStamp); + break; + case PATCH: + result = performPatch(mcp, aspectSpec, systemMetadata, entityUrn, auditStamp); + break; + default: + // Should never reach since we throw error above + throw new UnsupportedOperationException("ChangeType not supported: " + mcp.getChangeType()); + } + oldAspect = result != null ? result.getOldValue() : null; + oldSystemMetadata = result != null ? result.getOldSystemMetadata() : null; + newAspect = result != null ? 
result.getNewValue() : null; + newSystemMetadata = result != null ? result.getNewSystemMetadata() : null; + } else { + // When async is turned on, we write to proposal log and return without waiting + _producer.produceMetadataChangeProposal(mcp); + return new IngestProposalResult(mcp.getEntityUrn(), false); } - oldAspect = result != null ? result.getOldValue() : null; - oldSystemMetadata = result != null ? result.getOldSystemMetadata() : null; - newAspect = result != null ? result.getNewValue() : null; - newSystemMetadata = result != null ? result.getNewSystemMetadata() : null; - } else { + } else { // For timeseries aspects newAspect = convertToRecordTemplate(mcp, aspectSpec); newSystemMetadata = mcp.getSystemMetadata(); } - boolean didUpdate = emitChangeLog(oldAspect, oldSystemMetadata, newAspect, newSystemMetadata, mcp, entityUrn, auditStamp, aspectSpec); + boolean didUpdate = + emitChangeLog(oldAspect, oldSystemMetadata, newAspect, newSystemMetadata, mcp, entityUrn, auditStamp, + aspectSpec); return new IngestProposalResult(entityUrn, didUpdate); } @@ -1709,7 +1717,7 @@ public RollbackResult deleteAspect(String urn, String aspectName, @Nonnull Map consumerRecord) event = EventUtils.avroToPegasusMCP(record); log.debug("MetadataChangeProposal {}", event); // TODO: Get this from the event itself. 
- entityClient.ingestProposal(event, this.systemAuthentication); + entityClient.ingestProposal(event, this.systemAuthentication, false); } catch (Throwable throwable) { log.error("MCP Processor Error", throwable); log.error("Message: {}", record); diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java index 7ff4bff3b572f4..e4a76f83b6423c 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java @@ -129,8 +129,8 @@ public String generateAccessToken(@Nonnull final TokenType type, @Nonnull final // Need this to write key aspect final List additionalChanges = AspectUtils.getAdditionalChanges(proposal, _entityService); - _entityService.ingestProposal(proposal, auditStamp); - additionalChanges.forEach(mcp -> _entityService.ingestProposal(mcp, auditStamp)); + _entityService.ingestProposal(proposal, auditStamp, false); + additionalChanges.forEach(mcp -> _entityService.ingestProposal(mcp, auditStamp, false)); return accessToken; } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/UpgradeStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/UpgradeStep.java index 3cf05658927cae..28fa05bb489705 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/UpgradeStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/UpgradeStep.java @@ -100,7 +100,7 @@ private void ingestUpgradeRequestAspect() throws URISyntaxException { upgradeProposal.setAspect(GenericRecordUtils.serializeAspect(upgradeRequest)); upgradeProposal.setChangeType(ChangeType.UPSERT); - _entityService.ingestProposal(upgradeProposal, auditStamp); + _entityService.ingestProposal(upgradeProposal, 
auditStamp, false); } private void ingestUpgradeResultAspect() throws URISyntaxException { @@ -115,7 +115,7 @@ private void ingestUpgradeResultAspect() throws URISyntaxException { upgradeProposal.setAspect(GenericRecordUtils.serializeAspect(upgradeResult)); upgradeProposal.setChangeType(ChangeType.UPSERT); - _entityService.ingestProposal(upgradeProposal, auditStamp); + _entityService.ingestProposal(upgradeProposal, auditStamp, false); } private void cleanUpgradeAfterError(Exception e, String errorMessage) { diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java index ab7716af529353..79b4dc0f5976ea 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java @@ -167,7 +167,7 @@ private void ingestPolicy(final Urn urn, final DataHubPolicyInfo info) throws UR keyAspectProposal.setEntityUrn(urn); _entityService.ingestProposal(keyAspectProposal, - new AuditStamp().setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis())); + new AuditStamp().setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis()), false); final MetadataChangeProposal proposal = new MetadataChangeProposal(); proposal.setEntityUrn(urn); @@ -177,7 +177,7 @@ private void ingestPolicy(final Urn urn, final DataHubPolicyInfo info) throws UR proposal.setChangeType(ChangeType.UPSERT); _entityService.ingestProposal(proposal, - new AuditStamp().setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis())); + new AuditStamp().setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis()), false); } private boolean hasPolicy(Urn policyUrn) { diff --git 
a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRolesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRolesStep.java index ff441caf0c5b2f..6baa71a51202c2 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRolesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRolesStep.java @@ -96,7 +96,7 @@ private void ingestRole(final Urn roleUrn, final DataHubRoleInfo dataHubRoleInfo keyAspectProposal.setEntityUrn(roleUrn); _entityService.ingestProposal(keyAspectProposal, - new AuditStamp().setActor(Urn.createFromString(SYSTEM_ACTOR)).setTime(System.currentTimeMillis())); + new AuditStamp().setActor(Urn.createFromString(SYSTEM_ACTOR)).setTime(System.currentTimeMillis()), false); final MetadataChangeProposal proposal = new MetadataChangeProposal(); proposal.setEntityUrn(roleUrn); @@ -106,7 +106,7 @@ private void ingestRole(final Urn roleUrn, final DataHubRoleInfo dataHubRoleInfo proposal.setChangeType(ChangeType.UPSERT); _entityService.ingestProposal(proposal, - new AuditStamp().setActor(Urn.createFromString(SYSTEM_ACTOR)).setTime(System.currentTimeMillis())); + new AuditStamp().setActor(Urn.createFromString(SYSTEM_ACTOR)).setTime(System.currentTimeMillis()), false); _entityService.produceMetadataChangeLog(roleUrn, DATAHUB_ROLE_ENTITY_NAME, DATAHUB_ROLE_INFO_ASPECT_NAME, roleInfoAspectSpec, null, dataHubRoleInfo, null, null, auditStamp, ChangeType.RESTATE); diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RemoveClientIdAspectStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RemoveClientIdAspectStep.java index b76d935a060227..2a334d7bbb6c23 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RemoveClientIdAspectStep.java +++ 
b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RemoveClientIdAspectStep.java @@ -70,7 +70,7 @@ private void ingestUpgradeAspect(String aspectName, RecordTemplate aspect, Audit upgradeProposal.setAspect(GenericRecordUtils.serializeAspect(aspect)); upgradeProposal.setChangeType(ChangeType.UPSERT); - _entityService.ingestProposal(upgradeProposal, auditStamp); + _entityService.ingestProposal(upgradeProposal, auditStamp, false); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreDbtSiblingsIndices.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreDbtSiblingsIndices.java index 0309d2d4b4890c..989ee1a39b169d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreDbtSiblingsIndices.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreDbtSiblingsIndices.java @@ -168,6 +168,6 @@ private void ingestUpgradeAspect(String aspectName, RecordTemplate aspect, Audit upgradeProposal.setAspect(GenericRecordUtils.serializeAspect(aspect)); upgradeProposal.setChangeType(ChangeType.UPSERT); - _entityService.ingestProposal(upgradeProposal, auditStamp); + _entityService.ingestProposal(upgradeProposal, auditStamp, false); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java index 9afc1f8c2f8a57..b990400b38491e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java @@ -128,7 +128,8 @@ private void migrateBrowsePath(Urn urn, AuditStamp auditStamp) throws Exception proposal.setAspect(GenericRecordUtils.serializeAspect(newPaths)); 
_entityService.ingestProposal( proposal, - auditStamp + auditStamp, + false ); } diff --git a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/RestoreGlossaryIndicesTest.java b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/RestoreGlossaryIndicesTest.java index ecac6ad05b0718..64120787e486b0 100644 --- a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/RestoreGlossaryIndicesTest.java +++ b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/RestoreGlossaryIndicesTest.java @@ -104,7 +104,8 @@ public void testExecuteFirstTime() throws Exception { Mockito.verify(mockRegistry, Mockito.times(1)).getEntitySpec(Constants.GLOSSARY_NODE_ENTITY_NAME); Mockito.verify(mockService, Mockito.times(2)).ingestProposal( Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) + Mockito.any(AuditStamp.class), + Mockito.eq(false) ); Mockito.verify(mockService, Mockito.times(1)).produceMetadataChangeLog( Mockito.eq(glossaryTermUrn), @@ -164,7 +165,8 @@ public void testExecutesWithNewVersion() throws Exception { Mockito.verify(mockRegistry, Mockito.times(1)).getEntitySpec(Constants.GLOSSARY_NODE_ENTITY_NAME); Mockito.verify(mockService, Mockito.times(2)).ingestProposal( Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) + Mockito.any(AuditStamp.class), + Mockito.eq(false) ); Mockito.verify(mockService, Mockito.times(1)).produceMetadataChangeLog( Mockito.eq(glossaryTermUrn), @@ -220,7 +222,8 @@ public void testDoesNotRunWhenAlreadyExecuted() throws Exception { Mockito.verify(mockSearchService, Mockito.times(0)).search(Constants.GLOSSARY_NODE_ENTITY_NAME, "", null, null, 0, 1000); Mockito.verify(mockService, Mockito.times(0)).ingestProposal( Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) + Mockito.any(AuditStamp.class), + Mockito.anyBoolean() ); Mockito.verify(mockService, Mockito.times(0)).produceMetadataChangeLog( 
Mockito.eq(glossaryTermUrn), diff --git a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStepTest.java b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStepTest.java index 8891657050d289..5e4ad6e7fe8809 100644 --- a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStepTest.java +++ b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStepTest.java @@ -89,7 +89,8 @@ public void testExecuteNoExistingBrowsePaths() throws Exception { // Verify that 4 aspects are ingested, 2 for the upgrade request / result, but none for ingesting Mockito.verify(mockService, Mockito.times(2)).ingestProposal( Mockito.any(MetadataChangeProposal.class), - Mockito.any() + Mockito.any(), + Mockito.eq(false) ); } @@ -156,7 +157,8 @@ public void testExecuteFirstTime() throws Exception { // Verify that 4 aspects are ingested, 2 for the upgrade request / result and 2 for the browse pahts Mockito.verify(mockService, Mockito.times(4)).ingestProposal( Mockito.any(MetadataChangeProposal.class), - Mockito.any() + Mockito.any(), + Mockito.eq(false) ); } @@ -223,7 +225,8 @@ public void testDoesNotRunWhenBrowsePathIsNotQualified() throws Exception { // Verify that 2 aspects are ingested, only those for the upgrade step Mockito.verify(mockService, Mockito.times(2)).ingestProposal( Mockito.any(MetadataChangeProposal.class), - Mockito.any() + Mockito.any(), + Mockito.eq(false) ); } @@ -248,7 +251,8 @@ public void testDoesNotRunWhenAlreadyExecuted() throws Exception { Mockito.verify(mockService, Mockito.times(0)).ingestProposal( Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class) + Mockito.any(AuditStamp.class), + Mockito.anyBoolean() ); } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java 
b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java index c0474d7125ad13..a02853d7757e1d 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java @@ -294,9 +294,9 @@ public static Pair ingestProposal(MetadataChangeProposal metada log.info("Proposal: {}", serviceProposal); Throwable exceptionally = null; try { - EntityService.IngestProposalResult proposalResult = entityService.ingestProposal(serviceProposal, auditStamp); + EntityService.IngestProposalResult proposalResult = entityService.ingestProposal(serviceProposal, auditStamp, false); Urn urn = proposalResult.getUrn(); - additionalChanges.forEach(proposal -> entityService.ingestProposal(proposal, auditStamp)); + additionalChanges.forEach(proposal -> entityService.ingestProposal(proposal, auditStamp, false)); return new Pair<>(urn.toString(), proposalResult.isDidUpdate()); } catch (ValidationException ve) { exceptionally = ve; diff --git a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json index cc8d9630f504c4..0d41df169a7364 100644 --- a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json +++ b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.aspects.restspec.json @@ -72,6 +72,10 @@ "parameters" : [ { "name" : "proposal", "type" : "com.linkedin.mxe.MetadataChangeProposal" + }, { + "name" : "async", + "type" : "string", + "default" : "unset" } ], "returns" : "string" }, { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index 053986c4c14ff0..31fc619700a138 100644 --- 
a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -3647,6 +3647,10 @@ "parameters" : [ { "name" : "proposal", "type" : "com.linkedin.mxe.MetadataChangeProposal" + }, { + "name" : "async", + "type" : "string", + "default" : "unset" } ], "returns" : "string" }, { diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java index e870fcdf735f90..a37063bd3fb57f 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java @@ -276,22 +276,40 @@ public List getTimeseriesAspectValues(@Nonnull String urn, @Non @Nonnull Boolean getLatestValue, @Nullable Filter filter, @Nonnull Authentication authentication) throws RemoteInvocationException; - public String ingestProposal(@Nonnull final MetadataChangeProposal metadataChangeProposal, - @Nonnull final Authentication authentication) throws RemoteInvocationException; + @Deprecated + default String ingestProposal(@Nonnull final MetadataChangeProposal metadataChangeProposal, + @Nonnull final Authentication authentication) throws RemoteInvocationException { + return ingestProposal(metadataChangeProposal, authentication, false); + } + String ingestProposal(@Nonnull final MetadataChangeProposal metadataChangeProposal, + @Nonnull final Authentication authentication, final boolean async) throws RemoteInvocationException; + + @Deprecated default String wrappedIngestProposal(@Nonnull MetadataChangeProposal metadataChangeProposal, @Nonnull final Authentication authentication) { + return wrappedIngestProposal(metadataChangeProposal, authentication, false); + } + + default String wrappedIngestProposal(@Nonnull MetadataChangeProposal 
metadataChangeProposal, + @Nonnull final Authentication authentication, final boolean async) { try { - return ingestProposal(metadataChangeProposal, authentication); + return ingestProposal(metadataChangeProposal, authentication, async); } catch (RemoteInvocationException e) { throw new RuntimeException(e); } } + @Deprecated default List batchIngestProposals(@Nonnull final Collection metadataChangeProposals, @Nonnull final Authentication authentication) throws RemoteInvocationException { + return batchIngestProposals(metadataChangeProposals, authentication, false); + } + + default List batchIngestProposals(@Nonnull final Collection metadataChangeProposals, + @Nonnull final Authentication authentication, final boolean async) throws RemoteInvocationException { return metadataChangeProposals.stream() - .map(proposal -> wrappedIngestProposal(proposal, authentication)) + .map(proposal -> wrappedIngestProposal(proposal, authentication, async)) .collect(Collectors.toList()); } diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index ef03ba6d34f4e9..9e8dcf522022e3 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ -624,8 +624,9 @@ public List getTimeseriesAspectValues(@Nonnull String urn, @Non * Ingest a MetadataChangeProposal event. 
* @return */ + @Override public String ingestProposal(@Nonnull final MetadataChangeProposal metadataChangeProposal, - @Nonnull final Authentication authentication) throws RemoteInvocationException { + @Nonnull final Authentication authentication, final boolean async) throws RemoteInvocationException { final AspectsDoIngestProposalRequestBuilder requestBuilder = ASPECTS_REQUEST_BUILDERS.actionIngestProposal().proposalParam(metadataChangeProposal); return sendClientRequest(requestBuilder, authentication).getEntity(); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index 2199fb10834592..4ea4aa70db2b47 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -62,6 +62,10 @@ public class AspectResource extends CollectionResourceTaskTemplate getTimeseriesAspectValues( @Nonnull @WithSpan public Task ingestProposal( - @ActionParam(PARAM_PROPOSAL) @Nonnull MetadataChangeProposal metadataChangeProposal) throws URISyntaxException { + @ActionParam(PARAM_PROPOSAL) @Nonnull MetadataChangeProposal metadataChangeProposal, + @ActionParam(PARAM_ASYNC) @Optional(UNSET) String async) throws URISyntaxException { log.info("INGEST PROPOSAL proposal: {}", metadataChangeProposal); + boolean asyncBool; + if (UNSET.equals(async)) { + asyncBool = Boolean.parseBoolean(System.getenv(ASYNC_INGEST_DEFAULT_NAME)); + } else { + asyncBool = Boolean.parseBoolean(async); + } + Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); final AuditStamp auditStamp = new AuditStamp().setTime(_clock.millis()).setActor(Urn.createFromString(actorUrnStr)); @@ -147,8 
+159,8 @@ public Task ingestProposal( return RestliUtil.toTask(() -> { log.debug("Proposal: {}", metadataChangeProposal); try { - Urn urn = _entityService.ingestProposal(metadataChangeProposal, auditStamp).getUrn(); - additionalChanges.forEach(proposal -> _entityService.ingestProposal(proposal, auditStamp)); + Urn urn = _entityService.ingestProposal(metadataChangeProposal, auditStamp, asyncBool).getUrn(); + additionalChanges.forEach(proposal -> _entityService.ingestProposal(proposal, auditStamp, asyncBool)); tryIndexRunId(urn, metadataChangeProposal.getSystemMetadata(), _entitySearchService); return urn.toString(); } catch (ValidationException e) { diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java index dd3a95dbae79ee..0a32e5af0fca92 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java @@ -268,7 +268,8 @@ private void updateExecutionRequestStatus(String runId, String status) { proposal.setAspect(GenericRecordUtils.serializeAspect(requestResult)); proposal.setChangeType(ChangeType.UPSERT); - _entityService.ingestProposal(proposal, new AuditStamp().setActor(UrnUtils.getUrn(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis())); + _entityService.ingestProposal(proposal, + new AuditStamp().setActor(UrnUtils.getUrn(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis()), false); } } catch (Exception e) { log.error(String.format("Not able to update execution result aspect with runId %s and new status %s.", runId, status), e); From bea5a0751e6b6c9a2fb316d43103343c7ffd3ee2 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Mon, 3 Oct 2022 22:38:09 -0700 
Subject: [PATCH 24/76] chore(deps): upgrade opentelemetry dependencies (#6100) Co-authored-by: Pedro Silva --- build.gradle | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/build.gradle b/build.gradle index 02bebf5ebf14bc..dd311234fdc47e 100644 --- a/build.gradle +++ b/build.gradle @@ -4,6 +4,7 @@ buildscript { ext.mavenVersion = '3.6.3' ext.springVersion = '5.3.20' ext.springBootVersion = '2.5.12' + ext.openTelemetryVersion = '1.18.0' ext.neo4jVersion = '4.4.9' ext.graphQLJavaVersion = '19.0' apply from: './repositories.gradle' @@ -114,8 +115,8 @@ project.ext.externalDependency = [ 'mysqlConnector': 'mysql:mysql-connector-java:8.0.20', 'neo4jHarness': 'org.neo4j.test:neo4j-harness:' + neo4jVersion, 'neo4jJavaDriver': 'org.neo4j.driver:neo4j-java-driver:' + neo4jVersion, - 'opentelemetryApi': 'io.opentelemetry:opentelemetry-api:1.0.0', - 'opentelemetryAnnotations': 'io.opentelemetry:opentelemetry-extension-annotations:1.0.0', + 'opentelemetryApi': 'io.opentelemetry:opentelemetry-api:' + openTelemetryVersion, + 'opentelemetryAnnotations': 'io.opentelemetry:opentelemetry-extension-annotations:' + openTelemetryVersion, 'opentracingJdbc':'io.opentracing.contrib:opentracing-jdbc:0.2.15', 'parquet': 'org.apache.parquet:parquet-avro:1.12.2', 'picocli': 'info.picocli:picocli:4.5.0', From e2c5d2a4ce9661cfd0c717798c068fcad53ad7c0 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Tue, 4 Oct 2022 06:05:04 -0700 Subject: [PATCH 25/76] feat(ui): bump max recommendations for Platforms (#6113) --- .../recommendation/candidatesource/TopPlatformsSource.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopPlatformsSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopPlatformsSource.java index 224eecc38ba043..f81a91be0660a1 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopPlatformsSource.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopPlatformsSource.java @@ -59,7 +59,7 @@ protected String getSearchFieldName() { @Override protected int getMaxContent() { - return 20; + return 40; } @Override From d52fde126fa33693e53cc764d09842f045f84a6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roko=20Gudi=C4=87?= Date: Tue, 4 Oct 2022 18:05:53 +0200 Subject: [PATCH 26/76] feat(ingest): salesforce - add sandbox support (#6105) Co-authored-by: rgudic --- .../src/datahub/ingestion/source/salesforce.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py index cfbff4b018f549..7b9ab6dd789b9d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py +++ b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py @@ -90,6 +90,10 @@ class SalesforceConfig(DatasetSourceConfigBase): instance_url: Optional[str] = Field( description="Salesforce instance url. e.g. 
https://MyDomainName.my.salesforce.com" ) + # Flag to indicate whether the instance is production or sandbox + is_sandbox: bool = Field( + default=False, description="Connect to Sandbox instance of your Salesforce" + ) access_token: Optional[str] = Field(description="Access token for instance url") ingest_tags: Optional[bool] = Field( @@ -206,6 +210,7 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: instance_url=self.config.instance_url, session_id=self.config.access_token, session=self.session, + domain="test" if self.config.is_sandbox else None, ) elif self.config.auth is SalesforceAuthType.USERNAME_PASSWORD: logger.debug("Username/Password Provided in Config") @@ -224,6 +229,7 @@ def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None: password=self.config.password, security_token=self.config.security_token, session=self.session, + domain="test" if self.config.is_sandbox else None, ) except Exception as e: From 396fd31ddcd8e85e5f3b9162b7697139bbd276f3 Mon Sep 17 00:00:00 2001 From: Peter Szalai Date: Tue, 4 Oct 2022 18:07:44 +0200 Subject: [PATCH 27/76] fix(mae-consumer): use JAVA_TOOL_OPTIONS instead of JDK_JAVA_OPTIONS (#6114) --- docker/datahub-mae-consumer/start.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/datahub-mae-consumer/start.sh b/docker/datahub-mae-consumer/start.sh index a6c5c96842b856..0a822effe713bc 100755 --- a/docker/datahub-mae-consumer/start.sh +++ b/docker/datahub-mae-consumer/start.sh @@ -34,13 +34,13 @@ if [[ ${GRAPH_SERVICE_IMPL:-} != elasticsearch ]] && [[ ${SKIP_NEO4J_CHECK:-fals dockerize_args+=("-wait" "$NEO4J_HOST") fi -JDK_JAVA_OPTIONS="${JDK_JAVA_OPTIONS:-}${JAVA_OPTS:+ JAVA_OPTS}${JMX_OPTS:+ JMX_OPTS}" +JAVA_TOOL_OPTIONS="${JDK_JAVA_OPTIONS:-}${JAVA_OPTS:+ JAVA_OPTS}${JMX_OPTS:+ JMX_OPTS}" if [[ ${ENABLE_OTEL:-false} == true ]]; then - JDK_JAVA_OPTIONS="$JDK_JAVA_OPTIONS -javaagent:opentelemetry-javaagent-all.jar" + 
JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS -javaagent:opentelemetry-javaagent-all.jar" fi if [[ ${ENABLE_PROMETHEUS:-false} == true ]]; then - JDK_JAVA_OPTIONS="$JDK_JAVA_OPTIONS -javaagent:jmx_prometheus_javaagent.jar=4318:/datahub/datahub-mae-consumer/scripts/prometheus-config.yaml" + JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS -javaagent:jmx_prometheus_javaagent.jar=4318:/datahub/datahub-mae-consumer/scripts/prometheus-config.yaml" fi -export JDK_JAVA_OPTIONS +export JAVA_TOOL_OPTIONS exec dockerize "${dockerize_args[@]}" java -jar /datahub/datahub-mae-consumer/bin/mae-consumer-job.jar From ce90310dd0c7cc8506006cb667bbd7d4deb2070b Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Tue, 4 Oct 2022 10:20:04 -0700 Subject: [PATCH 28/76] feat(advanced-search): Complete Advanced Search: backend changes & tying UI together (#6068) * stashing progress * adding remove option * more progress * editing * further in * additional rendering improvements * stashing adv search progress * stashing more progress * propagating not filters back to UI * more frontend progress * more filters working * getting ready for data platform selector * add platform select functionality * locking out switching btwn advanced and standard filters * final polish * remove unneeded code * added unit and cypress tests * resolutions after merge * adding documentation * cleaning up & refactoring * removing console.log * minor ui fix & removing unneeded code * fixing lineage search * fixing lints * fix display of degree * fixing test * fixing lint * responding to comments * fixing tests * fix smoke tests * fixing cypress * fixing cypress test * responding to comments --- .../graphql/resolvers/ResolverUtils.java | 72 +++++- .../assertion/AssertionRunEventResolver.java | 9 +- .../auth/ListAccessTokensResolver.java | 5 +- .../load/TimeSeriesAspectResolver.java | 3 +- .../ListRecommendationsResolver.java | 9 +- .../search/SearchAcrossEntitiesResolver.java | 2 +- .../search/SearchAcrossLineageResolver.java | 4 +- 
.../resolvers/search/SearchResolver.java | 14 +- .../src/main/resources/recommendation.graphql | 24 +- .../src/main/resources/search.graphql | 65 ++++-- .../auth/ListAccessTokensResolverTest.java | 11 +- datahub-web-react/public/meta-favicon.ico | Bin 0 -> 242942 bytes datahub-web-react/src/Mocks.tsx | 112 +++++++--- .../entity/container/ContainerEntitiesTab.tsx | 2 +- .../src/app/entity/group/GroupAssets.tsx | 2 +- .../styled/search/EmbeddedListSearch.tsx | 17 +- .../styled/search/EmbeddedListSearchModal.tsx | 5 + .../search/EmbeddedListSearchResults.tsx | 46 ++-- .../search/EmbeddedListSearchSection.tsx | 20 ++ .../components/styled/search/SearchSelect.tsx | 7 +- .../search/navigateToEntitySearchUrl.ts | 6 +- .../shared/tabs/Lineage/ImpactAnalysis.tsx | 4 +- .../src/app/entity/user/UserAssets.tsx | 2 +- .../src/app/home/HomePageHeader.tsx | 1 + .../src/app/ingest/source/IngestedAssets.tsx | 4 +- .../component/GlossaryTermSearchList.tsx | 2 +- .../renderer/component/TagSearchList.tsx | 2 +- .../search/AdvancedSearchAddFilterSelect.tsx | 2 + .../src/app/search/AdvancedSearchFilter.tsx | 6 +- .../AdvancedSearchFilterValuesSection.tsx | 2 +- .../src/app/search/AdvancedSearchFilters.tsx | 10 +- .../src/app/search/EditTextModal.tsx | 8 +- .../src/app/search/SearchFiltersSection.tsx | 99 +++++++++ .../src/app/search/SearchPage.tsx | 23 +- .../src/app/search/SearchResults.tsx | 53 ++--- ...earchFilter.tsx => SimpleSearchFilter.tsx} | 11 +- ...rchFilters.tsx => SimpleSearchFilters.tsx} | 35 ++- .../src/app/search/utils/constants.ts | 1 + .../search/utils/filtersToGraphqlParams.ts | 10 - .../utils/filtersToQueryStringParams.ts | 40 +++- .../src/app/search/utils/generateOrFilters.ts | 20 ++ .../app/search/utils/hasAdvancedFilters.ts | 12 + .../app/search/utils/navigateToSearchUrl.ts | 36 +-- .../src/app/search/utils/useFilters.ts | 34 ++- .../src/app/settings/AccessTokens.tsx | 2 +- .../src/app/shared/tags/AddTagsTermsModal.tsx | 3 +- .../src/graphql/analytics.graphql | 2 
+- .../src/graphql/recommendations.graphql | 2 +- docs/how/search.md | 46 +++- metadata-ingestion/ingest_schema.py | 0 .../metadata/search/LineageSearchService.java | 2 +- .../AllEntitiesSearchAggregator.java | 3 +- .../query/request/SearchRequestHandler.java | 42 ++-- .../metadata/search/utils/ESUtils.java | 58 ++++- .../metadata/search/utils/QueryUtils.java | 3 +- .../metadata/search/utils/SearchUtils.java | 3 +- .../metadata/search/SearchServiceTest.java | 206 ++++++++++++++++++ .../metadata/query/filter/Criterion.pdl | 7 +- ...linkedin.analytics.analytics.snapshot.json | 5 + .../com.linkedin.entity.aspects.snapshot.json | 5 + ...com.linkedin.entity.entities.snapshot.json | 5 + .../cypress/integration/search/search.js | 193 +++++++++++++--- smoke-test/tests/cypress/data.json | 43 +++- .../tokens/revokable_access_token_test.py | 10 +- 64 files changed, 1137 insertions(+), 355 deletions(-) create mode 100644 datahub-web-react/public/meta-favicon.ico create mode 100644 datahub-web-react/src/app/search/SearchFiltersSection.tsx rename datahub-web-react/src/app/search/{SearchFilter.tsx => SimpleSearchFilter.tsx} (93%) rename datahub-web-react/src/app/search/{SearchFilters.tsx => SimpleSearchFilters.tsx} (70%) delete mode 100644 datahub-web-react/src/app/search/utils/filtersToGraphqlParams.ts create mode 100644 datahub-web-react/src/app/search/utils/generateOrFilters.ts create mode 100644 datahub-web-react/src/app/search/utils/hasAdvancedFilters.ts create mode 100644 metadata-ingestion/ingest_schema.py diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java index 7ecb8548519c13..bc9af99afc190c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java @@ -3,10 +3,13 @@ import 
com.datahub.authentication.Authentication; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableSet; +import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.exception.ValidationException; import com.linkedin.datahub.graphql.generated.FacetFilterInput; +import com.linkedin.datahub.graphql.generated.OrFilter; +import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; @@ -81,20 +84,77 @@ public static Map buildFacetFilters(@Nullable List criterionListFromAndFilter(List andFilters) { + return andFilters != null && !andFilters.isEmpty() + ? andFilters.stream() + .map(filter -> criterionFromFilter(filter)) + .collect(Collectors.toList()) : Collections.emptyList(); + + } + + // In the case that user sends filters to be or-d together, we need to build a series of conjunctive criterion + // arrays, rather than just one for the AND case. 
+ public static ConjunctiveCriterionArray buildConjunctiveCriterionArrayWithOr( + @Nonnull List orFilters + ) { + return new ConjunctiveCriterionArray(orFilters.stream().map(orFilter -> { + CriterionArray andCriterionForOr = new CriterionArray(criterionListFromAndFilter(orFilter.getAnd())); + return new ConjunctiveCriterion().setAnd( + andCriterionForOr + ); + } + ).collect(Collectors.toList())); + } + @Nullable - public static Filter buildFilter(@Nullable List facetFilterInputs) { - if (facetFilterInputs == null || facetFilterInputs.isEmpty()) { + public static Filter buildFilter(@Nullable List andFilters, @Nullable List orFilters) { + if ((andFilters == null || andFilters.isEmpty()) && (orFilters == null || orFilters.isEmpty())) { return null; } - return new Filter().setOr(new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(facetFilterInputs.stream() - .map(filter -> new Criterion().setField(getFilterField(filter.getField())).setValue(filter.getValue())) - .collect(Collectors.toList()))))); + + // Or filters are the new default. We will check them first. 
+ // If we have OR filters, we need to build a series of CriterionArrays + if (orFilters != null && !orFilters.isEmpty()) { + return new Filter().setOr(buildConjunctiveCriterionArrayWithOr(orFilters)); + } + + // If or filters are not set, someone may be using the legacy and filters + final List andCriterions = criterionListFromAndFilter(andFilters); + return new Filter().setOr(new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(andCriterions)))); + } + + // Translates a FacetFilterInput (graphql input class) into Criterion (our internal model) + public static Criterion criterionFromFilter(final FacetFilterInput filter) { + Criterion result = new Criterion(); + result.setField(getFilterField(filter.getField())); + if (filter.getValues() != null) { + result.setValues(new StringArray(filter.getValues())); + if (!filter.getValues().isEmpty()) { + result.setValue(filter.getValues().get(0)); + } else { + result.setValue(""); + } + } + + if (filter.getCondition() != null) { + result.setCondition(Condition.valueOf(filter.getCondition().toString())); + } else { + result.setCondition(Condition.EQUAL); + } + + if (filter.getNegated() != null) { + result.setNegated(filter.getNegated()); + } + + return result; } private static String getFilterField(final String originalField) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java index 630e6718ba0dae..fafac5df0360a5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java @@ -9,14 +9,12 @@ import com.linkedin.datahub.graphql.generated.AssertionRunStatus; import com.linkedin.datahub.graphql.generated.FacetFilterInput; 
import com.linkedin.datahub.graphql.generated.FilterInput; -import com.linkedin.datahub.graphql.generated.SearchCondition; import com.linkedin.datahub.graphql.types.dataset.mappers.AssertionRunEventMapper; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.r2.RemoteInvocationException; @@ -102,13 +100,16 @@ private Filter buildFilter(@Nullable FilterInput filtersInput, @Nullable final S } List facetFilters = new ArrayList<>(); if (status != null) { - facetFilters.add(new FacetFilterInput("status", status, ImmutableList.of(status), false, SearchCondition.EQUAL)); + FacetFilterInput filter = new FacetFilterInput(); + filter.setField("status"); + filter.setValues(ImmutableList.of(status)); + facetFilters.add(filter); } if (filtersInput != null) { facetFilters.addAll(filtersInput.getAnd()); } return new Filter().setOr(new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(facetFilters.stream() - .map(filter -> new Criterion().setField(filter.getField()).setValue(filter.getValue())) + .map(filter -> criterionFromFilter(filter)) .collect(Collectors.toList()))))); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java index b6bd4a7d89c89d..96900f7a50a831 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.auth; +import com.google.common.collect.ImmutableList; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; @@ -55,7 +56,7 @@ public CompletableFuture get(DataFetchingEnvironment envi new SortCriterion().setField(EXPIRES_AT_FIELD_NAME).setOrder(SortOrder.DESCENDING); final SearchResult searchResult = _entityClient.search(Constants.ACCESS_TOKEN_ENTITY_NAME, "", - buildFilter(filters), sortCriterion, start, count, + buildFilter(filters, Collections.emptyList()), sortCriterion, start, count, getAuthentication(environment)); final List tokens = searchResult.getEntities().stream().map(entity -> { @@ -94,6 +95,6 @@ public CompletableFuture get(DataFetchingEnvironment envi */ private boolean isListingSelfTokens(final List filters, final QueryContext context) { return AuthorizationUtils.canGeneratePersonalAccessToken(context) && filters.stream() - .anyMatch(filter -> filter.getField().equals("ownerUrn") && filter.getValue().equals(context.getActorUrn())); + .anyMatch(filter -> filter.getField().equals("ownerUrn") && filter.getValues().equals(ImmutableList.of(context.getActorUrn()))); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java index 86d9c233a2c994..2bae315d0a0a97 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java @@ -12,7 +12,6 @@ import com.linkedin.metadata.authorization.PoliciesConfig; import 
com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.r2.RemoteInvocationException; @@ -111,7 +110,7 @@ private Filter buildFilters(@Nullable FilterInput maybeFilters) { return null; } return new Filter().setOr(new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(new CriterionArray(maybeFilters.getAnd().stream() - .map(filter -> new Criterion().setField(filter.getField()).setValue(filter.getValue())) + .map(filter -> criterionFromFilter(filter)) .collect(Collectors.toList()))))); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java index fc0f1732b56324..f71a7143aa6eb2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.recommendation; +import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.generated.ContentParams; import com.linkedin.datahub.graphql.generated.EntityProfileParams; @@ -14,7 +15,6 @@ import com.linkedin.datahub.graphql.generated.SearchParams; import com.linkedin.datahub.graphql.resolvers.EntityTypeMapper; import com.linkedin.datahub.graphql.types.common.mappers.UrnToEntityMapper; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import 
com.linkedin.metadata.recommendation.EntityRequestContext; import com.linkedin.metadata.recommendation.RecommendationsService; @@ -31,7 +31,7 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; @Slf4j @@ -88,7 +88,7 @@ private com.linkedin.metadata.recommendation.RecommendationRequestContext mapReq searchRequestContext.setFilters(new CriterionArray(requestContext.getSearchRequestContext() .getFilters() .stream() - .map(facetField -> new Criterion().setField(facetField.getField()).setValue(facetField.getValue())) + .map(facetField -> criterionFromFilter(facetField)) .collect(Collectors.toList()))); } mappedRequestContext.setSearchRequestContext(searchRequestContext); @@ -148,7 +148,8 @@ private RecommendationParams mapRecommendationParams( searchParams.setFilters(params.getSearchParams() .getFilters() .stream() - .map(criterion -> Filter.builder().setField(criterion.getField()).setValue(criterion.getValue()).build()) + .map(criterion -> Filter.builder().setField(criterion.getField()).setValues( + ImmutableList.of(criterion.getValue())).build()) .collect(Collectors.toList())); } mappedParams.setSearchParams(searchParams); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java index adbac977819be9..20a6738c2abca5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java @@ -52,7 +52,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) "Executing search for multiple entities: entity types {}, query {}, 
filters: {}, start: {}, count: {}", input.getTypes(), input.getQuery(), input.getFilters(), start, count); return UrnSearchResultsMapper.map(_entityClient.searchAcrossEntities(entityNames, sanitizedQuery, - ResolverUtils.buildFilter(input.getFilters()), start, count, ResolverUtils.getAuthentication(environment))); + ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()), start, count, ResolverUtils.getAuthentication(environment))); } catch (Exception e) { log.error( "Failed to execute search for multiple entities: entity types {}, query {}, filters: {}, start: {}, count: {}", diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java index e4485e25439bdd..f63bfd31e01154 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java @@ -72,7 +72,7 @@ public CompletableFuture get(DataFetchingEnvironment urn, resolvedDirection, input.getTypes(), input.getQuery(), filters, start, count); return UrnSearchAcrossLineageResultsMapper.map( _entityClient.searchAcrossLineage(urn, resolvedDirection, entityNames, sanitizedQuery, - maxHops, ResolverUtils.buildFilter(filters), null, start, count, + maxHops, ResolverUtils.buildFilter(filters, input.getOrFilters()), null, start, count, ResolverUtils.getAuthentication(environment))); } catch (RemoteInvocationException e) { log.error( @@ -89,7 +89,7 @@ public CompletableFuture get(DataFetchingEnvironment private Integer getMaxHops(List filters) { Set degreeFilterValues = filters.stream() .filter(filter -> filter.getField().equals("degree")) - .map(FacetFilterInput::getValue) + .flatMap(filter -> filter.getValues().stream()) .collect(Collectors.toSet()); 
Integer maxHops = null; if (!degreeFilterValues.contains("3+")) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java index 1f3df870e45a38..4db2ee957b5bc2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java @@ -41,17 +41,17 @@ public CompletableFuture get(DataFetchingEnvironment environment) return CompletableFuture.supplyAsync(() -> { try { - log.debug("Executing search. entity type {}, query {}, filters: {}, start: {}, count: {}", input.getType(), - input.getQuery(), input.getFilters(), start, count); + log.debug("Executing search. entity type {}, query {}, filters: {}, orFilters: {}, start: {}, count: {}", input.getType(), + input.getQuery(), input.getFilters(), input.getOrFilters(), start, count); return UrnSearchResultsMapper.map( - _entityClient.search(entityName, sanitizedQuery, ResolverUtils.buildFilter(input.getFilters()), null, start, + _entityClient.search(entityName, sanitizedQuery, ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()), null, start, count, ResolverUtils.getAuthentication(environment))); } catch (Exception e) { - log.error("Failed to execute search: entity type {}, query {}, filters: {}, start: {}, count: {}", - input.getType(), input.getQuery(), input.getFilters(), start, count); + log.error("Failed to execute search: entity type {}, query {}, filters: {}, orFilters: {}, start: {}, count: {}", + input.getType(), input.getQuery(), input.getFilters(), input.getOrFilters(), start, count); throw new RuntimeException( - "Failed to execute search: " + String.format("entity type %s, query %s, filters: %s, start: %s, count: %s", - input.getType(), input.getQuery(), input.getFilters(), start, count), e); 
+ "Failed to execute search: " + String.format("entity type %s, query %s, filters: %s, orFilters: %s, start: %s, count: %s", + input.getType(), input.getQuery(), input.getFilters(), input.getOrFilters(), start, count), e); } }); } diff --git a/datahub-graphql-core/src/main/resources/recommendation.graphql b/datahub-graphql-core/src/main/resources/recommendation.graphql index 5f1340ba452542..4e4bd14052aff9 100644 --- a/datahub-graphql-core/src/main/resources/recommendation.graphql +++ b/datahub-graphql-core/src/main/resources/recommendation.graphql @@ -217,7 +217,7 @@ type SearchParams { """ Entity types to be searched. If this is not provided, all entities will be searched. """ - types: [EntityType!] + types: [EntityType!] """ Search query @@ -237,12 +237,22 @@ type Filter { """ Name of field to filter by """ - field: String! + field: String! - """ - Value of the field to filter by - """ - value: String! + """ + Values, one of which the intended field should match. + """ + values: [String!]! + + """ + If the filter should or should not be matched + """ + negated: Boolean + + """ + Condition for the values. How to If unset, assumed to be equality + """ + condition: FilterOperator } """ @@ -269,4 +279,4 @@ type ContentParams { Number of entities corresponding to the recommended content """ count: Long -} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index cf409dc29a6e9f..b57de93da28a37 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -65,9 +65,15 @@ input SearchInput { count: Int """ - Facet filters to apply to search results + Deprecated in favor of the more expressive orFilters field + Facet filters to apply to search results. These will be 'AND'-ed together. """ - filters: [FacetFilterInput!] + filters: [FacetFilterInput!] 
@deprecated(reason: "Use `orFilters`- they are more expressive") + + """ + A list of disjunctive criterion for the filter. (or operation to combine filters) + """ + orFilters: [OrFilter!] } """ @@ -95,9 +101,15 @@ input SearchAcrossEntitiesInput { count: Int """ - Faceted filters applied to search results + Deprecated in favor of the more expressive orFilters field + Facet filters to apply to search results. These will be 'AND'-ed together. """ - filters: [FacetFilterInput!] + filters: [FacetFilterInput!] @deprecated(reason: "Use `orFilters`- they are more expressive") + + """ + A list of disjunctive criterion for the filter. (or operation to combine filters) + """ + orFilters: [OrFilter!] } """ @@ -135,9 +147,25 @@ input SearchAcrossLineageInput { count: Int """ - Faceted filters applied to search results + Deprecated in favor of the more expressive orFilters field + Facet filters to apply to search results. These will be 'AND'-ed together. """ - filters: [FacetFilterInput!] + filters: [FacetFilterInput!] @deprecated(reason: "Use `orFilters`- they are more expressive") + + """ + A list of disjunctive criterion for the filter. (or operation to combine filters) + """ + orFilters: [OrFilter!] +} + +""" +A list of disjunctive criterion for the filter. (or operation to combine filters) +""" +input OrFilter { + """ + A list of and criteria the filter applies to the query + """ + and: [FacetFilterInput!] } """ @@ -150,14 +178,9 @@ input FacetFilterInput { field: String! """ - Value of the field to filter by (soon to be deprecated) + Values, one of which the intended field should match. """ - value: String! - - """ - Values of the field to filter by - """ - values: [String!] + values: [String!]! """ If the filter should or should not be matched @@ -165,12 +188,12 @@ input FacetFilterInput { negated: Boolean """ - Condition for the values. If unset, assumed to be equality + Condition for the values. 
How to If unset, assumed to be equality """ - condition: SearchCondition + condition: FilterOperator } -enum SearchCondition { +enum FilterOperator { """ Represent the relation: String field contains value, e.g. name contains Profile """ @@ -508,9 +531,15 @@ input BrowseInput { count: Int """ - Faceted filters applied to browse results + Deprecated in favor of the more expressive orFilters field + Facet filters to apply to search results. These will be 'AND'-ed together. """ - filters: [FacetFilterInput!] + filters: [FacetFilterInput!] @deprecated(reason: "Use `orFilters`- they are more expressive") + + """ + A list of disjunctive criterion for the filter. (or operation to combine filters) + """ + orFilters: [OrFilter!] } """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java index 1ef3c101e6c2a4..8c23335b7e9d3c 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java @@ -6,10 +6,10 @@ import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.generated.ListAccessTokenInput; import com.linkedin.datahub.graphql.generated.ListAccessTokenResult; -import com.linkedin.datahub.graphql.generated.SearchCondition; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import graphql.schema.DataFetchingEnvironment; +import java.util.Collections; import junit.framework.TestCase; import org.mockito.Mockito; @@ -27,15 +27,18 @@ public void testGetSuccess() throws Exception { final ListAccessTokenInput input = new ListAccessTokenInput(); input.setStart(0); input.setCount(100); - final ImmutableList filters = ImmutableList.of(new 
FacetFilterInput("actor", - "urn:li:corpuser:test", ImmutableList.of("urn:li:corpuser:test"), false, SearchCondition.EQUAL)); + FacetFilterInput filter = new FacetFilterInput(); + filter.setField("actor"); + filter.setValues(ImmutableList.of("urn:li:corpuser:test")); + final ImmutableList filters = ImmutableList.of(filter); + input.setFilters(filters); Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); final EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when(mockClient.filter( Mockito.eq(Constants.ACCESS_TOKEN_ENTITY_NAME), - Mockito.eq(buildFilter(filters)), + Mockito.eq(buildFilter(filters, Collections.emptyList())), Mockito.notNull(), Mockito.eq(input.getStart()), Mockito.eq(input.getCount()), diff --git a/datahub-web-react/public/meta-favicon.ico b/datahub-web-react/public/meta-favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..1587e379073865767be6c13c239fd950d0f29f25 GIT binary patch literal 242942 zcmeHwcXSk2mcBRd*>Luc4QIp5Z@iN{GrOLho$*fCnVqo`IY$Ey1e3uAgH5(^0%LF{ zXOWQz5+INeNeBrLNgyGF5Xw2{EZyJzUiGW4ZUrRK)m`0Pk8`O~^{W@|yWf5H-52Va zXa0!)J(Kqg|NrbW1*iVxnSXlbnP+~)gvU3T@LYt=f1+G9{}WLKlNn47m>lpv4gmWm z{=Ki=%^I5=00&I`n{r@szCV*y=f>ZprUJ$loq^^9`5*9P+HJ>DvUWJ($>b6_l@Pnly~uX7kM6`{7WI!SWzlo zsmV@N%bmw%t1HZwNwvFHXCzzV22=5k3)I%!EaIrowRO|pb3KdV&QWV~6UT3(vV?1t z8yO_hvai2V9)-u?BlrsbY6I|IoAX%IEZSdDP{L3lo`5%O7F%{Q#C}UeL((2 z*CVZUxOtJZC<*46!CX!W4$#Vm$&3r6sOsu#ua&T-Sf^L zZP#XyA;^+K!Bn08K<_Gw&)M-cj?wO&iw@Z6ah8ausu$PzThn_{Mql zdH$tPYN}M)-X;OdA62OdN?hc}`IO&}>nNHtpBV!G;AhON0E(H-#MeUpTZZh@dntBy zALX~1esp*BOll}Cq_TUl6g#({@*MBk(=30wv3LyCcahH{wqx8I+vywCAs zG!#3hFWp`_QM41c6~E_19HJP0)~HXI+SH(y13ESB@Hcd0$x8~)HGBb&uD&@?luhU-J8UEW_|;qol$9yr zvv~p|YU1*SZHb(SgLL(c{(_Ha+rrpz`K!$p{7?97zM3X@)Lc`k#;M6lp}0k(#P3-r zcdO5FlXLbkKj*du@0lQDuw#-oELHd;`DcG+-lZL0Z<7|<0&Swy&7W?qc$pf?ibNd5 zmF zy*-l360eKjO7F(93=Ocn!|$OTwerd`7`JG&4gS&BrR-X*>J8*4^p5(Tf>R+gbNf?T z&<1L4wBkeH|E-~-jH0YuCi^_E%^z&VKew-1FHW!I_T_fX-CSEm54OCgV8K$Jd+Xk` 
z;{Pn;pLL?b8~gJ5(-oH2c-Dz3{&#*X%7lD@W9Sxy{L3c==474ku9?Gnv_LHnY=e8B z&Q=KNG)t+lF^CB6x89?Gb8vC=YCbjzb&k zsC8<6APZU>D*McSkq+`izo4~cpvTZQNFO@6GUcv<|BcJk=L#=u_5YRWKJ=L1)dt`{ zf!jp11M}h5yRWGDuY7cmt}h&JDThBj*!*sXb`R{48%xG2^=4T`AJb5hug%64_Xfp( zIFH9$7^|xko^uuF%C%hd_MPatWzNC;`adpHzEQ4&x(KOn?9lq)%--hD}{cMG79}J(&DmT z1^(~Y;vZ$oIJQ;Awjz%K)G7Yi-a$X4ttWIx#<4Bb!nQIqjUL+LpT`{N*YA8V#nKnD z9(`w|jqTZ3Q9=n{yk*4&>OZF+%lWI!leY-E0+%xOX2fKLLG;4j-IQon+)icV$xb6IaLpFs6R zxx&Yjq`^OIIoJc*Hhi%8J;hc~YeqsBcZPrT4@ny=I)(F!V?I4T^oM5qd*QlC$Oa zZ_xDx+;bJ=`XXvKjWqB z-c?x~afMBazEtW6DI1;PANG#aA+~w4kB$Sz?l$;e`Vx<=GbsJYCM$Ls*I8kE)H32d zImSZ%C=2uh_yE0uF?e%bwa|UQ6!d~9Cu0(JFw#e`@Bi)b4<5oMka!h&AwQkr}SBbX3SV!72$Pf2;2H$~-f3D}P6|aaowrfLfQ*G#~ z|3|#StKmYwYs+F|7Yh5*V#9I1uoo&Hu|4AYU_ksU{XE8xLRQdD@4ZIV+;?b$J^nFH z?ku2vAMIJo=_&RgWE}ceVAb*x#+r~}*bDc+dPkL0$R2bNf<6Azk8HNQ-SNd1{{w|@ zPR6Ww7;hQdSZ!;_kC4|+?0?`Nc<3y&$NxCtBgHsK=oqdy#-JE;$N*fzc9OV6KbjvE zDu1@Q!uOZ7ak&~7_5f^fgs$+9aYE|;FEn~ij`g05fA}J0`B6sbDg-g^*0d+;cf+FZ z8q14>?Sf}$bNEES8`uRl_-8x5(K3z~iFWeelkg9j5j?cmDQHL7dy=5Hw7TIw`(dD0 z4G;{8|G0NXsrnQ$mb&k=cDAGq{y7e8ER6pe+5aktC=V+MG0% z{m>5?$G3?%cR!ga{D!bQBp$`MgJav{KlQ*D?C&kJ%~Q-bDD!2V;UBi>-8HjCc~HOG z?@to;xgxKf+yB5nd}Y9)ww}^YX@h^rAY&Q4l(Khq-as4QDi{D?H|$_p*Mu(?sJ;i4 zL7E%bQLw|6`2NE7#asn!gwE`L_|4_G3fDx=L)bm&)0w*J|FvxgEW;nrP*$jnXILMg zj$&Mmw76~ISBEbHcDTfU=80XR2KM;JxBxz;rYcJtvdqALugs-bEBz$=BPQ&8RfqDp zGX3ag;qNWEbxHUH)Ugxm0^y6aChy8SL*gHNRQRP}5b_2cp!M%)@edigv3RtQ35;_= z@ULLJ4_myxFh}(xB5kdI9n1%AFs8mT{YAxR%yA$)LVno4mb(9Q(MDSQBTv}LGC!Fo zd=656+Tovlml&sGPEE)P=K=q93C~H%@v10aC;dP2yt!U)HJt;IjaYp|~{<`FuG>x#0#|BCs8 zYx4$)@}Nvz;a`m9*{3G$>HFU-7Cv_9LZk=1j(Oj!oF3x9Ux9gD{H@fP#|%84M6kg> z=L=nQn{7*L8%`GK!KbGU-N8TRY0;L7&IzD&_Q@c?_PPJHVoxf%ezKUq(x&gidqd(M zeqQv6>UiBEJMhO#o82D&QrAm4fgcyXSbl+h;Mqd%W!u=#O#%Op4}DEHmyQ!-T8wKY z;hFT+RHY_z-v|G{f~9tO&hQa!E&by5_(z#xO9*?A>je3O>>|FHn`8NX68<4WvW~dI z|DP46*u?Gnf5r%~3V*fie}P-{_r*6Z3b6w=WPOkx=J~q{uy^i!_`1-~3jU@GABsKx zli62*F`J0XI7M98JeYI9yf^x7nQvG4M_J(CMA^V^j0NGRRrtU-V||7GTbVx%V3`>v 
z#wf78VZ%X&ptlk1@sIpbE-gRto}6P6FY$lf!9R39{D>kg<}vCGUR9^FUQlfjE-(9H z3=|BA|30FBLSJ8=bc5S8=_{Bj}Jp~24nS8S|A=|^EaK#x|W#ET@gd7@5F!awX~sVl^on(b@k4{Xb|IvD=} z%ZMj*9bXYg%+*`w+3fL;w9sd3`3d`~BH4ERPj~Q-iWT#kCnaypEx}hCE!H7G=Zf+! z9HHu>j(F>WXNJVTc0CAu)z)@}4^8o*Yw-_UL$6}48*9k~@q8=Bi3q@#SbKp!1aiS+ zFs$S2EHqV@s~FbC(c&Lvdc=6H%g?aQlh+k?Q}*A;eV#e-X7jfj++I-bvv{uEjN_W0-iqXWMo`U&e8C_wlfp&wBW@Cx&U z+VvzdeHUJhiGRy_5X{$L><#{*J<(4|+sp?4{5$+UT?NsHNIxvvo@HLl$JwoIha6#i zfPNf4dfAsFZCm{N3%j(dJhk?}D!0HM+vM8$4EO}#cY8Aac@B~L>&`+)_^&U@5z%1- z!Il!TDDXd7_zunHJl)hhI?!*dVHJpYue%;3xbI7y}~M;~#clr|Wf@FNhDq z9I;sc2RY)KD|#ZC+tJT&%ylS`fB?%#5F)LApZLbJ2{Wn(F=PS zbN2AXGvDF6lIwZ3_{VSXpA|%(66)=;JyTA{I zz6|!F)YlkG!u|lR5Nz!$@Tp@*1P@G*8|GvFN;21DXskwIndR$?D**`@!_A{USi5aPpEgnmzk z>Wqh$xo)ct0Omw~Y#ZPm@xG<+z?fCr-)iwM+g9c$^5k~LSW@~Z?C}r$iTQFnn?qga z&SgOvv^Hyw-TGh28<{WGxMQ3xu&l*D_#sH)g{T|q#bty&js{9Zj37rW2FV^-$Mj%(}JBkjKyef^iuE~HN#(pXJ8{Ppg zphFE941s^>ZOLav)*yFG*uTM7_<3NXN$|l0$9vH2(C6^e zDDf?rhrF{~;~CPu_vsuee{d78s6pT#^Gud<+2n`3vFCvM6czvMb5rZZ??5i>$CgN3 z`p)6Y2Z{A+mT@ii%EH%Ea z=`ZEIHnxwxvXsGJ%yU#dzON>UI>YbTA%D<*;3t^rR60w&Pc-~M2-u<6imOW&OO65qhy#5!)w)pe9F>VPz{KM?`G zN<0Ih|I+Ta5fLo^VJ#Rig*gBae3{Zug!Lo$*DvlcKa>ISvG$~meVphgF-FJQTbuH2 zS}x{7Fi(KJ(_IDVSl|)sA<%zg+>NmTY!b|aV%@5=`L*dluVLSer~}uxt2Hk0FKWvN zz4N31e+2wzl9%@SN4}3a+9a+o;z8fawcGd|eKYngp^V!6wfFc9vH|-aJU}|y-`m~u zdPvBkHb56*?=;#C>0vE^cAup-u8Z#tfq#sz;b*faIqvR=N0`^Mk8l6Czy$A!?<_zb zAn#ZoggrZu6{OK=V0_uZ+a9bTquz)>r*qy|K1YNAd7Xma(&| zXe&*AoyC*SA&d3_I#T`p32krxt`q+@1pb{^Uo$I{111No95C^3<%M}?a=_a-VB+7~ zTHLI!$pI?|O#EAUVcwY>@HP&Z`1iIJH|uM1z{&v=|5jd@cP0nCjRQTxe`|9sl_&3` zlK5|^}elA+ec-f2cZ3Sbb$Z1P%>YW58BZo z{<|YO%+D?KkgpGeI-p>F#*`xI2;W=iA+AU5=otU4p+|iG=(rt)@Za_n%Fi^EdV5)NDeb5auW8`DU!dc>_fAkBzDnXfAIF(O`e}&ILEv2l>l9us3Xt|Z5S5TIqclj#mJ-%nU zvQk6u^F8PTg<|*`^Pz=!e?p9;k0~ow(kjZ11+BKy)lVo}&>Fr<`jqdPuC3M3I=%;e zW~J+&Q?^Q9@IB~Dh2r=c_mzdd=BuP{DEkIzy@odMJ!m6k-;}h8?a;Fe6PXzS;l*3IFAI*wOE%p=c~YYCpbUPSZ{}O3)U|%-c_6f?-J*Hy~sEh zByohxd5KyC7g)k_Pj48S{en 
z@l{Zd@n4-4$$dNb>*(KY`gjxnz`lupO^_P)g)V7ni-KkR{aHvb8>`6b!g{IL5^vMm5o?E%;Yu=&~Emn3X`jlD1I zezy0+>}`Iw15QhlHovs@EjB;w{xj0v*VqE#Yzr`Df`lEQu?2+f&o+OAz1?rI`C<2; zWxF3F?0snuSnU4DPHcYI{eq;;ueJNx1~_ja)fSL8zs2qsw!gFkaJAU{;@W{NAnpE3 z(&pFL0#R)5OA_|JMi&U%pKXB4_IAI;=7-%ME$x13^K0z>E7IoYD{KI*-57(N}9v>Ocz0OHS`AGTjZ~hlJA*rO8ScLL0?MC(m}ol z9a5>_{$aAEApYAzN&F2$m5%Veq@#QfI;NrHe9!biQV8Fx)MNZ(k7-fd$CM^~e-`|+ z9>@xx!28>hc%MiqQ%Mg9@%5%M&ZC#q!0$bg$~lhSRKa=ZO_iKS2h_;V970u`jx(s5 z^U;fHIBg9z@x3lo%jxPxb)2@Kp5vc=fE5o9weSB=4&}4t;=OJD)U2<`0e9v=ukc@2 zaGf$RSEsT6abKBw%$<2*mfhrlw{f6X_;0Go752XH0b2ZjX*~B|d}}T9uk-!(-o{+B zz9t6@&VgRxzpc5R&$?Wwi~*$opYfl2X)d30qI`49;Pp0(XL7*g01xGv37{T_l6d}R zHhh2*|9s8jH8zcvU7s)2UIjPP zH925%;Hf##Yy4xbFZ0xJjSk?oKI{W5wtJakL>>~`}?cX zPI?Uk&5D{FaAOWU75qa6N&K@9kk9d`&5Jd3#m&N*956Z1`}Mz)ckuORoEm291LSjn%K7|TGngDOIbdiG zJazxy!gKvOeCCg`7g+iL`|{d=4|yFBe|OdlCI?Io7?lH0jeo55E4aGC&Ii~}oa2Ra zbIf3Jz~q2oIq=l@N6oP2-)0Vg*8yaPjiQFKB*RwRESSjwlLJpb{s#}S=TCggL;HOX zKL5WeBV1n=x3y7qRx0bCoBYk4Th6EZx2P&Dna4RT`qtA8Q*UXgqw#h(dhv{q%(tm;&eJa@JDoc zdVfCmv{2t_x?$?22{-8M=wEP~{MZHU@*Kr37|Zr&ksET(sYP}O{&D6H&i=9b0QuWL zjQ{*A?-0%k(lewU+C(RO{*6xhe%tAE!uMN3w$ay^!IlF#=Z@1!|L-`xO@tiy{Xg!D zcH0)n{Hr4lI0XMV^RM{UI_~rN`+s5#fbW0xr>uxq31pqPbav|PxyVu z`Q-mB*E3kpD!OIbsX<#P)aTpIZx8MV&W`yXs?L1mmV9$^p&f*O5@-L4?}DlLAHaJ7 zhf-ZZoUT=>%6LQ(qh6pBzOKQ4=!@T?`sG zZO$&U!|-2UbVuOb+W#vyKt;-7UF+0TT|rSZ2V44@XIb8yOQA1*o31UH$mgCm>RL~C zOZ?uK%PjbJetRUWU+He_a#eX9hX3Z;Lh=1C#RdTO*$2pb0g7&X!o!1B-RjrY+Df-R zn#-~VTl3$YPN6>Ep$ijz`OE{;2dFkO&&%9i{g!Lm#P?aAi`t_W#k}c~0}jJ~TT3Gq z#4NR}15ogfa{+QM&gSnWAshX|!@XZq2;(r6@$YnEc{@Gir#$y~N52ZXS<09Xz45_p z3VHGWIlVnj_WdERM?3Fk9CKN59fp6@pfq8dE&f9XP$tg{Ht;ut^$dBJ&(MiJKVVt= zu4~Bud+Y-|s%I75D($xBX1cy&ng#z(Y!lyqXI=bXy!Yg;Tk*?@+YtFz9t?Irtu zzT%$06IPvdPS+|{rY6yu;lH3zU&z|`oK7JxeuwUFS*>dw-6`=_75}iwoY)?W|A;Za zp_**-8x-A?7l-1%vHTIQ`yFGQ0~ETTAKL|6yIGvxMQf}mrHii*;4uK~e<#YDC9jYd zpJgB56w?Q2X+8!2)4BhD)~W4*_CNP3`(las$4f& z^RR>+ihua|;qS-VU-&uHXh`oEU2D z|Hb*>1H}5kZkDN6(aMwWQP|L5P>3)3{~tP?j=%UlO5C!hS7rCKm#`)y?t?ci_;+4g 
zpbcUczQ*&-`i(U`tgRg0|2Nm^y~)_9>VC<37_+shiDH*dW10It9rt_A>2%EJ2NXH6ANv5!J*Q~8+iRB!{5!KP z(Dql}o@C;```B?f{*iU*-HnP5P~xAjnP{)Z3X#1L^#P>)L2!^jzDjxz3l?0oD? zedGUQ%8UrWcCy;+4L1%;!gR%d%Ep6ePHaP0@V~U(Sg!h}5I;t9; zo{nz#Z>rAb^{_89=CKa|Hozbr5A>(vJ74L_)1o`qDRjUKPR6}ezK_26V@llixvq6| zhr~-0@6d@szocWnu*0nU(ewVO&rjI@)1TLx>$}&)9r)kPGU|r^*5+Eui(a7d0doJ( zdxG*VFW_}R`uS&S@^a|RxIT1*HDFaCsmfOEXw2UwYwLZ?RjA074mPp7p3 z+Whobf2zqfzdi42{3HL8_-`~mK*j#g3V)f$XPMnBUeBY!_Zv0uH3AM@f`6a?ptF;P zdc6;@Hec#t=iwi^=G2JaQ)Rk&_IEdtqg(%v{HxN#EVjR5|6>e*y+O5kvE3|Q@1ouM zY$f9#{r^v$PrlF7iJ`ysdLLjzS&7htM|^;PC$$53KjeR@?1AZj?Iv<`#eaQCBCi7) zsgD1J50J+I}vSa|&-UI)~yTjdJkV(56k|Dhv{ ze^*fOi~mUp+rHGTe(sPk_TXM#FqOayXSTtyK0l}2D;L~>MMfy2uJ{MW3$CrS_y83A z!v~19fi3m=&Cyh6XV9rJ0Tk>D{Qu196#U}z6tnCNul519@);npE9No&pLaU{Gr~iS zfGu|@gRb}o1Im&DZTf%M05~JGp)^T1{s`N1_A3?_y7RFx6ltpfHYQX6D%^>UUy z;Qzl}KtH3c7B zO8ATbJwx)rooxTJ?eG6{*HFMODDzyHo>eq_+IKg7#BK01 z3ibp3f8lfr^7$EESw7FJeSk@Ow^0z=V9w+}_#XGk3SG~d)Jf3y6(;B-R15tDhpBd>$;){uNC(CPdK{;z$!jOUm2 z`{uL&8lagZAnNi`o8k4gQ1u_$=~aKI{X$uUj3B zp76toH|35CF;DBGU5AaOS3hhC$75x9gsk}eT_>Wt&(zO4(Cr3`fe_Kl< z6~wO4tOFP<#sj$*=kQr=D1tsA>rx~I_xZI` z|8u(e#VR-8jv;vTe&ip+*Zw$$Y_b~ona4O%y^Z3nQdLz4gQ)j2)zb5-4uLDBA4<7(uanEOhS7)8; zO}V@EPD6Pag}*+IWf1uPmD36NhEE>LK0y7}I(Lf^E{b%2?+y$8oyU9Vo230aT~uSY z%hqZ5Z>&fa{k?J)kOlwj11wG0rW@1H?_OE)HXZgsALKk4RP*xtB^@8;Po>HFt##Io zc@9YU-u4YH;r-*V;|?gjS0v#y{I@n%@;AWdTGjzs@t+^db2Tmco!5SsW$=*C|IlIo zU%P~Y0)9go=k#0aY-B0D_02k`^FFxW?{_+x(fe?e11uHx9hvs*+>GVmWB6VTwDE~bMf!@Yqqxns4OK}Erxm1 zB?p{`e=bJFqa&6%Knwoa2Y__}bw#(kEacPtSo$E5jt%vv!z>$z{hk4o2ncyRA{D1HTR-_*5W^sES z4YF{0;#fN53;h4D(+T;WnLOU>e1J{WRdjaNB9pd^si|=UT_^sZ%-l7)*=2ZN{&zB#wMDYPy@Q?YxT;3np+E~?#;&<bU5I5>;sJGUST~XR&l~zIx^Uw@&CV0%n9 zk8+hJY_Y{Z>w(Ph3Dj7T)`MdAEb8OaC!8jO9qE{ZKEGif;O3qc+RbC9o(yp+&kwSm zIWcZHRpn&6Ip>^Oe5d2TIx~{(fBr6*?EiTUQ2NPXRFe~>Tg8j+#nX{N0o)g%|NouS z3HZOf0_Pog$2vfqzjSTQM{INb#%a7pUeWI^@#_87y#SEY@!wGPkTOrd#Q5iRfWSV8 z#{gybclVO#s5GmxVn;EfK5Fre0L+aG}P0Dg>TRSpZ}$UjDM$--|y(?u>M}- z1I##omJauQ(J6d}K0oODJE7mJbM-qw 
zKRjgH|Fa#C!DoWk7vJl74Z8V8O2{!f=;s>jL+2du`5pTJw{)|#?i(#H?m7hz?&~~W z`|&qMhYzCS``&%laxW#uCHSw+zb?-Ivhe}34ya5!*~@A?`L%-EHz{afAMX4A;1UA< zFRy@q%5!~y;@hiB7tsN>$DPGz@b&t-HN1yJzjI`sEM0CJ!zK7{s?HJTe`v>c`2L>4ro6^Jz%pIxXM{u_oe1G^zn=yB`tuiU z88UVxulIjwgzCFv8C-&Yoco=3L#86%bG=ggLVtQGXC%^U}=rHZ~L*MTl8MV)6zt0~ie!G6> zojjQXj*n4aUQW^PuXHM3_xt{VLdU;EB@evwTonhlqRa4)?|){V9;49#jDH>jlt0+# z!0Op1m3#df9qQkg@$c&r0{){`Eb|y2U~O?RMSrl8eJaqW&fzKackoc&H-6!~ZR47M z^@am3!#}?NopWIZV_vQU1pWt6;q{Mr9gsdg^2)4CIyPn$_f^i5S1m5~`@Tr0rcCk( zA7JUjhivn|x(tN$~quv{}16a!6y^e2z3 z%W}^6Gs&1dIMBC01&+1Gg_Il;N;L)geJ?^6p)+oxF2_Ii z{APrWR(*i*|7GwzKt;-7-MALNE0AR>fMuWk*e1CL5B&NFU4ZsFF>wmr+q;jd^YV0) z5id#D<@j%|D;8&eDBlEC{QnueF1YCC8g$mWG(*OPOSHdFe~aAr$Uj^4zsy;y3|KnSMEe$B1<3s7g6bc$PiuSS$xsTkNI?BGS zgdPa!PZ9GLQ110OUN_q6cwzDE6kU#guoq{4D(iqO{r}0KlodXn8p>02;Xd}@pO`$2 z_V{vN;@{sSH|N0~pFVVG=t#P?bq8TTk{P|MT|% z-~%X4)NijW`ieb1=>G?p;i1)Mm2gK<#9wL?F|@cV&8+XFLn7I-+pv>*ceJae!@H#uxn@-0{__4fp37q z2VlkjP|7(sm6~hwyRJbme=WNIfDR2AO?&*%|3eR$U4fcrtm*vvG~46y@~VesEvTfpo7UuXQY4S?}K6V3)KzV$h;1JY++H*WnF#y|T1 z!6tby4+8xL(ZRuEC^bT#`6N3`xznG9!9VPNBF9`pj9^v7sGbZjm z824Je9_Txeg2%i{`M2Zs5G8j_(=hl414|RO+2Wt|KvwuG)L8LY7ycI9O`rpV*as*u zFodrrc`^?IeFxFW>2Fe1j=68Z0w_b_zdGx@YWpi=fZ^;D7(rMcq(`VOD5RqkUZ>rD zgDH@)Z<6QpU^nZ4%d6H=bG<(E%6iDDj%gYS|5)#vb!I&40LvNxUIPe!0M-ZV5t{4k zDg3RawA*J8V}GbgUcrMQw72gFdJqz(he)|wnufwZ_WR^sd_&^{RPbML?R|CuHtNFL z*v~i6E))M=rTcdK4xuCCU#HU4G+kuMjS@8!{;}V;=;o*5JV4d{KQWAQ&d;Fcx*}cJ znsn?W+yChMhneI>Jb(_kyy|mmX>8I(uG}P1W8t5fR*@2H+yC<&4*^3o;!3+nVbsKW2$#44~j&tOJUE zhj3oF!vn7gjb8IL?c}~-xBm!3(BA%I>A=vJ-In$b9#4Dvj%J-YjD4#^*|vgRWench zmjh)Cdt-^W+5m>bKk!|8Z!_bc_W}#sU$OtQBPJ5Q7otZK#w zKS-hCwbP>J8e{Z>-C)54nUMLVtv{37!z(UoS~{OgFafp`fu-4WnnZ zI$)R2Fgmwv6`wb$&m6OX+tHBtZzy}f>ww1D>HwYxEPt?H7gm;MWYeM1@BuRZ{71Q# zxD1EKOr?sfTwUtw%0!_DF228x0{e_H2CrpXGtc)AeU%CmyzU!=hQxnMLm96Fn$2SX ztOKgp+e-x1n7 z_+{E@1bu7aIokfl*1bkxku%F+Nc;oiMRBX_e1O9!_rh#ysV{Xne{gQ}rH{U#9X`N6 z`>k9|JAFq|LQtr~>uHn(!S4s5XK3HxS6BzY-ZBQyL#Hp~^Mfmm!X{^w!;tt##mkcq 
z*!KTy2W0ZzfQHf}UHB5eH<*#EAVMd+c+pM2xd=T+b=j)b5S6xe?pj|E)MbF}-O z0TU=E?v5_! zQa7J-LDW2UhWP@iug>%zcZCR$TiOrbLt(GJ#muGRrNd`3~&ym$FK z!}@%CzLzZ0^EJcbzqPrR@_EgV)d$G^KhFW=$1LTu939_2^%a$LeCk4$!_l0vhZKgRz!6TG${uE!Pa&I>np9AX(nAM}#TX***f zYV~@L_5o_!BsDUccJ&?aa=xSeZ)`iDEt)6a8yf%hC5dA1ueJYIY=DZCAYIxd_1qQO z6)?`_G6?;H{{O_RWz<-$!=49Sxc794)|Qmfu_+jTVeI8P{kxs6h1)6AM}9hXa|?!;OH4tp6U3pj-&bJ zsPtmL+QH*5*k3N>JL{k0(-u*Eg+6CaJBn8hO2^RnZ)ZaVG5_ zIi0pc_qvMzoOj^hNmOtz*|23BG(7%Wo2n`A@&c<5P}l;)sUUU*9~bQSZ#WbrKBPTErtlclweko3v)z9@ z9T+{6%6NaWHw5epI{(2}JpMxe?>hc(_Zv?M!Cvuw0K?;-9sN{%`%AI**V_M&pzL$g zsJS-Z;jpMFDxsiP-(njSd-s z4VfV8`iq}zqOFX7SMnWge`BXUYfKErcelWQeNnt+9gr3O&;=E#`snDGuXnQy0{^eL zoKTJnA8%kEpg!L~H@wiAjagN8{uE##~&o;jabbRItKA+a}*8$?2oTui0#I_gvK3u4Kk^jwI!A22( zJT`uG~=YMCv=bpc$Q7CSKf0VcE{!Ux`vmVIeGr=1x9&4)OnA@tn zB04x>J^_2KltY>K&VG}4KKh1ZDrewS3X@W4&rq!YaV_7`?ukeBIcLUT>~|ad*JNK* zeSgvim~m<})#OAuoTWIU^5p!Fc^u02cfe~dr>%ak(CvMm-ybENJnwQ|qrJB?r9?zK zT&@h5pxfZTu_9Hh`_16>zbgKD3{aL7s0&*!f3}0R_`Ylic|eyy_L_0J;tSTN*2Q-_Lv-A0T`H1=l{{bwG~)ZbQP6aN6pRzJH?2X{+C> zbZiFu04qKBJC)_U-{!!$`L5?Vw|U^unN;+^@#9KeI;y8j)NSw&KY!7!bz=Xowf`T< z`-5jub6wHXaiN2^v*Yg3&H=Bx9Q#85uuSe9HJ3^sXLV3g&v+DnD2)9r2GF(4>w^;( zP*uL;Wy*jp88!-e$e6Lc&5*A?F6bwFdSVkZ zQ#I>=NAY-II2Ff#!$5HC9$9>!;^HS;*&p>9Z4H?0azZ&GKVTnVi{sZjxl)T@&TrpX zjQc0LoX2SAEq)W}&Vf^|WQjBLb}Rh1wbWC7>~hT*0N)20L3z=OsI|%Q>*MYm3={HT z2>i2L9-00wpG)OAKET@23Ocp$Q^W8W?Yv{aG|IYu*O?e}WnONDe=w>vVJl;Pm}Ly0 z*#B8)CsSipro;J|5p$ci_n&Hrerc=!WZEvA0Wn87eC$3{l4pME2-bN_)nsP zFTcaSzhZ~Wl0g%6JN#E?M%s=4SqFrTrJ6j)pV?lTo^q4f-u;Hp{&oB}rcL}ih64>{4|yHX%bNaQ83U9j z?{`cFnp7aCM`ygx_9^?{T#S9>k8<1$4AGR({cg)L@&v|=z)$-!K+oSh65)PK?R|;e zuK&lmzqyy*u=oIB|10<}j9bkU?>~jdS8q~rN`||!_G!xNcKFA+|3x>}SbTsI|GW<9;#_K} zFMS$bbolPgz);$35P3y8f~URbHhY0v8k?vhyMXSWyhJD7;&YYzO=s+53}rCvb9pxV zPNuNMpA*i1@uJ{%_($86KRj%Uf3^WK&%8qQrAZENy3|WIXlq}V+kmOAC;!)J=g_%y zV|NH8hF+w^kPC*SyFn2YyXi26E%}o6jeCc-^qI;!l*cbd;9(bKNDTFgGe5N$aZCKy z=3f`z0Ti}B?0?<|i2JGx$M4xJew0bON5BX8x?$vZGnZwvpOI+_ZSqwJWi>kP!OH`$ 
zETPKWLM=AD;odFr-&CC~z5}f615ogv&g%e5?rnB>o7I(9QPAt}3A`Ccj;ArEj7|oy zm%8ZdwQULfUs-?9(M!l2E{i%O=$80zZLX#KD@!y!K<@ufj->pUWjuG|*mc{j%`FtZ zVmxdd@4-xiv51}D#R`Ek4lu>-6YojDq{e}{>R$jrfSE3 zFZ0^AqwF6t@!wNyxkG%k>%~uZx#gUSJ(zp(x7*^sD&vf14FK>zTAUGFTM*X^Hgx?; zO5|1cVa?$FAAUfSJcS3F{io6Hk#A8!Vrtg{;JLrLE&l6E@3YSz>wu*H4?18ZRirrn z+~&fhH1@l{NsyP#jDM3ng9n@Zrc(6R2YB6xXYHx#fx2>A{I}GX@SOh~iw{8Q|8X{0 z(ap8|$sotBkEtmsr-KvUrA>ao{|u8nfCp&H{jV(JZ>xF5cfLDQ32ux3w$>&pj9X<{ z2O#l3l5(Tw6V?biGGHCcsl{K@Mqd;Eo}&M@^qoofj-GR*n7CtFZi{~~yZpg^yZ(Q) z_*QU3`9p`dMa;$^6aOC2??Zo~??1h4y|wKFt?`dDfAXT=5o>?M8eoC{QQ})+ zI4j7Jp{k&i{YA^!c7*?LmPu~M1J>KDyRWQ2Of5~0-6!fuIdgJaZjFB|*et&DHLnB0 zH$he<^TbAbNrJy z`#0mHWetF3{Esui>k4l>fIYTJM16UHHkkN#JKYEDAA4ghRpffbnf|sy+x1^=kN^7O zySxr)g0=s*_ya3ak9S@3Ui_L6a)~ziqu-ltlAG{gqu)#lp0S#WQ$7Ft$-NM1x5q!e z{ey1+2^+xD|EIGLu=w^DJUnphx~}Y74`}Pac_#MV2>U^d{h}1l{cch(Ft2^Z?eULq z|KS^eI0sye|1JH0-sOdaZw5Ltz<%5_ZW(PfZ2-gTK=k`Z=dPvVM`pj@1qR$6|EOEp z{Xkp%vmGGL1g~)Hp3bJ)dOG&zXSC4|V}Lm(8IcG6JT_wrTegWRa=L$>$L-st5>fU zbolULYPOs^eE$4-@$8dNKB1jEcT!zl$9vja_#DXK;9xawU|=9MG&IQMyL0sj{MY0} z@%W$D0a@&Su_h?&CBk!u2L2|{g|&Ox2WaBoi284%-)!1CU>;rm`Y_d%_v||<8#Zj9 zXP$YcQ~IMn`Xl;>fA|NQHf5LxZ++`qoxY<@@9nqWZYPQ}X3S9E_4D(i z`g&!~Iw&ZJ{`61(RQ&e0fBUx-9Ua~7T|7ujOQUC>eO66l)~s2)_oYYQ!SV?FH*!Cp z6)}l*0M7%!{6BsGj2JbH5%Qk`TdO>H>x=L z<3Ijm0`C6q@BU8Epa1!vtG@x$Q>RWP^t~Aw8T6g+d`JE5FaF{$=x_e!Z|JZ8`mb&1 zFaPo{2{wRzm^pK%`ffl#fQtXJvNHP3Z+@fx20iik@nd@Q=#fg$J+N0$R+I^4?MzSD z=d1bKBkMOazfHhjR#uh`{ueD;L@_Zj z6crU^LzgaH5`DD>cjC4){Nu+jzx-0o5BdXT(h}?r=qPDp3>q{@)gSixbn&-G;2(Z| z`1{j&|F5oX-R=z9;MIkZVJA9R<%`%d$7y#J>yh56px2fiEqm*e%D| z$LARRuEjrW`dhbdQGR~D80*6hfPF5p_p4w1iYh89C^IwDrvJwnAASFY3l~&^FE1&n z<8$e|!as!dt+(D%^F-gjWXTd*xpJlOGd}8oxEyT{-k>;16qPwoHt zn}F~MR;D@jTh48*ZFF|kZd&ha;@@dwApg1Sb5;1g@0FDl!?-Vb%;!Az$ko*~BDDDb zvp@SY`tEnXOF#VK4@DpR_kaKQD%OC*BS(&i#O%la@Du#iU;UL0Auad}I|@(2|MlzF zN#^~J|M-t${sHL}78a5g{~vz%VMob#=b6XgAM5?G?qAsl03YC39uJJ7;`sF}Ck_-8 
z=IYIT;llRZ81M$$04C|dgAI)N^}guq=hF6Ji+JzFH+1*dC0@g;&$oHC_?P{>{qXm6f`|Hj5f8a;Zns(Y3%UoKuhaNvMCKk%>r`mdCj zn5a#*C-)wM|F)I}vG31120;Hmn)0qJrq-sO%>_M)fsdo(X;WYJ0kVvm)O#KPHbeo_;*`(2BF^tqVR$K3v>pMFXsMvNfz zreS7?m9XnR|?LPbLGm-?qL3nt0yZrFrN%%*cRy_-gE2qSG6rD$vkxFE@^!*F0geo%X}PqQ?a*yQ z-{HQ=q#pA?@NE(89Jz${jDMF7zP^e=7JN^(8AZ*H)zwQ*$ob(EGC z|M368#+ISDxR`$W)1RuCllHbf{;@`=Cjs;I(njdhr;p9rBAE`>82$FQzZLPI2bL~f zO6c?P9_AS%A|hne-dsHj|2X%@X8aEy!0B;RlNZw)-gNp7_Kz2*=1>98H(}1nq+amg zVJ4NZ9<9tP=CwLig#Fb$T)Wa~`}F1qE&j2l2LAo3sw&~m!}fe3gK_8?I|rSRp;+9pFd&31T{@y{Qmv>!nVLXKYWieeDlpW!uCM^SYIIL z{C$0WMHywhUR*s2{|)6S;`~2l9U%IDWeiZBe4rPsF<OR&;Lx%J@=gO8^U)e=|?~M5uHALn%1pbM@S3(HwgW=Oc!~=zkuh6 zBi9(oe17nQAE_IKzOtUr3n z;8FO;x&K8s*I9f368~fPI{|MJz87EylLMV`0ORP9BS+GxQKM`K&*0Ys{=&k-sJ6CN zO`@o%NciyJAGCi@{*CwH!^gTn?C}{se0ZmEU-cjk!1sVP{T-1OhxS#*Zymj(eTFzl zzpFg7@p0c#p4zBzqPrJ@?)3VjsbWbaDL2kYHhZT-Fm?* z^A(c=p2h)>!#^4beLdFxiupgx0WxKun@+F|%wTfB;7${|5H;XNJG3>Hl96`kl?0?2T z-xu9l$8#_(YURxvlLKDP0guFgTWd2F-uOi0162C|+{+6HYXr?;a=_$(;DATsA2lma zK4{zjvmKCib`oKafEi2dus@L2rU=3Nzg|5W?`G}{17*du5LlLICP1P448|BaOy z+}}?Tem^n(2mbjyfYSRt+oNsDiOB&E=0FASDKdk}0h0qJ2TTr_956Xxa=_$($pMoC sCI?Iom>e)UU~<6ZfXM-q111Md4wxJ;Ibd?Y { const fixedFilter = { field: 'container', - value: urn, + values: [urn], }; return ( diff --git a/datahub-web-react/src/app/entity/group/GroupAssets.tsx b/datahub-web-react/src/app/entity/group/GroupAssets.tsx index 3dd1e9f97e4412..0417dc1b13b78e 100644 --- a/datahub-web-react/src/app/entity/group/GroupAssets.tsx +++ b/datahub-web-react/src/app/entity/group/GroupAssets.tsx @@ -14,7 +14,7 @@ export const GroupAssets = ({ urn }: Props) => { return ( diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx index f0c6cb5dca6519..634b82105a0baa 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx +++ 
b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx @@ -2,7 +2,7 @@ import React, { useState, useEffect } from 'react'; import styled from 'styled-components'; import { ApolloError } from '@apollo/client'; import { EntityType, FacetFilterInput } from '../../../../../../types.generated'; -import { ENTITY_FILTER_NAME } from '../../../../../search/utils/constants'; +import { ENTITY_FILTER_NAME, UnionType } from '../../../../../search/utils/constants'; import { SearchCfg } from '../../../../../../conf'; import { EmbeddedListSearchResults } from './EmbeddedListSearchResults'; import EmbeddedListSearchHeader from './EmbeddedListSearchHeader'; @@ -11,6 +11,7 @@ import { GetSearchResultsParams, SearchResultsInterface } from './types'; import { isListSubset } from '../../../utils'; import { EntityAndType } from '../../../types'; import { Message } from '../../../../../shared/Message'; +import { generateOrFilters } from '../../../../../search/utils/generateOrFilters'; const Container = styled.div` display: flex; @@ -48,10 +49,12 @@ export const addFixedQuery = (baseQuery: string, fixedQuery: string, emptyQuery: type Props = { query: string; page: number; + unionType: UnionType; filters: FacetFilterInput[]; onChangeQuery: (query) => void; onChangeFilters: (filters) => void; onChangePage: (page) => void; + onChangeUnionType: (unionType: UnionType) => void; emptySearchQuery?: string | null; fixedFilter?: FacetFilterInput | null; fixedQuery?: string | null; @@ -72,9 +75,11 @@ export const EmbeddedListSearch = ({ query, filters, page, + unionType, onChangeQuery, onChangeFilters, onChangePage, + onChangeUnionType, emptySearchQuery, fixedFilter, fixedQuery, @@ -95,7 +100,7 @@ export const EmbeddedListSearch = ({ const finalFilters = (fixedFilter && [...filtersWithoutEntities, fixedFilter]) || filtersWithoutEntities; const entityFilters: Array = filters .filter((filter) => filter.field === ENTITY_FILTER_NAME) - .map((filter) => 
filter.value.toUpperCase() as EntityType); + .flatMap((filter) => filter.values.map((value) => value?.toUpperCase() as EntityType)); const [showFilters, setShowFilters] = useState(defaultShowFilters || false); const [isSelectMode, setIsSelectMode] = useState(false); @@ -109,7 +114,8 @@ export const EmbeddedListSearch = ({ query: finalQuery, start: (page - 1) * SearchCfg.RESULTS_PER_PAGE, count: SearchCfg.RESULTS_PER_PAGE, - filters: finalFilters, + filters: [], + orFilters: generateOrFilters(unionType, filtersWithoutEntities), }, }, skip: true, @@ -126,7 +132,8 @@ export const EmbeddedListSearch = ({ query: finalQuery, start: (page - 1) * numResultsPerPage, count: numResultsPerPage, - filters: finalFilters, + filters: [], + orFilters: generateOrFilters(unionType, filtersWithoutEntities), }, }, }); @@ -200,12 +207,14 @@ export const EmbeddedListSearch = ({ searchBarInputStyle={searchBarInputStyle} /> (''); const [page, setPage] = useState(1); + const [unionType, setUnionType] = useState(UnionType.AND); + const [filters, setFilters] = useState>([]); const onChangeQuery = (q: string) => { @@ -70,9 +73,11 @@ export const EmbeddedListSearchModal = ({ query={query} filters={filters} page={page} + unionType={unionType} onChangeQuery={onChangeQuery} onChangeFilters={onChangeFilters} onChangePage={onChangePage} + onChangeUnionType={setUnionType} emptySearchQuery={emptySearchQuery} fixedFilter={fixedFilter} fixedQuery={fixedQuery} diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchResults.tsx b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchResults.tsx index 6fc3bc3bd9e984..d70385bf2b243e 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchResults.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchResults.tsx @@ -2,11 +2,12 @@ import React from 'react'; import { Pagination, Typography } from 'antd'; import 
styled from 'styled-components'; import { FacetFilterInput, FacetMetadata, SearchResults as SearchResultType } from '../../../../../../types.generated'; -import { SearchFilters } from '../../../../../search/SearchFilters'; import { SearchCfg } from '../../../../../../conf'; import { EntityNameList } from '../../../../../recommendations/renderer/component/EntityNameList'; import { ReactComponent as LoadingSvg } from '../../../../../../images/datahub-logo-color-loading_pendulum.svg'; import { EntityAndType } from '../../../types'; +import { UnionType } from '../../../../../search/utils/constants'; +import { SearchFiltersSection } from '../../../../../search/SearchFiltersSection'; const SearchBody = styled.div` height: 100%; @@ -44,33 +45,11 @@ const PaginationInfoContainer = styled.span` align-items: center; `; -const FiltersHeader = styled.div` - font-size: 14px; - font-weight: 600; - flex: 0 0 auto; - - padding-left: 20px; - padding-right: 20px; - padding-bottom: 8px; - - width: 100%; - height: 46px; - line-height: 46px; - border-bottom: 1px solid; - border-color: ${(props) => props.theme.styles['border-color-base']}; -`; - const StyledPagination = styled(Pagination)` margin: 0px; padding: 0px; `; -const SearchFilterContainer = styled.div` - padding-top: 10px; - flex: 1 1 auto; - overflow: hidden; -`; - const LoadingContainer = styled.div` padding-top: 40px; padding-bottom: 40px; @@ -86,8 +65,10 @@ interface Props { selectedFilters: Array; loading: boolean; showFilters?: boolean; + unionType: UnionType; onChangeFilters: (filters: Array) => void; onChangePage: (page: number) => void; + onChangeUnionType: (unionType: UnionType) => void; isSelectMode: boolean; selectedEntities: EntityAndType[]; setSelectedEntities: (entities: EntityAndType[]) => any; @@ -102,6 +83,8 @@ export const EmbeddedListSearchResults = ({ selectedFilters, loading, showFilters, + unionType, + onChangeUnionType, onChangeFilters, onChangePage, isSelectMode, @@ -120,15 +103,14 @@ export const 
EmbeddedListSearchResults = ({ {!!showFilters && ( - Filter - - onChangeFilters(newFilters)} - /> - + )} diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchSection.tsx b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchSection.tsx index c813b97ecfed57..8e8f5b6b16a08f 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchSection.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchSection.tsx @@ -8,6 +8,7 @@ import { navigateToEntitySearchUrl } from './navigateToEntitySearchUrl'; import { GetSearchResultsParams, SearchResultsInterface } from './types'; import { useEntityQueryParams } from '../../../containers/profile/utils'; import { EmbeddedListSearch } from './EmbeddedListSearch'; +import { UnionType } from '../../../../../search/utils/constants'; type Props = { emptySearchQuery?: string | null; @@ -44,6 +45,8 @@ export const EmbeddedListSearchSection = ({ const params = QueryString.parse(location.search, { arrayFormat: 'comma' }); const query: string = params?.query as string; const page: number = params.page && Number(params.page as string) > 0 ? 
Number(params.page as string) : 1; + const unionType: UnionType = Number(params.unionType as any as UnionType) || UnionType.AND; + const filters: Array = useFilters(params); const onSearch = (q: string) => { @@ -54,6 +57,7 @@ export const EmbeddedListSearchSection = ({ page: 1, filters, history, + unionType, }); }; @@ -65,6 +69,7 @@ export const EmbeddedListSearchSection = ({ page: 1, filters: newFilters, history, + unionType, }); }; @@ -76,6 +81,19 @@ export const EmbeddedListSearchSection = ({ page: newPage, filters, history, + unionType, + }); + }; + + const onChangeUnionType = (newUnionType: UnionType) => { + navigateToEntitySearchUrl({ + baseUrl: location.pathname, + baseParams, + query, + page, + filters, + history, + unionType: newUnionType, }); }; @@ -83,10 +101,12 @@ export const EmbeddedListSearchSection = ({ (''); const [page, setPage] = useState(1); const [filters, setFilters] = useState>([]); + const [unionType, setUnionType] = useState(UnionType.AND); const [showFilters, setShowFilters] = useState(false); const [numResultsPerPage, setNumResultsPerPage] = useState(SearchCfg.RESULTS_PER_PAGE); @@ -70,7 +71,7 @@ export const SearchSelect = ({ fixedEntityTypes, placeholderText, selectedEntiti ); const entityFilters: Array = filters .filter((filter) => filter.field === ENTITY_FILTER_NAME) - .map((filter) => filter.value.toUpperCase() as EntityType); + .flatMap((filter) => filter.values.map((value) => value.toUpperCase() as EntityType)); const finalEntityTypes = (entityFilters.length > 0 && entityFilters) || fixedEntityTypes || []; // Execute search @@ -166,9 +167,11 @@ export const SearchSelect = ({ fixedEntityTypes, placeholderText, selectedEntiti loading={loading} searchResponse={searchAcrossEntities} filters={facets} + unionType={unionType} selectedFilters={filters} onChangeFilters={onChangeFilters} onChangePage={onChangePage} + onChangeUnionType={setUnionType} page={page} showFilters={showFilters} numResultsPerPage={numResultsPerPage} diff --git 
a/datahub-web-react/src/app/entity/shared/components/styled/search/navigateToEntitySearchUrl.ts b/datahub-web-react/src/app/entity/shared/components/styled/search/navigateToEntitySearchUrl.ts index e0f59c4f7fa2ac..20ede4f2ae502e 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/navigateToEntitySearchUrl.ts +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/navigateToEntitySearchUrl.ts @@ -2,6 +2,7 @@ import { RouteComponentProps } from 'react-router'; import * as QueryString from 'query-string'; import { EntityType, FacetFilterInput } from '../../../../../../types.generated'; import filtersToQueryStringParams from '../../../../../search/utils/filtersToQueryStringParams'; +import { UnionType } from '../../../../../search/utils/constants'; export const navigateToEntitySearchUrl = ({ baseUrl, @@ -11,6 +12,7 @@ export const navigateToEntitySearchUrl = ({ page: newPage = 1, filters: newFilters, history, + unionType, }: { baseUrl: string; baseParams: Record; @@ -19,10 +21,11 @@ export const navigateToEntitySearchUrl = ({ page?: number; filters?: Array; history: RouteComponentProps['history']; + unionType: UnionType; }) => { const constructedFilters = newFilters || []; if (newType) { - constructedFilters.push({ field: 'entity', value: newType }); + constructedFilters.push({ field: 'entity', values: [newType] }); } const search = QueryString.stringify( @@ -30,6 +33,7 @@ export const navigateToEntitySearchUrl = ({ ...filtersToQueryStringParams(constructedFilters), query: newQuery, page: newPage, + unionType, ...baseParams, }, { arrayFormat: 'comma' }, diff --git a/datahub-web-react/src/app/entity/shared/tabs/Lineage/ImpactAnalysis.tsx b/datahub-web-react/src/app/entity/shared/tabs/Lineage/ImpactAnalysis.tsx index 5993df25bbef67..e5d65f99336311 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Lineage/ImpactAnalysis.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Lineage/ImpactAnalysis.tsx @@ -33,7 +33,7 @@ 
export const ImpactAnalysis = ({ urn, direction }: Props) => { ); const entityFilters: Array = filters .filter((filter) => filter.field === ENTITY_FILTER_NAME) - .map((filter) => filter.value.toUpperCase() as EntityType); + .flatMap((filter) => filter.values.map((value) => value.toUpperCase() as EntityType)); const { data, loading } = useSearchAcrossLineageQuery({ variables: { @@ -67,7 +67,7 @@ export const ImpactAnalysis = ({ urn, direction }: Props) => { direction, })} defaultShowFilters - defaultFilters={[{ field: 'degree', value: '1' }]} + defaultFilters={[{ field: 'degree', values: ['1'] }]} /> ); diff --git a/datahub-web-react/src/app/entity/user/UserAssets.tsx b/datahub-web-react/src/app/entity/user/UserAssets.tsx index 8a8d4b569871e5..bd1f0b738fcda8 100644 --- a/datahub-web-react/src/app/entity/user/UserAssets.tsx +++ b/datahub-web-react/src/app/entity/user/UserAssets.tsx @@ -15,7 +15,7 @@ export const UserAssets = ({ urn }: Props) => { return ( diff --git a/datahub-web-react/src/app/home/HomePageHeader.tsx b/datahub-web-react/src/app/home/HomePageHeader.tsx index daf7c305bb67dd..cce62f9a2a3d63 100644 --- a/datahub-web-react/src/app/home/HomePageHeader.tsx +++ b/datahub-web-react/src/app/home/HomePageHeader.tsx @@ -166,6 +166,7 @@ export const HomePageHeader = () => { start: 0, count: 6, filters: [], + orFilters: [], }, }, }); diff --git a/datahub-web-react/src/app/ingest/source/IngestedAssets.tsx b/datahub-web-react/src/app/ingest/source/IngestedAssets.tsx index ec1bcbed24dfae..d13a0092729e9a 100644 --- a/datahub-web-react/src/app/ingest/source/IngestedAssets.tsx +++ b/datahub-web-react/src/app/ingest/source/IngestedAssets.tsx @@ -71,7 +71,7 @@ export default function IngestedAssets({ id }: Props) { filters: [ { field: 'runId', - value: id, + values: [id], }, ], }, @@ -135,7 +135,7 @@ export default function IngestedAssets({ id }: Props) { {showAssetSearch && ( setShowAssetSearch(false)} /> )} diff --git 
a/datahub-web-react/src/app/recommendations/renderer/component/GlossaryTermSearchList.tsx b/datahub-web-react/src/app/recommendations/renderer/component/GlossaryTermSearchList.tsx index 2ee11c46d522de..4ccb2fc8eb0234 100644 --- a/datahub-web-react/src/app/recommendations/renderer/component/GlossaryTermSearchList.tsx +++ b/datahub-web-react/src/app/recommendations/renderer/component/GlossaryTermSearchList.tsx @@ -53,7 +53,7 @@ export const GlossaryTermSearchList = ({ content, onClick }: Props) => { filters: [ { field: 'glossaryTerms', - value: term.urn, + values: [term.urn], }, ], history, diff --git a/datahub-web-react/src/app/recommendations/renderer/component/TagSearchList.tsx b/datahub-web-react/src/app/recommendations/renderer/component/TagSearchList.tsx index 91b3ab65121f41..18a82a8595ceda 100644 --- a/datahub-web-react/src/app/recommendations/renderer/component/TagSearchList.tsx +++ b/datahub-web-react/src/app/recommendations/renderer/component/TagSearchList.tsx @@ -44,7 +44,7 @@ export const TagSearchList = ({ content, onClick }: Props) => { filters: [ { field: 'tags', - value: tag.urn, + values: [tag.urn], }, ], history, diff --git a/datahub-web-react/src/app/search/AdvancedSearchAddFilterSelect.tsx b/datahub-web-react/src/app/search/AdvancedSearchAddFilterSelect.tsx index 5130ecc3b628b0..3f6e679e62866c 100644 --- a/datahub-web-react/src/app/search/AdvancedSearchAddFilterSelect.tsx +++ b/datahub-web-react/src/app/search/AdvancedSearchAddFilterSelect.tsx @@ -37,11 +37,13 @@ export const AdvancedSearchAddFilterSelect = ({ selectedFilters, onFilterFieldSe > {Object.keys(FIELD_TO_LABEL) .sort((a, b) => FIELD_TO_LABEL[a].localeCompare(FIELD_TO_LABEL[b])) + .filter((key) => key !== 'degree') .map((key) => ( diff --git a/datahub-web-react/src/app/search/AdvancedSearchFilter.tsx b/datahub-web-react/src/app/search/AdvancedSearchFilter.tsx index a4162defb33fa2..ebf0e585a4fea8 100644 --- a/datahub-web-react/src/app/search/AdvancedSearchFilter.tsx +++ 
b/datahub-web-react/src/app/search/AdvancedSearchFilter.tsx @@ -14,6 +14,7 @@ type Props = { filter: FacetFilterInput; onClose: () => void; onUpdate: (newValue: FacetFilterInput) => void; + loading: boolean; }; const FilterContainer = styled.div` @@ -46,7 +47,7 @@ const FilterFieldLabel = styled.span` margin-right: 2px; `; -export const AdvancedSearchFilter = ({ facet, filter, onClose, onUpdate }: Props) => { +export const AdvancedSearchFilter = ({ facet, filter, onClose, onUpdate, loading }: Props) => { const [isEditing, setIsEditing] = useState(false); return ( <> @@ -73,7 +74,7 @@ export const AdvancedSearchFilter = ({ facet, filter, onClose, onUpdate }: Props - + {!loading && } {isEditing && ( { const newFilter: FacetFilterInput = { field: filter.field, - value: '', values: values as string[], condition: filter.condition, negated: filter.negated, diff --git a/datahub-web-react/src/app/search/AdvancedSearchFilterValuesSection.tsx b/datahub-web-react/src/app/search/AdvancedSearchFilterValuesSection.tsx index f08eacc5b8b4ed..eaa656338331d4 100644 --- a/datahub-web-react/src/app/search/AdvancedSearchFilterValuesSection.tsx +++ b/datahub-web-react/src/app/search/AdvancedSearchFilterValuesSection.tsx @@ -30,7 +30,7 @@ export const AdvancedSearchFilterValuesSection = ({ facet, filter }: Props) => { return ( - + ); })} diff --git a/datahub-web-react/src/app/search/AdvancedSearchFilters.tsx b/datahub-web-react/src/app/search/AdvancedSearchFilters.tsx index dedbec352ffc0d..f4e70e1b9007d4 100644 --- a/datahub-web-react/src/app/search/AdvancedSearchFilters.tsx +++ b/datahub-web-react/src/app/search/AdvancedSearchFilters.tsx @@ -2,7 +2,7 @@ import * as React from 'react'; import { useState } from 'react'; import styled from 'styled-components'; -import { FacetFilterInput, FacetMetadata, SearchCondition } from '../../types.generated'; +import { FacetFilterInput, FacetMetadata, FilterOperator } from '../../types.generated'; import { ANTD_GRAY } from 
'../entity/shared/constants'; import { AdvancedSearchFilter } from './AdvancedSearchFilter'; import { AdvancedSearchFilterOverallUnionTypeSelect } from './AdvancedSearchFilterOverallUnionTypeSelect'; @@ -47,6 +47,7 @@ interface Props { onFilterSelect: (newFilters: Array) => void; onChangeUnionType: (unionType: UnionType) => void; unionType?: UnionType; + loading: boolean; } export const AdvancedSearchFilters = ({ @@ -55,6 +56,7 @@ export const AdvancedSearchFilters = ({ selectedFilters, onFilterSelect, onChangeUnionType, + loading, }: Props) => { const [filterField, setFilterField] = useState(null); @@ -68,10 +70,9 @@ export const AdvancedSearchFilters = ({ const newFilter: FacetFilterInput = { field: filterField, values: values as string[], - value: '', // TODO(Gabe): remove once we refactor the model condition: FIELDS_THAT_USE_CONTAINS_OPERATOR.includes(filterField) - ? SearchCondition.Contain - : SearchCondition.Equal, + ? FilterOperator.Contain + : FilterOperator.Equal, }; onFilterSelect([...selectedFilters, newFilter]); }; @@ -94,6 +95,7 @@ export const AdvancedSearchFilters = ({ {selectedFilters.map((filter) => ( facet.field === filter.field) || facets[0]} + loading={loading} filter={filter} onClose={() => { onFilterSelect(selectedFilters.filter((f) => f !== filter)); diff --git a/datahub-web-react/src/app/search/EditTextModal.tsx b/datahub-web-react/src/app/search/EditTextModal.tsx index dd69547604b610..43580d2e2b497e 100644 --- a/datahub-web-react/src/app/search/EditTextModal.tsx +++ b/datahub-web-react/src/app/search/EditTextModal.tsx @@ -21,13 +21,17 @@ export const EditTextModal = ({ defaultValue, onCloseModal, onOk, title }: Props - } > - setStagedValue(e.target.value)} value={stagedValue} /> + setStagedValue(e.target.value)} value={stagedValue} /> ); }; diff --git a/datahub-web-react/src/app/search/SearchFiltersSection.tsx b/datahub-web-react/src/app/search/SearchFiltersSection.tsx new file mode 100644 index 00000000000000..5fddd4d81f75d9 --- /dev/null 
+++ b/datahub-web-react/src/app/search/SearchFiltersSection.tsx @@ -0,0 +1,99 @@ +import { Button } from 'antd'; +import React, { useState } from 'react'; +import styled from 'styled-components/macro'; +import { FacetFilterInput, FacetMetadata } from '../../types.generated'; +import { UnionType } from './utils/constants'; +import { hasAdvancedFilters } from './utils/hasAdvancedFilters'; +import { AdvancedSearchFilters } from './AdvancedSearchFilters'; +import { SimpleSearchFilters } from './SimpleSearchFilters'; + +type Props = { + filters?: Array | null; + selectedFilters: Array; + unionType: UnionType; + loading: boolean; + onChangeFilters: (filters: Array) => void; + onChangeUnionType: (unionType: UnionType) => void; +}; + +const FiltersContainer = styled.div` + display: block; + max-width: 260px; + min-width: 260px; + overflow-wrap: break-word; + border-right: 1px solid; + border-color: ${(props) => props.theme.styles['border-color-base']}; + max-height: 100%; +`; + +const FiltersHeader = styled.div` + font-size: 14px; + font-weight: 600; + + padding-left: 20px; + padding-right: 20px; + padding-bottom: 8px; + + width: 100%; + height: 47px; + line-height: 47px; + border-bottom: 1px solid; + border-color: ${(props) => props.theme.styles['border-color-base']}; + + justify-content: space-between; + display: flex; +`; + +const SearchFilterContainer = styled.div` + padding-top: 10px; +`; + +// This component renders the entire filters section that allows toggling +// between the simplified search experience and advanced search +export const SearchFiltersSection = ({ + filters, + selectedFilters, + unionType, + loading, + onChangeFilters, + onChangeUnionType, +}: Props) => { + const onlyShowAdvancedFilters = hasAdvancedFilters(selectedFilters, unionType); + + const [seeAdvancedFilters, setSeeAdvancedFilters] = useState(onlyShowAdvancedFilters); + return ( + + + Filter + + + + + {seeAdvancedFilters ? 
( + onChangeFilters(newFilters)} + onChangeUnionType={onChangeUnionType} + facets={filters || []} + loading={loading} + /> + ) : ( + + onChangeFilters(newFilters)} + /> + + )} + + ); +}; diff --git a/datahub-web-react/src/app/search/SearchPage.tsx b/datahub-web-react/src/app/search/SearchPage.tsx index 0edefb0847ca6c..fd646e715b3255 100644 --- a/datahub-web-react/src/app/search/SearchPage.tsx +++ b/datahub-web-react/src/app/search/SearchPage.tsx @@ -9,10 +9,11 @@ import { SearchResults } from './SearchResults'; import analytics, { EventType } from '../analytics'; import { useGetSearchResultsForMultipleQuery } from '../../graphql/search.generated'; import { SearchCfg } from '../../conf'; -import { ENTITY_FILTER_NAME } from './utils/constants'; +import { ENTITY_FILTER_NAME, UnionType } from './utils/constants'; import { GetSearchResultsParams } from '../entity/shared/components/styled/search/types'; import { EntityAndType } from '../entity/shared/types'; import { scrollToTop } from '../shared/searchUtils'; +import { generateOrFilters } from './utils/generateOrFilters'; type SearchPageParams = { type?: string; @@ -30,13 +31,15 @@ export const SearchPage = () => { const query: string = decodeURIComponent(params.query ? (params.query as string) : ''); const activeType = entityRegistry.getTypeOrDefaultFromPathName(useParams().type || '', undefined); const page: number = params.page && Number(params.page as string) > 0 ? 
Number(params.page as string) : 1; + const unionType: UnionType = Number(params.unionType as any as UnionType) || UnionType.AND; + const filters: Array = useFilters(params); const filtersWithoutEntities: Array = filters.filter( (filter) => filter.field !== ENTITY_FILTER_NAME, ); const entityFilters: Array = filters .filter((filter) => filter.field === ENTITY_FILTER_NAME) - .map((filter) => filter.value.toUpperCase() as EntityType); + .flatMap((filter) => filter.values.map((value) => value?.toUpperCase() as EntityType)); const [numResultsPerPage, setNumResultsPerPage] = useState(SearchCfg.RESULTS_PER_PAGE); const [isSelectMode, setIsSelectMode] = useState(false); @@ -54,7 +57,8 @@ export const SearchPage = () => { query, start: (page - 1) * numResultsPerPage, count: numResultsPerPage, - filters: filtersWithoutEntities, + filters: [], + orFilters: generateOrFilters(unionType, filtersWithoutEntities), }, }, }); @@ -75,7 +79,8 @@ export const SearchPage = () => { query, start: (page - 1) * SearchCfg.RESULTS_PER_PAGE, count: SearchCfg.RESULTS_PER_PAGE, - filters: filtersWithoutEntities, + filters: [], + orFilters: generateOrFilters(unionType, filtersWithoutEntities), }, }, }); @@ -85,12 +90,16 @@ export const SearchPage = () => { }; const onChangeFilters = (newFilters: Array) => { - navigateToSearchUrl({ type: activeType, query, page: 1, filters: newFilters, history }); + navigateToSearchUrl({ type: activeType, query, page: 1, filters: newFilters, history, unionType }); + }; + + const onChangeUnionType = (newUnionType: UnionType) => { + navigateToSearchUrl({ type: activeType, query, page: 1, filters, history, unionType: newUnionType }); }; const onChangePage = (newPage: number) => { scrollToTop(); - navigateToSearchUrl({ type: activeType, query, page: newPage, filters, history }); + navigateToSearchUrl({ type: activeType, query, page: newPage, filters, history, unionType }); }; /** @@ -139,6 +148,7 @@ export const SearchPage = () => { return ( <> { 
selectedFilters={filters} loading={loading} onChangeFilters={onChangeFilters} + onChangeUnionType={onChangeUnionType} onChangePage={onChangePage} numResultsPerPage={numResultsPerPage} setNumResultsPerPage={setNumResultsPerPage} diff --git a/datahub-web-react/src/app/search/SearchResults.tsx b/datahub-web-react/src/app/search/SearchResults.tsx index f034c4fac9472b..3198b7caf054fa 100644 --- a/datahub-web-react/src/app/search/SearchResults.tsx +++ b/datahub-web-react/src/app/search/SearchResults.tsx @@ -10,7 +10,6 @@ import { MatchedField, SearchAcrossEntitiesInput, } from '../../types.generated'; -import { SearchFilters } from './SearchFilters'; import { SearchCfg } from '../../conf'; import { SearchResultsRecommendations } from './SearchResultsRecommendations'; import { useGetAuthenticatedUser } from '../useGetAuthenticatedUser'; @@ -23,6 +22,8 @@ import { isListSubset } from '../entity/shared/utils'; import TabToolbar from '../entity/shared/components/styled/TabToolbar'; import { EntityAndType } from '../entity/shared/types'; import { ErrorSection } from '../shared/error/ErrorSection'; +import { UnionType } from './utils/constants'; +import { SearchFiltersSection } from './SearchFiltersSection'; const SearchBody = styled.div` display: flex; @@ -30,14 +31,6 @@ const SearchBody = styled.div` min-height: calc(100vh - 60px); `; -const FiltersContainer = styled.div` - display: block; - max-width: 260px; - min-width: 260px; - border-right: 1px solid; - border-color: ${(props) => props.theme.styles['border-color-base']}; -`; - const ResultContainer = styled.div` flex: 1; margin-bottom: 20px; @@ -61,25 +54,6 @@ const PaginationInfoContainer = styled.div` align-items: center; `; -const FiltersHeader = styled.div` - font-size: 14px; - font-weight: 600; - - padding-left: 20px; - padding-right: 20px; - padding-bottom: 8px; - - width: 100%; - height: 47px; - line-height: 47px; - border-bottom: 1px solid; - border-color: ${(props) => props.theme.styles['border-color-base']}; 
-`; - -const SearchFilterContainer = styled.div` - padding-top: 10px; -`; - const SearchResultsRecommendationsContainer = styled.div` margin-top: 40px; `; @@ -92,6 +66,7 @@ const StyledTabToolbar = styled(TabToolbar)` const SearchMenuContainer = styled.div``; interface Props { + unionType?: UnionType; query: string; page: number; searchResponse?: { @@ -108,6 +83,7 @@ interface Props { loading: boolean; error: any; onChangeFilters: (filters: Array) => void; + onChangeUnionType: (unionType: UnionType) => void; onChangePage: (page: number) => void; callSearchOnVariables: (variables: { input: SearchAcrossEntitiesInput; @@ -125,6 +101,7 @@ interface Props { } export const SearchResults = ({ + unionType = UnionType.AND, query, page, searchResponse, @@ -132,6 +109,7 @@ export const SearchResults = ({ selectedFilters, loading, error, + onChangeUnionType, onChangeFilters, onChangePage, callSearchOnVariables, @@ -161,17 +139,14 @@ export const SearchResults = ({ {loading && }
- - Filter - - onChangeFilters(newFilters)} - /> - - + <> diff --git a/datahub-web-react/src/app/search/SearchFilter.tsx b/datahub-web-react/src/app/search/SimpleSearchFilter.tsx similarity index 93% rename from datahub-web-react/src/app/search/SearchFilter.tsx rename to datahub-web-react/src/app/search/SimpleSearchFilter.tsx index 25536ab0252bab..93404cd740a82d 100644 --- a/datahub-web-react/src/app/search/SearchFilter.tsx +++ b/datahub-web-react/src/app/search/SimpleSearchFilter.tsx @@ -5,7 +5,7 @@ import * as React from 'react'; import { useState } from 'react'; import styled from 'styled-components'; -import { FacetMetadata } from '../../types.generated'; +import { FacetFilterInput, FacetMetadata } from '../../types.generated'; import { SearchFilterLabel } from './SearchFilterLabel'; import { TRUNCATED_FILTER_LENGTH } from './utils/constants'; @@ -17,10 +17,7 @@ const isGraphDegreeFilter = (field: string) => { type Props = { facet: FacetMetadata; - selectedFilters: Array<{ - field: string; - value: string; - }>; + selectedFilters: Array; onFilterSelect: (selected: boolean, field: string, value: string) => void; defaultDisplayFilters: boolean; }; @@ -57,12 +54,12 @@ const StyledDownOutlined = styled(DownOutlined)` font-size: 10px; `; -export const SearchFilter = ({ facet, selectedFilters, onFilterSelect, defaultDisplayFilters }: Props) => { +export const SimpleSearchFilter = ({ facet, selectedFilters, onFilterSelect, defaultDisplayFilters }: Props) => { const [areFiltersVisible, setAreFiltersVisible] = useState(defaultDisplayFilters); const [expanded, setExpanded] = useState(false); const isFacetSelected = (field, value) => { - return selectedFilters.find((f) => f.field === field && f.value === value) !== undefined; + return selectedFilters.find((f) => f.field === field && f.values.includes(value)) !== undefined; }; // Aggregations filtered for count > 0 or selected = true diff --git a/datahub-web-react/src/app/search/SearchFilters.tsx 
b/datahub-web-react/src/app/search/SimpleSearchFilters.tsx similarity index 70% rename from datahub-web-react/src/app/search/SearchFilters.tsx rename to datahub-web-react/src/app/search/SimpleSearchFilters.tsx index 309533dab4c363..b235da383c551c 100644 --- a/datahub-web-react/src/app/search/SearchFilters.tsx +++ b/datahub-web-react/src/app/search/SimpleSearchFilters.tsx @@ -1,8 +1,8 @@ import * as React from 'react'; import styled from 'styled-components'; import { useEffect, useState } from 'react'; -import { FacetMetadata } from '../../types.generated'; -import { SearchFilter } from './SearchFilter'; +import { FacetFilterInput, FacetMetadata } from '../../types.generated'; +import { SimpleSearchFilter } from './SimpleSearchFilter'; const TOP_FILTERS = ['degree', 'entity', 'tags', 'glossaryTerms', 'domains', 'owners']; @@ -24,26 +24,15 @@ export const SearchFilterWrapper = styled.div` interface Props { facets: Array; - selectedFilters: Array<{ - field: string; - value: string; - }>; - onFilterSelect: ( - newFilters: Array<{ - field: string; - value: string; - }>, - ) => void; + selectedFilters: Array; + onFilterSelect: (newFilters: Array) => void; loading: boolean; } -export const SearchFilters = ({ facets, selectedFilters, onFilterSelect, loading }: Props) => { +export const SimpleSearchFilters = ({ facets, selectedFilters, onFilterSelect, loading }: Props) => { const [cachedProps, setCachedProps] = useState<{ facets: Array; - selectedFilters: Array<{ - field: string; - value: string; - }>; + selectedFilters: Array; }>({ facets, selectedFilters, @@ -58,8 +47,14 @@ export const SearchFilters = ({ facets, selectedFilters, onFilterSelect, loading const onFilterSelectAndSetCache = (selected: boolean, field: string, value: string) => { const newFilters = selected - ? [...selectedFilters, { field, value }] - : selectedFilters.filter((filter) => filter.field !== field || filter.value !== value); + ? 
[...selectedFilters, { field, values: [value] }] + : selectedFilters + .map((filter) => + filter.field === field + ? { ...filter, values: filter.values.filter((val) => val !== value) } + : filter, + ) + .filter((filter) => filter.field !== field || !(filter.values.length === 0)); setCachedProps({ ...cachedProps, selectedFilters: newFilters }); onFilterSelect(newFilters); }; @@ -73,7 +68,7 @@ export const SearchFilters = ({ facets, selectedFilters, onFilterSelect, loading return ( {sortedFacets.map((facet) => ( - ): Array { - return Object.entries( - filters.reduce((acc, filter) => { - acc[filter.field] = [...(acc[filter.field] || []), filter.value]; - return acc; - }, {} as Record), - ).map(([field, values]) => ({ field, value: values.join(',') } as FacetFilterInput)); -} diff --git a/datahub-web-react/src/app/search/utils/filtersToQueryStringParams.ts b/datahub-web-react/src/app/search/utils/filtersToQueryStringParams.ts index 04c80af3b9de3a..6a14a2b664eb9a 100644 --- a/datahub-web-react/src/app/search/utils/filtersToQueryStringParams.ts +++ b/datahub-web-react/src/app/search/utils/filtersToQueryStringParams.ts @@ -1,14 +1,36 @@ -import { FacetFilterInput } from '../../../types.generated'; +import { FacetFilterInput, FilterOperator } from '../../../types.generated'; import { encodeComma } from '../../entity/shared/utils'; -import { FILTER_URL_PREFIX } from './constants'; +import { DEGREE_FILTER, FILTER_URL_PREFIX } from './constants'; + +export const URL_PARAM_SEPARATOR = '___'; + +// In the checkbox-based filter view, usually, selecting two facets ANDs them together. 
+// E.g., if you select the checkbox for tagA and tagB, that means "has tagA AND tagB" +// we need to special case `degree` filter since it is a OR grouping vs the others which are ANDS by default +function reduceFiltersToCombineDegreeFilters(acc: FacetFilterInput[], filter: FacetFilterInput) { + // if we see a `degree` filter and we already have one, combine it with the other degree filter + if (filter.field === DEGREE_FILTER && acc.filter((f) => f.field === DEGREE_FILTER).length > 0) { + // instead of appending this new degree filter, combine it with the previous one and continue + return acc.map((f) => + f.field === DEGREE_FILTER ? { ...f, values: [...f.values, ...filter.values] } : f, + ) as FacetFilterInput[]; + } + return [...acc, filter] as FacetFilterInput[]; +} + +// we need to reformat our list of filters into a dict +function reduceFiltersIntoQueryStringDict(acc, filter, idx) { + acc[ + `${FILTER_URL_PREFIX}${filter.field}${URL_PARAM_SEPARATOR}${String(!!filter.negated)}${URL_PARAM_SEPARATOR}${ + filter.condition || FilterOperator.Equal + }${URL_PARAM_SEPARATOR}${idx}` + ] = [...filter.values.map((value) => encodeComma(value))]; + return acc; +} // transform filters from [{ filter, value }, { filter, value }] to { filter: [value, value ] } that QueryString can parse export default function filtersToQueryStringParams(filters: Array = []) { - return filters.reduce((acc, filter) => { - acc[`${FILTER_URL_PREFIX}${filter.field}`] = [ - ...(acc[`${FILTER_URL_PREFIX}${filter.field}`] || []), - encodeComma(filter.value), - ]; - return acc; - }, {} as Record); + return filters + .reduce(reduceFiltersToCombineDegreeFilters, []) + .reduce(reduceFiltersIntoQueryStringDict, {} as Record); } diff --git a/datahub-web-react/src/app/search/utils/generateOrFilters.ts b/datahub-web-react/src/app/search/utils/generateOrFilters.ts new file mode 100644 index 00000000000000..a798a6ada4b2a8 --- /dev/null +++ b/datahub-web-react/src/app/search/utils/generateOrFilters.ts @@ -0,0 
+1,20 @@ +import { FacetFilterInput, OrFilter } from '../../../types.generated'; +import { UnionType } from './constants'; + +export function generateOrFilters(unionType: UnionType, filters: FacetFilterInput[]): OrFilter[] { + if ((filters?.length || 0) === 0) { + return []; + } + + if (unionType === UnionType.OR) { + return filters.map((filter) => ({ + and: [filter], + })); + } + + return [ + { + and: filters, + }, + ]; +} diff --git a/datahub-web-react/src/app/search/utils/hasAdvancedFilters.ts b/datahub-web-react/src/app/search/utils/hasAdvancedFilters.ts new file mode 100644 index 00000000000000..e1b7c104b974b4 --- /dev/null +++ b/datahub-web-react/src/app/search/utils/hasAdvancedFilters.ts @@ -0,0 +1,12 @@ +import { FacetFilterInput } from '../../../types.generated'; +import { ADVANCED_SEARCH_ONLY_FILTERS, UnionType } from './constants'; + +// utility method that looks at the set of filters and determines if the filters can be represented by simple search +export const hasAdvancedFilters = (filters: FacetFilterInput[], unionType: UnionType) => { + return ( + filters.filter( + (filter) => + ADVANCED_SEARCH_ONLY_FILTERS.indexOf(filter.field) >= 0 || filter.negated || unionType === UnionType.OR, + ).length > 0 + ); +}; diff --git a/datahub-web-react/src/app/search/utils/navigateToSearchUrl.ts b/datahub-web-react/src/app/search/utils/navigateToSearchUrl.ts index 3827283bb353c2..73f797900419cf 100644 --- a/datahub-web-react/src/app/search/utils/navigateToSearchUrl.ts +++ b/datahub-web-react/src/app/search/utils/navigateToSearchUrl.ts @@ -4,12 +4,14 @@ import { RouteComponentProps } from 'react-router-dom'; import filtersToQueryStringParams from './filtersToQueryStringParams'; import { EntityType, FacetFilterInput } from '../../../types.generated'; import { PageRoutes } from '../../../conf/Global'; +import { UnionType } from './constants'; export const navigateToSearchUrl = ({ type: newType, query: newQuery, page: newPage = 1, filters: newFilters, + unionType = 
UnionType.AND, history, }: { type?: EntityType; @@ -17,10 +19,11 @@ export const navigateToSearchUrl = ({ page?: number; filters?: Array; history: RouteComponentProps['history']; + unionType?: UnionType; }) => { const constructedFilters = newFilters || []; if (newType) { - constructedFilters.push({ field: 'entity', value: newType }); + constructedFilters.push({ field: 'entity', values: [newType] }); } const search = QueryString.stringify( @@ -28,6 +31,7 @@ export const navigateToSearchUrl = ({ ...filtersToQueryStringParams(constructedFilters), query: encodeURIComponent(newQuery || ''), page: newPage, + unionType, }, { arrayFormat: 'comma' }, ); @@ -37,33 +41,3 @@ export const navigateToSearchUrl = ({ search, }); }; - -export const navigateToSearchLineageUrl = ({ - entityUrl, - query: newQuery, - page: newPage = 1, - filters: newFilters, - history, -}: { - entityUrl: string; - query?: string; - page?: number; - filters?: Array; - history: RouteComponentProps['history']; -}) => { - const constructedFilters = newFilters || []; - - const search = QueryString.stringify( - { - ...filtersToQueryStringParams(constructedFilters), - query: encodeURIComponent(newQuery || ''), - page: newPage, - }, - { arrayFormat: 'comma' }, - ); - - history.push({ - pathname: entityUrl, - search, - }); -}; diff --git a/datahub-web-react/src/app/search/utils/useFilters.ts b/datahub-web-react/src/app/search/utils/useFilters.ts index cab00fa8258c35..fd3f9e48b0ac48 100644 --- a/datahub-web-react/src/app/search/utils/useFilters.ts +++ b/datahub-web-react/src/app/search/utils/useFilters.ts @@ -2,27 +2,37 @@ import { useMemo } from 'react'; import * as QueryString from 'query-string'; import { FILTER_URL_PREFIX } from './constants'; -import { FacetFilterInput } from '../../../types.generated'; +import { FacetFilterInput, FilterOperator } from '../../../types.generated'; import { decodeComma } from '../../entity/shared/utils'; +import { URL_PARAM_SEPARATOR } from './filtersToQueryStringParams'; 
export default function useFilters(params: QueryString.ParsedQuery): Array { - return useMemo( - () => - // get all query params + return useMemo(() => { + return ( Object.entries(params) // select only the ones with the `filter_` prefix .filter(([key, _]) => key.indexOf(FILTER_URL_PREFIX) >= 0) // transform the filters currently in format [key, [value1, value2]] to [{key: key, value: value1}, { key: key, value: value2}] format that graphql expects - .flatMap(([key, value]) => { + .map(([key, value]) => { // remove the `filter_` prefix - const field = key.replace(FILTER_URL_PREFIX, ''); - if (!value) return []; + const fieldIndex = key.replace(FILTER_URL_PREFIX, ''); + const fieldParts = fieldIndex.split(URL_PARAM_SEPARATOR); + const field = fieldParts[0]; + const negated = fieldParts[1] === 'true'; + const condition = fieldParts[2] || FilterOperator.Equal; + if (!value) return null; if (Array.isArray(value)) { - return value.map((distinctValue) => ({ field, value: decodeComma(distinctValue) })); + return { + field, + condition, + negated, + values: value.map((distinctValue) => decodeComma(distinctValue)), + }; } - return [{ field, value: decodeComma(value) }]; - }), - [params], - ); + return { field, condition, values: [decodeComma(value)], negated }; + }) + .filter((val) => !!val) as Array + ); + }, [params]); } diff --git a/datahub-web-react/src/app/settings/AccessTokens.tsx b/datahub-web-react/src/app/settings/AccessTokens.tsx index f058355c533ea1..273d3d2fb5ffa6 100644 --- a/datahub-web-react/src/app/settings/AccessTokens.tsx +++ b/datahub-web-react/src/app/settings/AccessTokens.tsx @@ -93,7 +93,7 @@ export const AccessTokens = () => { const filters: Array = [ { field: 'ownerUrn', - value: currentUserUrn, + values: [currentUserUrn], }, ]; diff --git a/datahub-web-react/src/app/shared/tags/AddTagsTermsModal.tsx b/datahub-web-react/src/app/shared/tags/AddTagsTermsModal.tsx index 5ac05c1da75690..f88a27fd0c0799 100644 --- 
a/datahub-web-react/src/app/shared/tags/AddTagsTermsModal.tsx +++ b/datahub-web-react/src/app/shared/tags/AddTagsTermsModal.tsx @@ -136,7 +136,7 @@ export default function EditTagTermsModal({ entity.type === EntityType.Tag ? (entity as Tag).name : entityRegistry.getDisplayName(entity.type, entity); const tagOrTermComponent = ; return ( - + {tagOrTermComponent} ); @@ -431,6 +431,7 @@ export default function EditTagTermsModal({ > setIsFocusedOnInput(false)}>

+### Advanced Filters + +Using the Advanced Filter view, you can apply more complex filters. To get there, click 'Advanced' in the top right of the filter panel: + +

+ +

+ +#### Adding an Advanced Filter + +Currently, Advanced Filters support filtering by Column Name, Container, Domain, Description (entity or column level), Tag (entity or column level), Glossary Term (entity or column level), Owner, Entity Type, Subtype, Environment and soft-deleted status. + +To add a new filter, click the add filter menu, choose a filter type, and then fill in the values you want to filter by. + +

+ +

+ +#### Matching Any Advanced Filter + +By default, all filters must be matched in order for a result to appear. For example, if you add a tag filter and a platform filter, all results will have the tag and the platform. You can set the results to match any filter instead. Click on `all filters` and select `any filter` from the drop-down menu. + +

+ +

+ +#### Negating An Advanced Filter + +After creating a filter, you can choose whether results should or should not match it. Change this by clicking the operation in the top right of the filter and selecting the negated operation. + +

+ +

+ + ### Results Search results appear ranked by their relevance. In self-hosted DataHub ranking is based on how closely the query matched textual fields of an asset and its metadata. In Managed DataHub, ranking is based on a combination of textual relevance, usage (queries / views), and change frequency. @@ -142,7 +177,8 @@ The order of the search results is based on the weight what Datahub gives them b The sample queries here are non exhaustive. [The link here](https://demo.datahubproject.io/tag/urn:li:tag:Searchable) shows the current list of indexed fields for each entity inside Datahub. Click on the fields inside each entity and see which field has the tag ```Searchable```. However, it does not tell you the specific attribute name to use for specialized searches. One way to do so is to inspect the ElasticSearch indices, for example: -```curl http://localhost:9200/_cat/indices``` returns all the ES indices in the ElasticSearch container. +`curl http://localhost:9200/_cat/indices` returns all the ES indices in the ElasticSearch container. + ``` yellow open chartindex_v2_1643510690325 bQO_RSiCSUiKJYsmJClsew 1 1 2 0 8.5kb 8.5kb yellow open mlmodelgroupindex_v2_1643510678529 OjIy0wb7RyKqLz3uTENRHQ 1 1 0 0 208b 208b @@ -176,11 +212,13 @@ yellow open system_metadata_service_v1 36spEDbDTdKgVl yellow open schemafieldindex_v2_1643510684410 tZ1gC3haTReRLmpCxirVxQ 1 1 0 0 208b 208b yellow open mlfeatureindex_v2_1643510680246 aQO5HF0mT62Znn-oIWBC8A 1 1 20 0 17.4kb 17.4kb yellow open tagindex_v2_1643510684785 PfnUdCUORY2fnF3I3W7HwA 1 1 3 1 18.6kb 18.6kb -``` +``` + The index name will vary from instance to instance. 
Indexed information about Datasets can be found in: -```curl http://localhost:9200/datasetindex_v2_1643510688970/_search?=pretty``` +`curl http://localhost:9200/datasetindex_v2_1643510688970/_search?=pretty` + +example information of a dataset: -example information of a dataset: ``` { "_index" : "datasetindex_v2_1643510688970", diff --git a/metadata-ingestion/ingest_schema.py b/metadata-ingestion/ingest_schema.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index 21394736c03ed8..07388822ef1116 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -182,7 +182,7 @@ private List filterRelationships(@Nonnull EntityLineageResu List degreeFilter = conjunctiveCriterion.getAnd() .stream() .filter(criterion -> criterion.getField().equals(DEGREE_FILTER_INPUT)) - .map(Criterion::getValue) + .flatMap(c -> c.getValues().stream()) .collect(Collectors.toList()); if (!degreeFilter.isEmpty()) { Predicate degreePredicate = convertFilterToPredicate(degreeFilter); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/aggregator/AllEntitiesSearchAggregator.java b/metadata-io/src/main/java/com/linkedin/metadata/search/aggregator/AllEntitiesSearchAggregator.java index 947af9f7217bce..fb0ce06f1868c5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/aggregator/AllEntitiesSearchAggregator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/aggregator/AllEntitiesSearchAggregator.java @@ -171,7 +171,8 @@ private Map trimMergedAggregations(Map Pair.of(entry.getKey(), new AggregationMetadata() .setName(entry.getValue().getName()) .setDisplayName(entry.getValue().getDisplayName(GetMode.NULL)) - 
.setAggregations(entry.getValue().getAggregations()) + .setAggregations( + entry.getValue().getAggregations()) .setFilterValues( trimFilterValues(entry.getValue().getFilterValues())) ) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 58b62ae9418963..cefc008c8a01af 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.search.elasticsearch.query.request; -import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.DoubleMap; @@ -61,6 +60,7 @@ import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; +import static com.linkedin.metadata.search.utils.ESUtils.*; import static com.linkedin.metadata.utils.SearchUtil.*; @@ -69,6 +69,8 @@ public class SearchRequestHandler { private static final Map REQUEST_HANDLER_BY_ENTITY_NAME = new ConcurrentHashMap<>(); private static final String REMOVED = "removed"; + + private static final String URN_FILTER = "urn"; private static final int DEFAULT_MAX_TERM_BUCKET_SIZE = 20; private final EntitySpec _entitySpec; @@ -133,7 +135,7 @@ public static BoolQueryBuilder getFilterQuery(@Nullable Filter filter) { boolean removedInOrFilter = false; if (filter != null) { removedInOrFilter = filter.getOr().stream().anyMatch( - or -> or.getAnd().stream().anyMatch(criterion -> criterion.getField().equals(REMOVED)) + or -> or.getAnd().stream().anyMatch(criterion -> criterion.getField().equals(REMOVED) || criterion.getField().equals(REMOVED + 
KEYWORD_SUFFIX)) ); } // Filter out entities that are marked "removed" if and only if filter does not contain a criterion referencing it. @@ -404,8 +406,8 @@ private static Map extractTermAggregations(@Nonnull ParsedTerms te /** * Injects the missing conjunctive filters into the aggregations list. */ - private List addFiltersToAggregationMetadata(@Nonnull final List originalMetadata, @Nullable final Filter filter) { - if (filter == null) { + public List addFiltersToAggregationMetadata(@Nonnull final List originalMetadata, @Nullable final Filter filter) { + if (filter == null) { return originalMetadata; } if (filter.hasOr()) { @@ -416,7 +418,7 @@ private List addFiltersToAggregationMetadata(@Nonnull final return originalMetadata; } - private void addOrFiltersToAggregationMetadata(@Nonnull final ConjunctiveCriterionArray or, @Nonnull final List originalMetadata) { + void addOrFiltersToAggregationMetadata(@Nonnull final ConjunctiveCriterionArray or, @Nonnull final List originalMetadata) { for (ConjunctiveCriterion conjunction : or) { // For each item in the conjunction, inject an empty aggregation if necessary addCriteriaFiltersToAggregationMetadata(conjunction.getAnd(), originalMetadata); @@ -445,6 +447,12 @@ private void addCriterionFiltersToAggregationMetadata( return; } + // We don't want to add urn filters to the aggregations we return as a sidecar to search results. + // They are automatically added by searchAcrossLineage and we dont need them to show up in the filter panel. + if (finalFacetField.equals(URN_FILTER)) { + return; + } + if (aggregationMetadataMap.containsKey(finalFacetField)) { /* * If we already have aggregations for the facet field, simply inject any missing values counts into the set. @@ -452,7 +460,11 @@ private void addCriterionFiltersToAggregationMetadata( * Elasticsearch. 
*/ AggregationMetadata originalAggMetadata = aggregationMetadataMap.get(finalFacetField); - addMissingAggregationValueToAggregationMetadata(criterion.getValue(), originalAggMetadata); + if (criterion.hasValues()) { + criterion.getValues().stream().forEach(value -> addMissingAggregationValueToAggregationMetadata(value, originalAggMetadata)); + } else { + addMissingAggregationValueToAggregationMetadata(criterion.getValue(), originalAggMetadata); + } } else { /* * If we do not have ANY aggregation for the facet field, then inject a new aggregation metadata object for the @@ -463,14 +475,18 @@ private void addCriterionFiltersToAggregationMetadata( originalMetadata.add(buildAggregationMetadata( finalFacetField, _filtersToDisplayName.getOrDefault(finalFacetField, finalFacetField), - new LongMap(ImmutableMap.of(criterion.getValue(), 0L)), - new FilterValueArray(ImmutableList.of(createFilterValue(criterion.getValue(), 0L)))) + new LongMap(criterion.getValues().stream().collect(Collectors.toMap(i -> i, i -> 0L))), + new FilterValueArray(criterion.getValues().stream().map(value -> createFilterValue(value, 0L)).collect( + Collectors.toList()))) ); } } private void addMissingAggregationValueToAggregationMetadata(@Nonnull final String value, @Nonnull final AggregationMetadata originalMetadata) { - if (originalMetadata.getAggregations().entrySet().stream().noneMatch(entry -> value.equals(entry.getKey()))) { + if ( + originalMetadata.getAggregations().entrySet().stream().noneMatch(entry -> value.equals(entry.getKey())) + || originalMetadata.getFilterValues().stream().noneMatch(entry -> entry.getValue().equals(value)) + ) { // No aggregation found for filtered value -- inject one! 
originalMetadata.getAggregations().put(value, 0L); originalMetadata.getFilterValues().add(createFilterValue(value, 0L)); @@ -489,12 +505,4 @@ private AggregationMetadata buildAggregationMetadata( .setFilterValues(filterValues); } - @Nullable - private String toFacetField(@Nonnull final String filterField) { - String trimmedField = filterField.replace(ESUtils.KEYWORD_SUFFIX, ""); - if (_facetFields.contains(trimmedField)) { - return trimmedField; - } - return null; - } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index bc3066628abcd7..35914c5cb3f924 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -1,11 +1,14 @@ package com.linkedin.metadata.search.utils; +import com.google.common.collect.ImmutableSet; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import java.util.Arrays; +import java.util.Optional; +import java.util.Set; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @@ -28,6 +31,18 @@ public class ESUtils { public static final String KEYWORD_SUFFIX = ".keyword"; public static final int MAX_RESULT_SIZE = 10000; + // we use this to make sure we filter for editable & non-editable fields + public static final String[][] EDITABLE_FIELD_TO_QUERY_PAIRS = { + {"fieldGlossaryTags", "editedFieldGlossaryTags"}, + {"fieldGlossaryTerms", "editedFieldGlossaryTerms"}, + {"fieldDescriptions", "editedFieldDescriptions"}, + {"description", "editedDescription"}, + }; + + public static final Set BOOLEAN_FIELDS = ImmutableSet.of( + "removed" + ); + /* * Refer to 
https://www.elastic.co/guide/en/elasticsearch/reference/current/regexp-syntax.html for list of reserved * characters in an Elasticsearch regular expression. @@ -76,7 +91,11 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery(@Nonnull ConjunctiveC conjunctiveCriterion.getAnd().forEach(criterion -> { if (!criterion.getValue().trim().isEmpty() || criterion.hasValues() || criterion.getCondition() == Condition.IS_NULL) { - andQueryBuilder.must(getQueryBuilderFromCriterion(criterion)); + if (!criterion.isNegated()) { + andQueryBuilder.must(getQueryBuilderFromCriterion(criterion)); + } else { + andQueryBuilder.mustNot(getQueryBuilderFromCriterion(criterion)); + } } }); return andQueryBuilder; @@ -107,12 +126,42 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery(@Nonnull ConjunctiveC */ @Nonnull public static QueryBuilder getQueryBuilderFromCriterion(@Nonnull Criterion criterion) { + String fieldName = toFacetField(criterion.getField()); + + Optional pairMatch = Arrays.stream(EDITABLE_FIELD_TO_QUERY_PAIRS) + .filter(pair -> Arrays.stream(pair).anyMatch(pairValue -> pairValue.equals(fieldName))) + .findFirst(); + + if (pairMatch.isPresent()) { + final BoolQueryBuilder orQueryBuilder = new BoolQueryBuilder(); + String[] pairMatchValue = pairMatch.get(); + for (String field: pairMatchValue) { + Criterion criterionToQuery = new Criterion(); + criterionToQuery.setCondition(criterion.getCondition()); + criterionToQuery.setNegated(criterion.isNegated()); + criterionToQuery.setValue(criterion.getValue()); + criterionToQuery.setField(field + KEYWORD_SUFFIX); + orQueryBuilder.should(getQueryBuilderFromCriterionForSingleField(criterionToQuery)); + } + return orQueryBuilder; + } + + return getQueryBuilderFromCriterionForSingleField(criterion); + } + @Nonnull + public static QueryBuilder getQueryBuilderFromCriterionForSingleField(@Nonnull Criterion criterion) { final Condition condition = criterion.getCondition(); + String fieldName = 
toFacetField(criterion.getField()); + if (condition == Condition.EQUAL) { // If values is set, use terms query to match one of the values if (!criterion.getValues().isEmpty()) { + if (BOOLEAN_FIELDS.contains(fieldName) && criterion.getValues().size() == 1) { + return QueryBuilders.termQuery(fieldName, Boolean.parseBoolean(criterion.getValues().get(0))); + } return QueryBuilders.termsQuery(criterion.getField(), criterion.getValues()); } + // TODO(https://github.com/datahub-project/datahub-gma/issues/51): support multiple values a field can take without using // delimiters like comma. This is a hack to support equals with URN that has a comma in it. if (isUrn(criterion.getValue())) { @@ -185,4 +234,9 @@ public static String escapeReservedCharacters(@Nonnull String input) { } return input; } -} \ No newline at end of file + + @Nullable + public static String toFacetField(@Nonnull final String filterField) { + return filterField.replace(ESUtils.KEYWORD_SUFFIX, ""); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java index 0f730e21c56423..f48a2832fe28b3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java @@ -3,6 +3,7 @@ import com.datahub.util.ModelUtils; import com.google.common.collect.ImmutableList; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.data.template.StringArray; import com.linkedin.metadata.aspect.AspectVersion; import com.linkedin.metadata.dao.BaseReadDAO; import com.linkedin.metadata.query.filter.Condition; @@ -39,7 +40,7 @@ public static Criterion newCriterion(@Nonnull String field, @Nonnull String valu // Creates new Criterion with field, value and condition. 
@Nonnull public static Criterion newCriterion(@Nonnull String field, @Nonnull String value, @Nonnull Condition condition) { - return new Criterion().setField(field).setValue(value).setCondition(condition); + return new Criterion().setField(field).setValue(value).setValues(new StringArray(ImmutableList.of(value))).setCondition(condition); } // Creates new Filter from a map of Criteria by removing null-valued Criteria and using EQUAL condition (default). diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/SearchUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/SearchUtils.java index b80503740c6388..eb9f386df7024d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/SearchUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/SearchUtils.java @@ -137,6 +137,7 @@ public static AggregationMetadata merge(AggregationMetadata one, AggregationMeta Stream.concat(one.getAggregations().entrySet().stream(), two.getAggregations().entrySet().stream()) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, Long::sum)); return one.clone() + .setDisplayName(two.getDisplayName() != two.getName() ? 
two.getDisplayName() : one.getDisplayName()) .setAggregations(new LongMap(mergedMap)) .setFilterValues(new FilterValueArray(SearchUtil.convertToFilters(mergedMap))); } @@ -153,4 +154,4 @@ public static ListResult toListResult(final SearchResult searchResult) { new UrnArray(searchResult.getEntities().stream().map(SearchEntity::getEntity).collect(Collectors.toList()))); return listResult; } -} \ No newline at end of file +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java index 7a82864320a6e1..f62c008ddf46d6 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java @@ -6,9 +6,16 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.TestEntityUrn; import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.StringArray; import com.linkedin.metadata.ElasticTestUtils; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.aggregator.AllEntitiesSearchAggregator; import com.linkedin.metadata.search.cache.CachingAllEntitiesSearchAggregator; import com.linkedin.metadata.search.cache.EntityDocCountCache; @@ -160,4 +167,203 @@ public void testSearchService() throws Exception { searchResult = _searchService.searchAcrossEntities(ImmutableList.of(), "test", null, null, 0, 10, null); assertEquals(searchResult.getNumEntities().intValue(), 0); } + + @Test 
+ public void testAdvancedSearchOr() throws Exception { + final Criterion filterCriterion = new Criterion() + .setField("platform") + .setCondition(Condition.EQUAL) + .setValue("hive") + .setValues(new StringArray(ImmutableList.of("hive"))); + + final Criterion subtypeCriterion = new Criterion() + .setField("subtypes") + .setCondition(Condition.EQUAL) + .setValue("") + .setValues(new StringArray(ImmutableList.of("view"))); + + final Filter filterWithCondition = new Filter().setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion().setAnd( + new CriterionArray(ImmutableList.of(filterCriterion))), + new ConjunctiveCriterion().setAnd( + new CriterionArray(ImmutableList.of(subtypeCriterion))) + )); + + + SearchResult searchResult = + _searchService.searchAcrossEntities(ImmutableList.of(ENTITY_NAME), "test", filterWithCondition, null, 0, 10, null); + + assertEquals(searchResult.getNumEntities().intValue(), 0); + clearCache(); + + Urn urn = new TestEntityUrn("test", "testUrn", "VALUE_1"); + ObjectNode document = JsonNodeFactory.instance.objectNode(); + document.set("urn", JsonNodeFactory.instance.textNode(urn.toString())); + document.set("keyPart1", JsonNodeFactory.instance.textNode("test")); + document.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); + document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); + document.set("subtypes", JsonNodeFactory.instance.textNode("view")); + document.set("platform", JsonNodeFactory.instance.textNode("snowflake")); + _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); + + Urn urn2 = new TestEntityUrn("test", "testUrn", "VALUE_2"); + ObjectNode document2 = JsonNodeFactory.instance.objectNode(); + document2.set("urn", JsonNodeFactory.instance.textNode(urn2.toString())); + document2.set("keyPart1", JsonNodeFactory.instance.textNode("test")); + document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); + 
document2.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); + document2.set("subtypes", JsonNodeFactory.instance.textNode("table")); + document2.set("platform", JsonNodeFactory.instance.textNode("hive")); + _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); + + Urn urn3 = new TestEntityUrn("test", "testUrn", "VALUE_3"); + ObjectNode document3 = JsonNodeFactory.instance.objectNode(); + document3.set("urn", JsonNodeFactory.instance.textNode(urn3.toString())); + document3.set("keyPart1", JsonNodeFactory.instance.textNode("test")); + document3.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); + document3.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); + document3.set("subtypes", JsonNodeFactory.instance.textNode("table")); + document3.set("platform", JsonNodeFactory.instance.textNode("snowflake")); + _elasticSearchService.upsertDocument(ENTITY_NAME, document3.toString(), urn3.toString()); + + syncAfterWrite(_searchClient); + + searchResult = _searchService.searchAcrossEntities(ImmutableList.of(), "test", filterWithCondition, null, 0, 10, null); + assertEquals(searchResult.getNumEntities().intValue(), 2); + assertEquals(searchResult.getEntities().get(0).getEntity(), urn); + assertEquals(searchResult.getEntities().get(1).getEntity(), urn2); + clearCache(); + } + + @Test + public void testAdvancedSearchSoftDelete() throws Exception { + final Criterion filterCriterion = new Criterion() + .setField("platform") + .setCondition(Condition.EQUAL) + .setValue("hive") + .setValues(new StringArray(ImmutableList.of("hive"))); + + final Criterion removedCriterion = new Criterion() + .setField("removed") + .setCondition(Condition.EQUAL) + .setValue("") + .setValues(new StringArray(ImmutableList.of("true"))); + + final Filter filterWithCondition = new Filter().setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion().setAnd( + new 
CriterionArray(ImmutableList.of(filterCriterion, removedCriterion))) + )); + + + SearchResult searchResult = + _searchService.searchAcrossEntities(ImmutableList.of(ENTITY_NAME), "test", filterWithCondition, null, 0, 10, null); + + assertEquals(searchResult.getNumEntities().intValue(), 0); + clearCache(); + + Urn urn = new TestEntityUrn("test", "testUrn", "VALUE_1"); + ObjectNode document = JsonNodeFactory.instance.objectNode(); + document.set("urn", JsonNodeFactory.instance.textNode(urn.toString())); + document.set("keyPart1", JsonNodeFactory.instance.textNode("test")); + document.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); + document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); + document.set("subtypes", JsonNodeFactory.instance.textNode("view")); + document.set("platform", JsonNodeFactory.instance.textNode("hive")); + document.set("removed", JsonNodeFactory.instance.booleanNode(true)); + _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); + + Urn urn2 = new TestEntityUrn("test", "testUrn", "VALUE_2"); + ObjectNode document2 = JsonNodeFactory.instance.objectNode(); + document2.set("urn", JsonNodeFactory.instance.textNode(urn2.toString())); + document2.set("keyPart1", JsonNodeFactory.instance.textNode("test")); + document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); + document2.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); + document2.set("subtypes", JsonNodeFactory.instance.textNode("table")); + document2.set("platform", JsonNodeFactory.instance.textNode("hive")); + document.set("removed", JsonNodeFactory.instance.booleanNode(false)); + _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); + + Urn urn3 = new TestEntityUrn("test", "testUrn", "VALUE_3"); + ObjectNode document3 = JsonNodeFactory.instance.objectNode(); + document3.set("urn", 
JsonNodeFactory.instance.textNode(urn3.toString())); + document3.set("keyPart1", JsonNodeFactory.instance.textNode("test")); + document3.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); + document3.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); + document3.set("subtypes", JsonNodeFactory.instance.textNode("table")); + document3.set("platform", JsonNodeFactory.instance.textNode("snowflake")); + document.set("removed", JsonNodeFactory.instance.booleanNode(false)); + _elasticSearchService.upsertDocument(ENTITY_NAME, document3.toString(), urn3.toString()); + + syncAfterWrite(_searchClient); + + searchResult = _searchService.searchAcrossEntities(ImmutableList.of(), "test", filterWithCondition, null, 0, 10, null); + assertEquals(searchResult.getNumEntities().intValue(), 1); + assertEquals(searchResult.getEntities().get(0).getEntity(), urn); + clearCache(); + } + + @Test + public void testAdvancedSearchNegated() throws Exception { + final Criterion filterCriterion = new Criterion() + .setField("platform") + .setCondition(Condition.EQUAL) + .setValue("hive") + .setNegated(true) + .setValues(new StringArray(ImmutableList.of("hive"))); + + final Filter filterWithCondition = new Filter().setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion().setAnd( + new CriterionArray(ImmutableList.of(filterCriterion))) + )); + + + SearchResult searchResult = + _searchService.searchAcrossEntities(ImmutableList.of(ENTITY_NAME), "test", filterWithCondition, null, 0, 10, null); + + assertEquals(searchResult.getNumEntities().intValue(), 0); + clearCache(); + + Urn urn = new TestEntityUrn("test", "testUrn", "VALUE_1"); + ObjectNode document = JsonNodeFactory.instance.objectNode(); + document.set("urn", JsonNodeFactory.instance.textNode(urn.toString())); + document.set("keyPart1", JsonNodeFactory.instance.textNode("test")); + document.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); + 
document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); + document.set("subtypes", JsonNodeFactory.instance.textNode("view")); + document.set("platform", JsonNodeFactory.instance.textNode("hive")); + document.set("removed", JsonNodeFactory.instance.booleanNode(true)); + _elasticSearchService.upsertDocument(ENTITY_NAME, document.toString(), urn.toString()); + + Urn urn2 = new TestEntityUrn("test", "testUrn", "VALUE_2"); + ObjectNode document2 = JsonNodeFactory.instance.objectNode(); + document2.set("urn", JsonNodeFactory.instance.textNode(urn2.toString())); + document2.set("keyPart1", JsonNodeFactory.instance.textNode("test")); + document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); + document2.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); + document2.set("subtypes", JsonNodeFactory.instance.textNode("table")); + document2.set("platform", JsonNodeFactory.instance.textNode("hive")); + document.set("removed", JsonNodeFactory.instance.booleanNode(false)); + _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); + + Urn urn3 = new TestEntityUrn("test", "testUrn", "VALUE_3"); + ObjectNode document3 = JsonNodeFactory.instance.objectNode(); + document3.set("urn", JsonNodeFactory.instance.textNode(urn3.toString())); + document3.set("keyPart1", JsonNodeFactory.instance.textNode("test")); + document3.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); + document3.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); + document3.set("subtypes", JsonNodeFactory.instance.textNode("table")); + document3.set("platform", JsonNodeFactory.instance.textNode("snowflake")); + document.set("removed", JsonNodeFactory.instance.booleanNode(false)); + _elasticSearchService.upsertDocument(ENTITY_NAME, document3.toString(), urn3.toString()); + + syncAfterWrite(_searchClient); + + searchResult = 
_searchService.searchAcrossEntities(ImmutableList.of(), "test", filterWithCondition, null, 0, 10, null); + assertEquals(searchResult.getNumEntities().intValue(), 1); + assertEquals(searchResult.getEntities().get(0).getEntity(), urn3); + clearCache(); + } } diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Criterion.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Criterion.pdl index 0bffd939427234..0a0fd8d67e00f0 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Criterion.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Criterion.pdl @@ -25,4 +25,9 @@ record Criterion { * The condition for the criterion, e.g. EQUAL, START_WITH */ condition: Condition = "EQUAL" -} \ No newline at end of file + + /** + * Whether the condition should be negated + */ + negated: boolean = false +} diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json index 0f54e05ccd5e4b..47ec367f07ad08 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json @@ -72,6 +72,11 @@ }, "doc" : "The condition for the criterion, e.g. 
EQUAL, START_WITH", "default" : "EQUAL" + }, { + "name" : "negated", + "type" : "boolean", + "doc" : "Whether the condition should be negated", + "default" : false } ] } }, diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index 31fc619700a138..685b995a960398 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -162,6 +162,11 @@ }, "doc" : "The condition for the criterion, e.g. EQUAL, START_WITH", "default" : "EQUAL" + }, { + "name" : "negated", + "type" : "boolean", + "doc" : "Whether the condition should be negated", + "default" : false } ] } }, diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index bf1af17fdbec91..fd3f7ba2d90435 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -5248,6 +5248,11 @@ "type" : "Condition", "doc" : "The condition for the criterion, e.g. 
EQUAL, START_WITH", "default" : "EQUAL" + }, { + "name" : "negated", + "type" : "boolean", + "doc" : "Whether the condition should be negated", + "default" : false } ] } }, diff --git a/smoke-test/tests/cypress/cypress/integration/search/search.js b/smoke-test/tests/cypress/cypress/integration/search/search.js index 96331544d0ac0b..e8d4f907d36345 100644 --- a/smoke-test/tests/cypress/cypress/integration/search/search.js +++ b/smoke-test/tests/cypress/cypress/integration/search/search.js @@ -1,65 +1,192 @@ -describe('search', () => { - it('can hit all entities search, see some results (testing this any more is tricky because it is cached for now)', () => { +describe("search", () => { + it("can hit all entities search, see some results (testing this any more is tricky because it is cached for now)", () => { cy.login(); - cy.visit('/'); - cy.get('input[data-testid=search-input]').type('*{enter}'); + cy.visit("/"); + cy.get("input[data-testid=search-input]").type("*{enter}"); cy.wait(5000); - cy.contains('of 0 results').should('not.exist'); - cy.contains(/of [0-9]+ results/); + cy.contains("of 0 results").should("not.exist"); + cy.contains(/of [0-9]+ results/); }); - it('can hit all entities search with an impossible query and find 0 results', () => { + it("can hit all entities search with an impossible query and find 0 results", () => { cy.login(); - cy.visit('/'); + cy.visit("/"); // random string that is unlikely to accidentally have a match - cy.get('input[data-testid=search-input]').type('zzzzzzzzzzzzzqqqqqqqqqqqqqzzzzzzqzqzqzqzq{enter}'); + cy.get("input[data-testid=search-input]").type( + "zzzzzzzzzzzzzqqqqqqqqqqqqqzzzzzzqzqzqzqzq{enter}" + ); cy.wait(5000); - cy.contains('of 0 results'); + cy.contains("of 0 results"); }); - it('can search, find a result, and visit the dataset page', () => { + it("can search, find a result, and visit the dataset page", () => { cy.login(); - 
cy.visit('http://localhost:9002/search?filter_entity=DATASET&filter_tags=urn%3Ali%3Atag%3ACypress&page=1&query=users_created') - cy.contains('of 1 result'); + cy.visit( + "/search?filter_entity=DATASET&filter_tags=urn%3Ali%3Atag%3ACypress&page=1&query=users_created" + ); + cy.contains("of 1 result"); - cy.contains('Cypress') + cy.contains("Cypress"); - cy.contains('fct_cypress_users_created').click(); + cy.contains("fct_cypress_users_created").click(); // platform - cy.contains('Hive'); + cy.contains("Hive"); // entity type - cy.contains('Dataset'); + cy.contains("Dataset"); // entity name - cy.contains('fct_cypress_users_created'); + cy.contains("fct_cypress_users_created"); // column name - cy.contains('user_id'); + cy.contains("user_id"); // column description - cy.contains('Id of the user'); + cy.contains("Id of the user"); // table description - cy.contains('table containing all the users created on a single day'); + cy.contains("table containing all the users created on a single day"); }); - it('can search and get glossary term facets with proper labels', () => { + it("can search and get glossary term facets with proper labels", () => { cy.login(); - cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)'); - cy.contains('cypress_logging_events'); + cy.visit( + "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)" + ); + cy.contains("cypress_logging_events"); - cy.contains('Add Term').click(); + cy.contains("Add Term").click(); - cy.focused().type('CypressTerm'); + cy.focused().type("CypressTerm"); - cy.get('.ant-select-item-option-content').within(() => cy.contains('CypressTerm').click({force: true})); + cy.get(".ant-select-item-option-content").within(() => + cy.contains("CypressTerm").click({ force: true }) + ); - cy.get('[data-testid="add-tag-term-from-modal-btn"]').click({force: true}); - cy.get('[data-testid="add-tag-term-from-modal-btn"]').should('not.exist'); + 
cy.get('[data-testid="add-tag-term-from-modal-btn"]').click({ + force: true, + }); + cy.get('[data-testid="add-tag-term-from-modal-btn"]').should("not.exist"); - cy.contains('CypressTerm'); - cy.visit('http://localhost:9002/search?query=cypress') - cy.contains('CypressTerm') + cy.contains("CypressTerm"); + cy.visit("/search?query=cypress"); + cy.contains("CypressTerm"); }); -}) \ No newline at end of file + + it("can search by a specific term using advanced search", () => { + cy.login(); + + cy.visit("/"); + cy.get("input[data-testid=search-input]").type("*{enter}"); + cy.wait(2000); + + cy.contains("Advanced").click(); + + cy.contains("Add Filter").click(); + + cy.contains("Column Term").click({ force: true }); + + cy.get('[data-testid="tag-term-modal-input"]').type("CypressColumnInfo"); + + cy.wait(2000); + + cy.get('[data-testid="tag-term-option"]').click({ force: true }); + + cy.get('[data-testid="add-tag-term-from-modal-btn"]').click({ + force: true, + }); + + cy.wait(2000); + + // has the term in editable metadata + cy.contains("SampleCypressHdfsDataset"); + + // has the term in non-editable metadata + cy.contains("cypress_logging_events"); + + cy.contains("of 2 results"); + }); + + it("can search by AND-ing two concepts using advanced search", () => { + cy.login(); + + cy.visit("/"); + cy.get("input[data-testid=search-input]").type("*{enter}"); + cy.wait(2000); + + cy.contains("Advanced").click(); + + cy.contains("Add Filter").click(); + + cy.contains("Column Term").click({ force: true }); + + cy.get('[data-testid="tag-term-modal-input"]').type("CypressColumnInfo"); + + cy.wait(2000); + + cy.get('[data-testid="tag-term-option"]').click({ force: true }); + + cy.get('[data-testid="add-tag-term-from-modal-btn"]').click({ + force: true, + }); + + cy.wait(2000); + + cy.contains("Add Filter").click(); + + cy.get('[data-testid="adv-search-add-filter-description"]').click({ + force: true, + }); + + cy.get('[data-testid="edit-text-input"]').type("log event"); + + 
cy.get('[data-testid="edit-text-done-btn"]').click({ force: true }); + + // has the term in non-editable metadata + cy.contains("cypress_logging_events"); + }); + + it("can search by OR-ing two concepts using advanced search", () => { + cy.login(); + + cy.visit("/"); + cy.get("input[data-testid=search-input]").type("*{enter}"); + cy.wait(2000); + + cy.contains("Advanced").click(); + + cy.contains("Add Filter").click(); + + cy.contains("Column Term").click({ force: true }); + + cy.get('[data-testid="tag-term-modal-input"]').type("CypressColumnInfo"); + + cy.wait(2000); + + cy.get('[data-testid="tag-term-option"]').click({ force: true }); + + cy.get('[data-testid="add-tag-term-from-modal-btn"]').click({ + force: true, + }); + + cy.wait(2000); + + cy.contains("Add Filter").click(); + + cy.get('[data-testid="adv-search-add-filter-description"]').click({ + force: true, + }); + + cy.get('[data-testid="edit-text-input"]').type("log event"); + + cy.get('[data-testid="edit-text-done-btn"]').click({ force: true }); + + // has the term in non-editable metadata + cy.contains("all filters").click(); + cy.contains("any filter").click({ force: true }); + + cy.contains("cypress_logging_events"); + cy.contains("fct_cypress_users_created_no_tag"); + cy.contains("SampleCypressHdfsDataset"); + }); +}); diff --git a/smoke-test/tests/cypress/data.json b/smoke-test/tests/cypress/data.json index 296e4c0b22699d..678bab5d269522 100644 --- a/smoke-test/tests/cypress/data.json +++ b/smoke-test/tests/cypress/data.json @@ -223,7 +223,8 @@ "editableSchemaFieldInfo": [ { "fieldPath": "shipment_info", - "globalTags": { "tags": [{ "tag": "urn:li:tag:Legacy" }] } + "globalTags": { "tags": [{ "tag": "urn:li:tag:Legacy" }] }, + "glossaryTerms": { "terms": [{ "urn": "urn:li:glossaryTerm:CypressNode.CypressColumnInfoType" }], "auditStamp": { "time": 0, "actor": "urn:li:corpuser:jdoe", "impersonator": null }} } ] } @@ -621,7 +622,8 @@ } }, "nativeDataType": "boolean", - "recursive": false + "recursive": 
false, + "glossaryTerms": { "terms": [{ "urn": "urn:li:glossaryTerm:CypressNode.CypressColumnInfoType" }], "auditStamp": { "time": 0, "actor": "urn:li:corpuser:jdoe", "impersonator": null }} }, { "fieldPath": "timestamp", @@ -673,7 +675,7 @@ "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "description": "table containing all the users created on a single day", + "description": "table containing all the users created on a single day. Creted from log events.", "uri": null, "tags": [], "customProperties": { @@ -1820,5 +1822,40 @@ }, "proposedDelta": null, "systemMetadata": null + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:CypressNode.CypressColumnInfoType", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "definition": "a definition", + "parentNode": "urn:li:glossaryNode:CypressNode", + "sourceRef": "FIBO", + "termSource": "EXTERNAL", + "sourceUrl": "https://spec.edmcouncil.org/fibo/ontology/FBC/FunctionalEntities/FinancialServicesEntities/BankingProduct", + "customProperties": { + "FQDN": "SavingAccount" + } + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [{ + "owner": "urn:li:corpuser:jdoe", + "type": "DATAOWNER" + }], + "lastModified": { + "time": 1581407189000, + "actor": "urn:li:corpuser:jdoe" + } + } + } + ] + } + }, + "proposedDelta": null } ] diff --git a/smoke-test/tests/tokens/revokable_access_token_test.py b/smoke-test/tests/tokens/revokable_access_token_test.py index 2ce6bb4ad503dc..325c9c4d49ca47 100644 --- a/smoke-test/tests/tokens/revokable_access_token_test.py +++ b/smoke-test/tests/tokens/revokable_access_token_test.py @@ -262,7 +262,7 @@ def test_non_admin_can_create_list_revoke_tokens(wait_for_healthchecks): # User should be able to list his own token res_data = listAccessTokens( - user_session, [{"field": "ownerUrn", "value": "urn:li:corpuser:user"}] + 
user_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}] ) assert res_data assert res_data["data"] @@ -289,7 +289,7 @@ def test_non_admin_can_create_list_revoke_tokens(wait_for_healthchecks): # Using a normal account, check that all its tokens where removed. res_data = listAccessTokens( - user_session, [{"field": "ownerUrn", "value": "urn:li:corpuser:user"}] + user_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}] ) assert res_data assert res_data["data"] @@ -331,7 +331,7 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks): user_session.cookies.clear() admin_session = loginAs(admin_user, admin_pass) res_data = listAccessTokens( - admin_session, [{"field": "ownerUrn", "value": "urn:li:corpuser:user"}] + admin_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}] ) assert res_data assert res_data["data"] @@ -362,7 +362,7 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks): user_session.cookies.clear() user_session = loginAs("user", "user") res_data = listAccessTokens( - user_session, [{"field": "ownerUrn", "value": "urn:li:corpuser:user"}] + user_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}] ) assert res_data assert res_data["data"] @@ -372,7 +372,7 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks): # Using the super account, check that all tokens where removed. 
admin_session = loginAs(admin_user, admin_pass) res_data = listAccessTokens( - admin_session, [{"field": "ownerUrn", "value": "urn:li:corpuser:user"}] + admin_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}] ) assert res_data assert res_data["data"] From 1325b8ad71ea687b6b89adda1446a527581b4f42 Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Tue, 4 Oct 2022 11:27:46 -0700 Subject: [PATCH 29/76] feat(search): improved search snippet FE logic (#6109) * starting improvements on search snippet * flesh out ranking logic * adding tests * responding to comments --- .../src/app/entity/dataset/DatasetEntity.tsx | 27 +------------ .../entity/dataset/DatasetSearchSnippet.tsx | 36 ++++++++++++++++++ .../app/entity/shared/__tests__/utils.test.ts | 37 ++++++++++++++++++ .../src/app/entity/shared/utils.ts | 38 +++++++++++++++++-- 4 files changed, 109 insertions(+), 29 deletions(-) create mode 100644 datahub-web-react/src/app/entity/dataset/DatasetSearchSnippet.tsx create mode 100644 datahub-web-react/src/app/entity/shared/__tests__/utils.test.ts diff --git a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx index 0953d40605e225..eb6724acb2c8bd 100644 --- a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx +++ b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx @@ -1,10 +1,8 @@ import * as React from 'react'; import { DatabaseFilled, DatabaseOutlined } from '@ant-design/icons'; -import { Typography } from 'antd'; import { Dataset, DatasetProperties, EntityType, OwnershipType, SearchResult } from '../../../types.generated'; import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; import { Preview } from './preview/Preview'; -import { FIELDS_TO_HIGHLIGHT } from './search/highlights'; import { EntityProfile } from '../shared/containers/profile/EntityProfile'; import { GetDatasetQuery, useGetDatasetQuery, useUpdateDatasetMutation } from 
'../../../graphql/dataset.generated'; import { GenericEntityProperties } from '../shared/types'; @@ -28,8 +26,7 @@ import { OperationsTab } from './profile/OperationsTab'; import { EntityMenuItems } from '../shared/EntityDropdown/EntityDropdown'; import { SidebarSiblingsSection } from '../shared/containers/profile/sidebar/SidebarSiblingsSection'; import { DatasetStatsSummarySubHeader } from './profile/stats/stats/DatasetStatsSummarySubHeader'; -import { TagSummary } from './shared/TagSummary'; -import { TermSummary } from './shared/TermSummary'; +import { DatasetSearchSnippet } from './DatasetSearchSnippet'; const SUBTYPES = { VIEW: 'view', @@ -256,18 +253,6 @@ export class DatasetEntity implements Entity { const data = result.entity as Dataset; const genericProperties = this.getGenericEntityProperties(data); - let snippet: React.ReactNode; - - if (result.matchedFields.length > 0) { - if (result.matchedFields[0].value.includes('urn:li:tag')) { - snippet = ; - } else if (result.matchedFields[0].value.includes('urn:li:glossaryTerm')) { - snippet = ; - } else { - snippet = {result.matchedFields[0].value}; - } - } - return ( { subtype={data.subTypes?.typeNames?.[0]} container={data.container} parentContainers={data.parentContainers} - snippet={ - // Add match highlights only if all the matched fields are in the FIELDS_TO_HIGHLIGHT - result.matchedFields.length > 0 && - result.matchedFields.every((field) => FIELDS_TO_HIGHLIGHT.has(field.name)) && ( - - Matches {FIELDS_TO_HIGHLIGHT.get(result.matchedFields[0].name)} {snippet} - - ) - } + snippet={} insights={result.insights} externalUrl={data.properties?.externalUrl} statsSummary={data.statsSummary} diff --git a/datahub-web-react/src/app/entity/dataset/DatasetSearchSnippet.tsx b/datahub-web-react/src/app/entity/dataset/DatasetSearchSnippet.tsx new file mode 100644 index 00000000000000..16da7ba8b06fe5 --- /dev/null +++ b/datahub-web-react/src/app/entity/dataset/DatasetSearchSnippet.tsx @@ -0,0 +1,36 @@ +import React from 
'react'; + +import { Typography } from 'antd'; +import { MatchedField } from '../../../types.generated'; +import { TagSummary } from './shared/TagSummary'; +import { TermSummary } from './shared/TermSummary'; +import { FIELDS_TO_HIGHLIGHT } from './search/highlights'; +import { getMatchPrioritizingPrimary } from '../shared/utils'; + +type Props = { + matchedFields: MatchedField[]; +}; + +const LABEL_INDEX_NAME = 'fieldLabels'; + +export const DatasetSearchSnippet = ({ matchedFields }: Props) => { + const matchedField = getMatchPrioritizingPrimary(matchedFields, LABEL_INDEX_NAME); + + let snippet: React.ReactNode; + + if (matchedField) { + if (matchedField.value.includes('urn:li:tag')) { + snippet = ; + } else if (matchedField.value.includes('urn:li:glossaryTerm')) { + snippet = ; + } else { + snippet = {matchedField.value}; + } + } + + return matchedField ? ( + + Matches {FIELDS_TO_HIGHLIGHT.get(matchedField.name)} {snippet}{' '} + + ) : null; +}; diff --git a/datahub-web-react/src/app/entity/shared/__tests__/utils.test.ts b/datahub-web-react/src/app/entity/shared/__tests__/utils.test.ts new file mode 100644 index 00000000000000..86dec46528b494 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/__tests__/utils.test.ts @@ -0,0 +1,37 @@ +import { getMatchPrioritizingPrimary } from '../utils'; + +const MOCK_MATCHED_FIELDS = [ + { + name: 'fieldPaths', + value: 'rain', + }, + { + name: 'description', + value: 'rainbow', + }, + { + name: 'fieldPaths', + value: 'rainbow', + }, + { + name: 'fieldPaths', + value: 'rainbows', + }, +]; + +describe('utils', () => { + describe('getMatchPrioritizingPrimary', () => { + it('prioritizes exact match', () => { + global.window.location.search = 'query=rainbow'; + const match = getMatchPrioritizingPrimary(MOCK_MATCHED_FIELDS, 'fieldPaths'); + expect(match?.value).toEqual('rainbow'); + expect(match?.name).toEqual('fieldPaths'); + }); + it('will accept first contains match', () => { + global.window.location.search = 
'query=bow'; + const match = getMatchPrioritizingPrimary(MOCK_MATCHED_FIELDS, 'fieldPaths'); + expect(match?.value).toEqual('rainbow'); + expect(match?.name).toEqual('fieldPaths'); + }); + }); +}); diff --git a/datahub-web-react/src/app/entity/shared/utils.ts b/datahub-web-react/src/app/entity/shared/utils.ts index 7f6afc6fafaab6..32307d6ea4900a 100644 --- a/datahub-web-react/src/app/entity/shared/utils.ts +++ b/datahub-web-react/src/app/entity/shared/utils.ts @@ -1,3 +1,5 @@ +import * as QueryString from 'query-string'; + import { MatchedField } from '../../../types.generated'; import { FIELDS_TO_HIGHLIGHT } from '../dataset/search/highlights'; import { GenericEntityProperties } from './types'; @@ -83,14 +85,42 @@ export const isListSubset = (l1, l2): boolean => { return l1.every((result) => l2.indexOf(result) >= 0); }; +function normalize(value: string) { + return value.trim().toLowerCase(); +} + +function fromQueryGetBestMatch(selectedMatchedFields: MatchedField[], rawQuery: string) { + const query = normalize(rawQuery); + // first lets see if there's an exact match between a field value and the query + const exactMatch = selectedMatchedFields.find((field) => normalize(field.value) === query); + if (exactMatch) { + return exactMatch; + } + + // if no exact match exists, we'll see if the entire query is contained in any of the values + const containedMatch = selectedMatchedFields.find((field) => normalize(field.value).includes(query)); + if (containedMatch) { + return containedMatch; + } + + // otherwise, just return whichever is first + return selectedMatchedFields[0]; +} + export const getMatchPrioritizingPrimary = ( matchedFields: MatchedField[], primaryField: string, ): MatchedField | undefined => { - const primaryMatch = matchedFields.find((field) => field.name === primaryField); - if (primaryMatch) { - return primaryMatch; + const { location } = window; + const params = QueryString.parse(location.search, { arrayFormat: 'comma' }); + const query: string = 
decodeURIComponent(params.query ? (params.query as string) : ''); + + const primaryMatches = matchedFields.filter((field) => field.name === primaryField); + if (primaryMatches.length > 0) { + return fromQueryGetBestMatch(primaryMatches, query); } - return matchedFields.find((field) => FIELDS_TO_HIGHLIGHT.has(field.name)); + const matchesThatShouldBeShownOnFE = matchedFields.filter((field) => FIELDS_TO_HIGHLIGHT.has(field.name)); + + return fromQueryGetBestMatch(matchesThatShouldBeShownOnFE, query); }; From 6b83cab82cd2601912f335ec02ec6721679ebaff Mon Sep 17 00:00:00 2001 From: Thomas Tauber-Marshall Date: Tue, 4 Oct 2022 12:13:38 -0700 Subject: [PATCH 30/76] feat(ingest): add CorpUser and CorpGroup to the Python SDK (#5930) Co-authored-by: Harshal Sheth --- .../lineage_job_dataflow_new_api_verbose.py | 22 +++- .../api/entities/corpgroup/__init__.py | 1 + .../api/entities/corpgroup/corpgroup.py | 97 ++++++++++++++++ .../datahub/api/entities/corpuser/__init__.py | 1 + .../datahub/api/entities/corpuser/corpuser.py | 109 ++++++++++++++++++ .../datahub/api/entities/datajob/datajob.py | 10 +- 6 files changed, 237 insertions(+), 3 deletions(-) create mode 100644 metadata-ingestion/src/datahub/api/entities/corpgroup/__init__.py create mode 100644 metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py create mode 100644 metadata-ingestion/src/datahub/api/entities/corpuser/__init__.py create mode 100644 metadata-ingestion/src/datahub/api/entities/corpuser/corpuser.py diff --git a/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_verbose.py b/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_verbose.py index 769f63cae2f4a8..97acdbee1c99cf 100644 --- a/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_verbose.py +++ b/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_verbose.py @@ -1,6 +1,8 @@ import time import uuid +from datahub.api.entities.corpgroup.corpgroup import CorpGroup +from 
datahub.api.entities.corpuser.corpuser import CorpUser from datahub.api.entities.datajob.dataflow import DataFlow from datahub.api.entities.datajob.datajob import DataJob from datahub.api.entities.dataprocess.dataprocess_instance import ( @@ -22,7 +24,7 @@ dataJob2 = DataJob(flow_urn=jobFlow.urn, id="job2", name="My Job 2") dataJob2.upstream_urns.append(dataJob.urn) dataJob2.tags.add("TestTag") -dataJob2.owners.add("test@test.com") +dataJob2.owners.add("testUser") dataJob2.emit(emitter) dataJob3 = DataJob(flow_urn=jobFlow.urn, id="job3", name="My Job 3") @@ -32,6 +34,7 @@ dataJob4 = DataJob(flow_urn=jobFlow.urn, id="job4", name="My Job 4") dataJob4.upstream_urns.append(dataJob2.urn) dataJob4.upstream_urns.append(dataJob3.urn) +dataJob4.group_owners.add("testGroup") dataJob4.emit(emitter) # Hello World @@ -105,3 +108,20 @@ end_timestamp_millis=int(time.time() * 1000), result=InstanceRunResult.SUCCESS, ) + +user1 = CorpUser( + id="testUser", + display_name="Test User", + email="test-user@test.com", + groups=["testGroup"], +) +user1.emit(emitter) + +group1 = CorpGroup( + id="testGroup", + display_name="Test Group", + email="test-group@test.com", + slack="#test-group", + overrideEditable=True, +) +group1.emit(emitter) diff --git a/metadata-ingestion/src/datahub/api/entities/corpgroup/__init__.py b/metadata-ingestion/src/datahub/api/entities/corpgroup/__init__.py new file mode 100644 index 00000000000000..dc4be421eff5d1 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/corpgroup/__init__.py @@ -0,0 +1 @@ +from datahub.api.entities.corpgroup.corpgroup import CorpGroup diff --git a/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py b/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py new file mode 100644 index 00000000000000..25ca60ade3a224 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from 
typing import Callable, Iterable, Optional, Union, cast + +import datahub.emitter.mce_builder as builder +from datahub.emitter.kafka_emitter import DatahubKafkaEmitter +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.rest_emitter import DatahubRestEmitter +from datahub.metadata.schema_classes import ( + ChangeTypeClass, + CorpGroupEditableInfoClass, + CorpGroupInfoClass, +) + + +@dataclass +class CorpGroup: + """This is a CorpGroup class which represents a CorpGroup + + Args: + id (str): The id of the group + display_name (Optional[str]): The name of the group + email (Optional[str]): email of this group + description (Optional[str]): A description of the group + overrideEditable (bool): If True, group information that is editable in the UI will be overridden + picture_link (Optional[str]): A URL which points to a picture which user wants to set as the photo for the group + slack (Optional[str]): Slack channel for the group + """ + + id: str + urn: str = field(init=False) + + # These are for CorpGroupInfo + display_name: Optional[str] = None + email: Optional[str] = None + description: Optional[str] = None + + # These are for CorpGroupEditableInfo + overrideEditable: bool = False + picture_link: Optional[str] = None + slack: Optional[str] = None + + def __post_init__(self): + self.urn = builder.make_group_urn(self.id) + + def generate_mcp(self) -> Iterable[MetadataChangeProposalWrapper]: + if self.overrideEditable: + mcp = MetadataChangeProposalWrapper( + entityType="corpgroup", + entityUrn=str(self.urn), + aspectName="corpGroupEditableInfo", + aspect=CorpGroupEditableInfoClass( + description=self.description, + pictureLink=self.picture_link, + slack=self.slack, + email=self.email, + ), + changeType=ChangeTypeClass.UPSERT, + ) + yield mcp + + mcp = MetadataChangeProposalWrapper( + entityType="corpgroup", + entityUrn=str(self.urn), + aspectName="corpGroupInfo", + aspect=CorpGroupInfoClass( + admins=[], # Deprecated, replaced by 
Ownership aspect + members=[], # Deprecated, replaced by GroupMembership aspect + groups=[], # Deprecated, this field is unused + displayName=self.display_name, + email=self.email, + description=self.description, + ), + changeType=ChangeTypeClass.UPSERT, + ) + yield mcp + + def emit( + self, + emitter: Union[DatahubRestEmitter, DatahubKafkaEmitter], + callback: Optional[Callable[[Exception, str], None]] = None, + ) -> None: + """ + Emit the CorpGroup entity to Datahub + + :param emitter: Datahub Emitter to emit the proccess event + :param callback: The callback method for KafkaEmitter if it is used + """ + for mcp in self.generate_mcp(): + if type(emitter).__name__ == "DatahubKafkaEmitter": + assert callback is not None + kafka_emitter = cast("DatahubKafkaEmitter", emitter) + kafka_emitter.emit(mcp, callback) + else: + rest_emitter = cast("DatahubRestEmitter", emitter) + rest_emitter.emit(mcp) diff --git a/metadata-ingestion/src/datahub/api/entities/corpuser/__init__.py b/metadata-ingestion/src/datahub/api/entities/corpuser/__init__.py new file mode 100644 index 00000000000000..ea94087746c248 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/corpuser/__init__.py @@ -0,0 +1 @@ +from datahub.api.entities.corpuser.corpuser import CorpUser diff --git a/metadata-ingestion/src/datahub/api/entities/corpuser/corpuser.py b/metadata-ingestion/src/datahub/api/entities/corpuser/corpuser.py new file mode 100644 index 00000000000000..00fe35ded5ee25 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/corpuser/corpuser.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Callable, Iterable, List, Optional, Union, cast + +import datahub.emitter.mce_builder as builder +from datahub.emitter.kafka_emitter import DatahubKafkaEmitter +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.rest_emitter import DatahubRestEmitter +from datahub.metadata.schema_classes 
import ( + ChangeTypeClass, + CorpUserInfoClass, + GroupMembershipClass, +) + + +@dataclass +class CorpUser: + """This is a CorpUser class which represents a CorpUser + + Args: + id (str): The id of the user + display_name (Optional[str]): The name of the user to display in the UI + email (Optional[str]): email address of this user + title (Optional[str]): title of this user + manager_urn (Optional[str]): direct manager of this user + department_id (Optional[int]): department id this user belongs to + department_name (Optional[str]): department name this user belongs to + first_name (Optional[str]): first name of this user + last_name (Optional[str]): last name of this user + full_name (Optional[str]): Common name of this user, format is firstName + lastName (split by a whitespace) + country_code (Optional[str]): two uppercase letters country code. e.g. US + groups (List[str]): List of group ids the user belongs to + """ + + id: str + urn: str = field(init=False) + display_name: Optional[str] = None + email: Optional[str] = None + title: Optional[str] = None + manager_urn: Optional[str] = None + department_id: Optional[int] = None + department_name: Optional[str] = None + first_name: Optional[str] = None + last_name: Optional[str] = None + full_name: Optional[str] = None + country_code: Optional[str] = None + groups: List[str] = field(default_factory=list) + + def __post_init__(self): + self.urn = builder.make_user_urn(self.id) + + def generate_group_membership_aspect(self) -> Iterable[GroupMembershipClass]: + group_membership = GroupMembershipClass( + groups=[builder.make_group_urn(group) for group in self.groups] + ) + return [group_membership] + + def generate_mcp(self) -> Iterable[MetadataChangeProposalWrapper]: + mcp = MetadataChangeProposalWrapper( + entityType="corpuser", + entityUrn=str(self.urn), + aspectName="corpUserInfo", + aspect=CorpUserInfoClass( + active=True, # Deprecated, use CorpUserStatus instead. 
+ displayName=self.display_name, + email=self.email, + title=self.title, + managerUrn=self.manager_urn, + departmentId=self.department_id, + departmentName=self.department_name, + firstName=self.first_name, + lastName=self.last_name, + fullName=self.full_name, + countryCode=self.country_code, + ), + changeType=ChangeTypeClass.UPSERT, + ) + yield mcp + + for group_membership in self.generate_group_membership_aspect(): + mcp = MetadataChangeProposalWrapper( + entityType="corpuser", + entityUrn=str(self.urn), + aspectName="groupMembership", + aspect=group_membership, + changeType=ChangeTypeClass.UPSERT, + ) + yield mcp + + def emit( + self, + emitter: Union[DatahubRestEmitter, DatahubKafkaEmitter], + callback: Optional[Callable[[Exception, str], None]] = None, + ) -> None: + """ + Emit the CorpUser entity to Datahub + + :param emitter: Datahub Emitter to emit the proccess event + :param callback: The callback method for KafkaEmitter if it is used + """ + for mcp in self.generate_mcp(): + if type(emitter).__name__ == "DatahubKafkaEmitter": + assert callback is not None + kafka_emitter = cast("DatahubKafkaEmitter", emitter) + kafka_emitter.emit(mcp, callback) + else: + rest_emitter = cast("DatahubRestEmitter", emitter) + rest_emitter.emit(mcp) diff --git a/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py b/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py index 8f33c7c3a4ec12..1440e7799a98f8 100644 --- a/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py +++ b/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py @@ -51,6 +51,8 @@ class DataJob: parent_instance (Optional[DataProcessInstanceUrn]): The parent execution's urn if applicable properties Dict[str, str]: Custom properties to set for the DataProcessInstance url (Optional[str]): Url which points to the DataJob at the orchestrator + owners Set[str]): A list of user ids that own this job. + group_owners Set[str]): A list of group ids that own this job. 
inlets (List[str]): List of urns the DataProcessInstance consumes outlets (List[str]): List of urns the DataProcessInstance produces input_datajob_urns: List[DataJobUrn] = field(default_factory=list) @@ -65,6 +67,7 @@ class DataJob: url: Optional[str] = None tags: Set[str] = field(default_factory=set) owners: Set[str] = field(default_factory=set) + group_owners: Set[str] = field(default_factory=set) inlets: List[DatasetUrn] = field(default_factory=list) outlets: List[DatasetUrn] = field(default_factory=list) upstream_urns: List[DataJobUrn] = field(default_factory=list) @@ -80,17 +83,20 @@ def __post_init__(self): ) def generate_ownership_aspect(self) -> Iterable[OwnershipClass]: + owners = set([builder.make_user_urn(owner) for owner in self.owners]) | set( + [builder.make_group_urn(owner) for owner in self.group_owners] + ) ownership = OwnershipClass( owners=[ OwnerClass( - owner=builder.make_user_urn(owner), + owner=urn, type=OwnershipTypeClass.DEVELOPER, source=OwnershipSourceClass( type=OwnershipSourceTypeClass.SERVICE, # url=dag.filepath, ), ) - for owner in (self.owners or []) + for urn in (owners or []) ], lastModified=AuditStampClass( time=0, From 3c0f63c50a0796c96b1677c5aaa717b37fcb4aa7 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 4 Oct 2022 19:14:00 +0000 Subject: [PATCH 31/76] fix(ingest): hide deprecated path_spec option from config (#5944) --- .../datahub/ingestion/source/aws/path_spec.py | 4 +- .../src/datahub/ingestion/source/s3/config.py | 99 +- .../delta_lake/test_local_delta_lake.py | 2 +- .../local/golden_mces_multiple_files.json | 1260 ++++------------- .../s3/sources/s3/multiple_files.json | 1 - .../tests/integration/s3/test_s3.py | 88 +- 6 files changed, 324 insertions(+), 1130 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/path_spec.py b/metadata-ingestion/src/datahub/ingestion/source/aws/path_spec.py index 4ec52574f0d810..7be9523e75d18f 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/aws/path_spec.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/path_spec.py @@ -42,12 +42,12 @@ class Config: table_name: Optional[str] = Field( default=None, - description="Display name of the dataset.Combination of named variableds from include path and strings", + description="Display name of the dataset.Combination of named variables from include path and strings", ) enable_compression: bool = Field( default=True, - description="Enable or disable processing compressed files. Currenly .gz and .bz files are supported.", + description="Enable or disable processing compressed files. Currently .gz and .bz files are supported.", ) sample_files: bool = Field( diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py index 4d5734679f0d15..15edb6f5c7a847 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py @@ -9,6 +9,7 @@ EnvBasedSourceConfigBase, PlatformSourceConfigBase, ) +from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.ingestion.source.aws.aws_common import AwsConnectionConfig from datahub.ingestion.source.aws.path_spec import PathSpec from datahub.ingestion.source.aws.s3_util import get_bucket_name @@ -20,18 +21,14 @@ class DataLakeSourceConfig(PlatformSourceConfigBase, EnvBasedSourceConfigBase): - path_specs: Optional[List[PathSpec]] = Field( - description="List of PathSpec. See below the details about PathSpec" - ) - path_spec: Optional[PathSpec] = Field( - description="Path spec will be deprecated in favour of path_specs option." + path_specs: List[PathSpec] = Field( + description="List of PathSpec. 
See [below](#path-spec) the details about PathSpec" ) platform: str = Field( - default="", description="The platform that this source connects to" - ) - platform_instance: Optional[str] = Field( - default=None, - description="The instance of the platform that all assets produced by this recipe belong to", + # The platform field already exists, but we want to override the type/default/docs. + default="", + description="The platform that this source connects to (either 's3' or 'file'). " + "If not specified, the platform will be inferred from the path_specs.", ) aws_config: Optional[AwsConnectionConfig] = Field( default=None, description="AWS configuration" @@ -64,51 +61,55 @@ class DataLakeSourceConfig(PlatformSourceConfigBase, EnvBasedSourceConfigBase): description="Maximum number of rows to use when inferring schemas for TSV and CSV files.", ) - @pydantic.root_validator(pre=False) - def validate_platform(cls, values: Dict) -> Dict: - if not values.get("path_specs") and not values.get("path_spec"): - raise ValueError("Either path_specs or path_spec needs to be specified") + _rename_path_spec_to_plural = pydantic_renamed_field( + "path_spec", "path_specs", lambda path_spec: [path_spec] + ) - if values.get("path_specs") and values.get("path_spec"): + @pydantic.validator("path_specs", always=True) + def check_path_specs_and_infer_platform( + cls, path_specs: List[PathSpec], values: Dict + ) -> List[PathSpec]: + if len(path_specs) == 0: + raise ValueError("path_specs must not be empty") + + # Check that all path specs have the same platform. 
+ guessed_platforms = set( + "s3" if path_spec.is_s3 else "file" for path_spec in path_specs + ) + if len(guessed_platforms) > 1: raise ValueError( - "Either path_specs or path_spec needs to be specified but not both" + f"Cannot have multiple platforms in path_specs: {guessed_platforms}" ) + guessed_platform = guessed_platforms.pop() - if values.get("path_spec"): - logger.warning( - "path_spec config property is deprecated, please use path_specs instead of it." + # If platform is s3, check that they're all the same bucket. + if guessed_platform == "s3": + bucket_names = set( + get_bucket_name(path_spec.include) for path_spec in path_specs + ) + if len(bucket_names) > 1: + raise ValueError( + f"All path_specs should reference the same s3 bucket. Got {bucket_names}" + ) + + # Ensure s3 configs aren't used for file sources. + if guessed_platform != "s3" and ( + values.get("use_s3_object_tags") or values.get("use_s3_bucket_tags") + ): + raise ValueError( + "Cannot grab s3 object/bucket tags when platform is not s3. Remove the flag or use s3." ) - values["path_specs"] = [values.get("path_spec")] - - bucket_name: str = "" - for path_spec in values.get("path_specs", []): - if path_spec.is_s3: - platform = "s3" - else: - if values.get("use_s3_object_tags") or values.get("use_s3_bucket_tags"): - raise ValueError( - "cannot grab s3 tags for platform != s3. Remove the flag or use s3." - ) - - platform = "file" - - if values.get("platform", ""): - if platform == "s3" and values["platform"] != platform: - raise ValueError("all path_spec should belong to the same platform") - else: - values["platform"] = platform - logger.debug(f'Setting config "platform": {values.get("platform")}') - - if platform == "s3": - if bucket_name == "": - bucket_name = get_bucket_name(path_spec.include) - else: - if bucket_name != get_bucket_name(path_spec.include): - raise ValueError( - "all path_spec should reference the same s3 bucket" - ) - return values + # Infer platform if not specified. 
+ if values.get("platform") and values["platform"] != guessed_platform: + raise ValueError( + f"All path_specs belong to {guessed_platform} platform, but platform is set to {values['platform']}" + ) + else: + logger.debug(f'Setting config "platform": {guessed_platform}') + values["platform"] = guessed_platform + + return path_specs @pydantic.root_validator() def ensure_profiling_pattern_is_passed_to_profiling( diff --git a/metadata-ingestion/tests/integration/delta_lake/test_local_delta_lake.py b/metadata-ingestion/tests/integration/delta_lake/test_local_delta_lake.py index 63c18ab8bc75ce..828f553783adc8 100644 --- a/metadata-ingestion/tests/integration/delta_lake/test_local_delta_lake.py +++ b/metadata-ingestion/tests/integration/delta_lake/test_local_delta_lake.py @@ -48,7 +48,7 @@ def test_delta_lake(pytestconfig, source_file, tmp_path, mock_time): ) -def test_data_lake_incorrect_config_raises_error(tmp_path, mock_time): +def test_delta_lake_incorrect_config_raises_error(tmp_path, mock_time): config_dict = {} config_dict["sink"] = { "type": "file", diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json index 205316b9bfc64e..4dffbfdb2bb4a5 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json @@ -1,9 +1,8 @@ [ { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "urn": 
"urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -11,32 +10,24 @@ "number_of_files": "1", "size_in_bytes": "3575" }, - "externalUrl": null, "name": "NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv", - "qualifiedName": null, "description": "", - "uri": null, "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { "schemaName": "NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv", - "platform": "urn:li:dataPlatform:test-platform", + "platform": "urn:li:dataPlatform:file", "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -46,9 +37,7 @@ "fields": [ { "fieldPath": "2", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -56,16 +45,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "3", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -73,16 +57,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Br \n(mg/L)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { 
"com.linkedin.pegasus2avro.schema.StringType": {} @@ -90,16 +69,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Ca \n(mg/L)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -107,16 +81,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Cl \n(mg/L)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -124,16 +93,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Cond (\u00b5S/cm)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -141,16 +105,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "DO (mg/L)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -158,16 +117,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "DOC [mg/L C]", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -175,16 +129,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false 
}, { "fieldPath": "F \n(mg/L)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -192,16 +141,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "K \n(mg/L)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -209,16 +153,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Lat (\u00b0N)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -226,16 +165,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Long (\u00b0W)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -243,16 +177,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Mg \n(mg/L)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -260,16 +189,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "NH3-N \n(mg N/L)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -277,16 +201,11 @@ }, "nativeDataType": "string", "recursive": 
false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "NO3-N+NO2-N \n(mg N/L)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -294,16 +213,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Na \n(mg/L)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -311,16 +225,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "PO4-P \n(mg P/L)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -328,16 +237,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Park ID", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -345,16 +249,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "SO4-S \n(mg/L)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -362,16 +261,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "SUVA, 254nm", - "jsonPath": null, "nullable": false, - "description": null, "type": 
{ "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -379,16 +273,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Sampling Date", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -396,16 +285,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Secchi Depth (m)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -413,16 +297,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Site ID", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -430,16 +309,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "TDN \n(mg N/L)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -447,16 +321,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "TDP \n(mg P/L)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -464,16 +333,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + 
"isPartOfKey": false }, { "fieldPath": "UV Absorbance, 254nm", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -481,16 +345,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Water Temp (\u00b0C)", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -498,16 +357,11 @@ }, "nativeDataType": "number", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "d18O", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -515,16 +369,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "dD", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -532,16 +381,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "field29", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -549,16 +393,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "pH", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -566,72 +405,50 @@ }, "nativeDataType": "string", 
"recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + ] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:30fb6a1dfbb1cf9c0ff92844b14f1e22", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"test-platform\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests\"}, \"name\": \"tests\"}", + "value": "{\"customProperties\": {\"platform\": \"file\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests\"}, \"name\": \"tests\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:30fb6a1dfbb1cf9c0ff92844b14f1e22", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:test-platform\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - 
"entityUrn": "urn:li:container:30fb6a1dfbb1cf9c0ff92844b14f1e22", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -640,55 +457,40 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:2e8794cad300a557e34cec3fbfd48a15", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:09bc75f9aaf92d57502aad33cab2e999", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"test-platform\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests/integration\"}, \"name\": \"integration\"}", + "value": "{\"customProperties\": {\"platform\": \"file\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests/integration\"}, \"name\": \"integration\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:2e8794cad300a557e34cec3fbfd48a15", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:09bc75f9aaf92d57502aad33cab2e999", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:test-platform\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, 
"entityType": "container", - "entityUrn": "urn:li:container:2e8794cad300a557e34cec3fbfd48a15", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:09bc75f9aaf92d57502aad33cab2e999", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -697,74 +499,54 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:2e8794cad300a557e34cec3fbfd48a15", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:09bc75f9aaf92d57502aad33cab2e999", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:30fb6a1dfbb1cf9c0ff92844b14f1e22\"}", + "value": "{\"container\": \"urn:li:container:c0b6448a96b5b99a7cabec1c4bfa66c0\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:9e02f1474054b9ad227be6b8ae5574a8", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:564adc1710f345e4777dbdc81a4b20db", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"test-platform\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests/integration/s3\"}, \"name\": \"s3\"}", + "value": "{\"customProperties\": {\"platform\": \"file\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests/integration/s3\"}, \"name\": \"s3\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": 
"multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:9e02f1474054b9ad227be6b8ae5574a8", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:564adc1710f345e4777dbdc81a4b20db", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:test-platform\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:9e02f1474054b9ad227be6b8ae5574a8", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:564adc1710f345e4777dbdc81a4b20db", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -773,74 +555,54 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:9e02f1474054b9ad227be6b8ae5574a8", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:564adc1710f345e4777dbdc81a4b20db", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:2e8794cad300a557e34cec3fbfd48a15\"}", + "value": "{\"container\": \"urn:li:container:09bc75f9aaf92d57502aad33cab2e999\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:c0f8692822906bb838cb93bedf5cc860", - "entityKeyAspect": null, 
+ "entityUrn": "urn:li:container:c46207c164682005e865a54fcf7f4a9f", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"test-platform\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests/integration/s3/test_data\"}, \"name\": \"test_data\"}", + "value": "{\"customProperties\": {\"platform\": \"file\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests/integration/s3/test_data\"}, \"name\": \"test_data\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:c0f8692822906bb838cb93bedf5cc860", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:c46207c164682005e865a54fcf7f4a9f", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:test-platform\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:c0f8692822906bb838cb93bedf5cc860", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:c46207c164682005e865a54fcf7f4a9f", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -849,74 +611,54 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": 
"urn:li:container:c0f8692822906bb838cb93bedf5cc860", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:c46207c164682005e865a54fcf7f4a9f", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:9e02f1474054b9ad227be6b8ae5574a8\"}", + "value": "{\"container\": \"urn:li:container:564adc1710f345e4777dbdc81a4b20db\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:e05cdbb4122cad868f29eea7e9571346", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:fd50ce59cb982671fc700636ab5744e2", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"test-platform\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests/integration/s3/test_data/local_system\"}, \"name\": \"local_system\"}", + "value": "{\"customProperties\": {\"platform\": \"file\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests/integration/s3/test_data/local_system\"}, \"name\": \"local_system\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:e05cdbb4122cad868f29eea7e9571346", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:fd50ce59cb982671fc700636ab5744e2", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:test-platform\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", "contentType": "application/json" 
}, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:e05cdbb4122cad868f29eea7e9571346", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:fd50ce59cb982671fc700636ab5744e2", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -925,74 +667,54 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:e05cdbb4122cad868f29eea7e9571346", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:fd50ce59cb982671fc700636ab5744e2", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:c0f8692822906bb838cb93bedf5cc860\"}", + "value": "{\"container\": \"urn:li:container:c46207c164682005e865a54fcf7f4a9f\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:161ffaf9bfa4603641b2fd53899edc52", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:34dcc9e05fe0d390619cbe1210771ba1", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"test-platform\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests/integration/s3/test_data/local_system/folder_a\"}, \"name\": \"folder_a\"}", + "value": "{\"customProperties\": {\"platform\": \"file\", \"instance\": \"test-platform-instance\", 
\"folder_abs_path\": \"tests/integration/s3/test_data/local_system/folder_a\"}, \"name\": \"folder_a\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:161ffaf9bfa4603641b2fd53899edc52", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:34dcc9e05fe0d390619cbe1210771ba1", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:test-platform\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:161ffaf9bfa4603641b2fd53899edc52", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:34dcc9e05fe0d390619cbe1210771ba1", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1001,74 +723,54 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:161ffaf9bfa4603641b2fd53899edc52", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:34dcc9e05fe0d390619cbe1210771ba1", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:e05cdbb4122cad868f29eea7e9571346\"}", + "value": "{\"container\": \"urn:li:container:fd50ce59cb982671fc700636ab5744e2\"}", "contentType": "application/json" }, "systemMetadata": { 
"lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:47d5326aae0dbc82c93eb7c2ad186bb4", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:ec0a322960f194cdd055a5a6d5172ecb", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"test-platform\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests/integration/s3/test_data/local_system/folder_a/folder_aa\"}, \"name\": \"folder_aa\"}", + "value": "{\"customProperties\": {\"platform\": \"file\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests/integration/s3/test_data/local_system/folder_a/folder_aa\"}, \"name\": \"folder_aa\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:47d5326aae0dbc82c93eb7c2ad186bb4", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:ec0a322960f194cdd055a5a6d5172ecb", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:test-platform\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:47d5326aae0dbc82c93eb7c2ad186bb4", - "entityKeyAspect": null, + "entityUrn": 
"urn:li:container:ec0a322960f194cdd055a5a6d5172ecb", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1077,74 +779,54 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:47d5326aae0dbc82c93eb7c2ad186bb4", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:ec0a322960f194cdd055a5a6d5172ecb", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:161ffaf9bfa4603641b2fd53899edc52\"}", + "value": "{\"container\": \"urn:li:container:34dcc9e05fe0d390619cbe1210771ba1\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:42f9e89a8684547e92b91ba826cb751a", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:46c2438600873ee3264c24c4ac6081b9", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"test-platform\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa\"}, \"name\": \"folder_aaa\"}", + "value": "{\"customProperties\": {\"platform\": \"file\", \"instance\": \"test-platform-instance\", \"folder_abs_path\": \"tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa\"}, \"name\": \"folder_aaa\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": 
"multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:42f9e89a8684547e92b91ba826cb751a", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:46c2438600873ee3264c24c4ac6081b9", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:test-platform\"}", + "value": "{\"platform\": \"urn:li:dataPlatform:file\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:42f9e89a8684547e92b91ba826cb751a", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:46c2438600873ee3264c24c4ac6081b9", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1153,55 +835,40 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "container", - "entityUrn": "urn:li:container:42f9e89a8684547e92b91ba826cb751a", - "entityKeyAspect": null, + "entityUrn": "urn:li:container:46c2438600873ee3264c24c4ac6081b9", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:47d5326aae0dbc82c93eb7c2ad186bb4\"}", + "value": "{\"container\": \"urn:li:container:ec0a322960f194cdd055a5a6d5172ecb\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", - "entityKeyAspect": null, + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:42f9e89a8684547e92b91ba826cb751a\"}", + "value": "{\"container\": \"urn:li:container:46c2438600873ee3264c24c4ac6081b9\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", - "entityKeyAspect": null, + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -1210,17 +877,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": 
"urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -1228,32 +891,24 @@ "number_of_files": "1", "size_in_bytes": "1024" }, - "externalUrl": null, "name": "chord_progressions_avro.avro", - "qualifiedName": null, "description": "", - "uri": null, "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { "schemaName": "chord_progressions_avro.avro", - "platform": "urn:li:dataPlatform:test-platform", + "platform": "urn:li:dataPlatform:file", "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -1263,9 +918,7 @@ "fields": [ { "fieldPath": "[version=2.0].[type=Root].[type=double].Progression Quality", - "jsonPath": null, "nullable": true, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -1273,16 +926,11 @@ }, "nativeDataType": "double", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "[version=2.0].[type=Root].[type=long].1st chord", - "jsonPath": null, "nullable": true, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -1290,16 +938,11 @@ }, "nativeDataType": "long", "recursive": false, - "globalTags": null, - "glossaryTerms": 
null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "[version=2.0].[type=Root].[type=long].2nd chord", - "jsonPath": null, "nullable": true, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -1307,16 +950,11 @@ }, "nativeDataType": "long", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "[version=2.0].[type=Root].[type=long].3rd chord", - "jsonPath": null, "nullable": true, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -1324,16 +962,11 @@ }, "nativeDataType": "long", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "[version=2.0].[type=Root].[type=string].4th chord", - "jsonPath": null, "nullable": true, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -1341,53 +974,36 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + ] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", - "entityKeyAspect": null, + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:42f9e89a8684547e92b91ba826cb751a\"}", + "value": "{\"container\": \"urn:li:container:46c2438600873ee3264c24c4ac6081b9\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", - "entityKeyAspect": null, + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -1396,17 +1012,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -1414,32 +1026,24 @@ "number_of_files": 
"1", "size_in_bytes": "604" }, - "externalUrl": null, "name": "chord_progressions_csv.csv", - "qualifiedName": null, "description": "", - "uri": null, "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { "schemaName": "chord_progressions_csv.csv", - "platform": "urn:li:dataPlatform:test-platform", + "platform": "urn:li:dataPlatform:file", "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -1449,9 +1053,7 @@ "fields": [ { "fieldPath": "1st chord", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -1459,16 +1061,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "2nd chord", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -1476,16 +1073,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "3rd chord", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -1493,16 +1085,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "4th chord", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { 
"com.linkedin.pegasus2avro.schema.NumberType": {} @@ -1510,16 +1097,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Progression Quality", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -1527,53 +1109,36 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + ] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", - "entityKeyAspect": null, + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:42f9e89a8684547e92b91ba826cb751a\"}", + "value": "{\"container\": \"urn:li:container:46c2438600873ee3264c24c4ac6081b9\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", - "entityKeyAspect": null, + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -1582,17 +1147,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -1600,32 +1161,24 @@ "number_of_files": "1", "size_in_bytes": "4646" }, - "externalUrl": null, "name": "countries_json.json", - "qualifiedName": null, "description": "", - "uri": null, "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { "schemaName": "countries_json.json", - "platform": "urn:li:dataPlatform:test-platform", + "platform": "urn:li:dataPlatform:file", "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { 
"com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -1635,28 +1188,19 @@ "fields": [ { "fieldPath": "countries", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { - "com.linkedin.pegasus2avro.schema.ArrayType": { - "nestedType": null - } + "com.linkedin.pegasus2avro.schema.ArrayType": {} } }, "nativeDataType": "list", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "countries.code", - "jsonPath": null, "nullable": true, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -1664,16 +1208,11 @@ }, "nativeDataType": "str", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "countries.name", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -1681,53 +1220,36 @@ }, "nativeDataType": "str", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + ] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", - "entityKeyAspect": null, + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", "aspectName": 
"container", "aspect": { - "value": "{\"container\": \"urn:li:container:42f9e89a8684547e92b91ba826cb751a\"}", + "value": "{\"container\": \"urn:li:container:46c2438600873ee3264c24c4ac6081b9\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", - "entityKeyAspect": null, + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -1736,17 +1258,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -1754,32 +1272,24 @@ "number_of_files": "1", "size_in_bytes": "4206" }, - "externalUrl": null, "name": "food_parquet.parquet", - "qualifiedName": null, "description": "", - "uri": null, "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { "schemaName": 
"food_parquet.parquet", - "platform": "urn:li:dataPlatform:test-platform", + "platform": "urn:li:dataPlatform:file", "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -1789,9 +1299,7 @@ "fields": [ { "fieldPath": "color", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -1799,16 +1307,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "healthy", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.BooleanType": {} @@ -1816,16 +1319,11 @@ }, "nativeDataType": "bool", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "height", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -1833,16 +1331,11 @@ }, "nativeDataType": "int64", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "name", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -1850,16 +1343,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "weight", - "jsonPath": null, 
"nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -1867,53 +1355,36 @@ }, "nativeDataType": "int64", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + ] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", - "entityKeyAspect": null, + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:42f9e89a8684547e92b91ba826cb751a\"}", + "value": "{\"container\": \"urn:li:container:46c2438600873ee3264c24c4ac6081b9\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", - "entityKeyAspect": null, + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": 
"UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -1922,17 +1393,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -1940,32 +1407,24 @@ "number_of_files": "1", "size_in_bytes": "172" }, - "externalUrl": null, "name": "small.csv", - "qualifiedName": null, "description": "", - "uri": null, "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { "schemaName": "small.csv", - "platform": "urn:li:dataPlatform:test-platform", + "platform": "urn:li:dataPlatform:file", "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -1975,9 +1434,7 @@ "fields": [ { "fieldPath": "1st chord", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -1985,16 +1442,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "2nd chord", - 
"jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2002,16 +1454,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "3rd chord", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2019,16 +1466,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "4th chord", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2036,16 +1478,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "Progression Quality", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -2053,53 +1490,36 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + ] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", - "entityKeyAspect": null, + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:42f9e89a8684547e92b91ba826cb751a\"}", + "value": "{\"container\": \"urn:li:container:46c2438600873ee3264c24c4ac6081b9\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", - "entityKeyAspect": null, + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -2108,17 +1528,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -2126,32 +1542,24 @@ "number_of_files": "1", "size_in_bytes": "34056" }, 
- "externalUrl": null, "name": "wa_fn_usec_hr_employee_attrition_csv.csv", - "qualifiedName": null, "description": "", - "uri": null, "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { "schemaName": "wa_fn_usec_hr_employee_attrition_csv.csv", - "platform": "urn:li:dataPlatform:test-platform", + "platform": "urn:li:dataPlatform:file", "version": 0, "created": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null + "actor": "urn:li:corpuser:unknown" }, - "deleted": null, - "dataset": null, - "cluster": null, "hash": "", "platformSchema": { "com.linkedin.pegasus2avro.schema.OtherSchema": { @@ -2161,9 +1569,7 @@ "fields": [ { "fieldPath": "age", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2171,16 +1577,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "attrition", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -2188,16 +1589,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "businesstravel", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -2205,16 +1601,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "dailyrate", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { 
"com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2222,16 +1613,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "department", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -2239,16 +1625,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "distancefromhome", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2256,16 +1637,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "education", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2273,16 +1649,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "educationfield", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -2290,16 +1661,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "employeecount", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2307,16 +1673,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + 
"isPartOfKey": false }, { "fieldPath": "environmentsatisfaction", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2324,16 +1685,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "gender", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -2341,16 +1697,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "jobinvolvement", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2358,16 +1709,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "joblevel", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2375,16 +1721,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "jobrole", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -2392,16 +1733,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "jobsatisfaction", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2409,16 +1745,11 @@ }, 
"nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "maritalstatus", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -2426,16 +1757,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "numcompaniesworked", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2443,16 +1769,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "over18", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -2460,16 +1781,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "overtime", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} @@ -2477,16 +1793,11 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "percentsalaryhike", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2494,16 +1805,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "performancerating", - "jsonPath": 
null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2511,16 +1817,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "relationshipsatisfaction", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2528,16 +1829,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "standardhours", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2545,16 +1841,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "stockoptionlevel", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2562,16 +1853,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "totalworkingyears", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2579,16 +1865,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "trainingtimeslastyear", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2596,16 +1877,11 @@ }, "nativeDataType": "integer", "recursive": 
false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "worklifebalance", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2613,16 +1889,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "yearsatcompany", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2630,16 +1901,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "yearsincurrentrole", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2647,16 +1913,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "yearssincelastpromotion", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2664,16 +1925,11 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false }, { "fieldPath": "yearswithcurrmanager", - "jsonPath": null, "nullable": false, - "description": null, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2681,53 +1937,36 @@ }, "nativeDataType": "integer", "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null + "isPartOfKey": false } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - 
"foreignKeys": null + ] } } ] } }, - "proposedDelta": null, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", - "entityKeyAspect": null, + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:42f9e89a8684547e92b91ba826cb751a\"}", + "value": "{\"container\": \"urn:li:container:46c2438600873ee3264c24c4ac6081b9\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:test-platform,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", - "entityKeyAspect": null, + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -2736,10 +1975,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "multiple_files.json" } } ] \ No 
newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/sources/s3/multiple_files.json b/metadata-ingestion/tests/integration/s3/sources/s3/multiple_files.json index 76db50844fe512..77be022895cfca 100644 --- a/metadata-ingestion/tests/integration/s3/sources/s3/multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/sources/s3/multiple_files.json @@ -1,7 +1,6 @@ { "type": "s3", "config": { - "platform": "test-platform", "platform_instance": "test-platform-instance", "env": "DEV", "path_specs": [{ diff --git a/metadata-ingestion/tests/integration/s3/test_s3.py b/metadata-ingestion/tests/integration/s3/test_s3.py index 90bb4927763bda..7632962e1fcc3d 100644 --- a/metadata-ingestion/tests/integration/s3/test_s3.py +++ b/metadata-ingestion/tests/integration/s3/test_s3.py @@ -7,7 +7,8 @@ from moto import mock_s3 from pydantic import ValidationError -from datahub.ingestion.run.pipeline import Pipeline, PipelineInitError +from datahub.ingestion.run.pipeline import Pipeline, PipelineContext +from datahub.ingestion.source.s3.source import S3Source from tests.test_helpers import mce_helpers FROZEN_TIME = "2020-04-14 07:00:00" @@ -78,7 +79,6 @@ def test_data_lake_s3_ingest( f = open(os.path.join(SOURCE_FILES_PATH, source_file)) source = json.load(f) - source["config"]["platform"] = "s3" config_dict = {} config_dict["source"] = source @@ -119,11 +119,8 @@ def test_data_lake_local_ingest(pytestconfig, source_file, tmp_path, mock_time): source["config"]["profiling"]["enabled"] = True source["config"].pop("aws_config") - # Only pop the key/value for configs that contain the key - if "use_s3_bucket_tags" in source["config"]: - source["config"].pop("use_s3_bucket_tags") - if "use_s3_object_tags" in source["config"]: - source["config"].pop("use_s3_object_tags") + source["config"].pop("use_s3_bucket_tags", None) + source["config"].pop("use_s3_object_tags", None) config_dict["source"] = source config_dict["sink"] = { "type": "file", @@ -147,78 +144,39 @@ 
def test_data_lake_local_ingest(pytestconfig, source_file, tmp_path, mock_time): def test_data_lake_incorrect_config_raises_error(tmp_path, mock_time): - - config_dict = {} - config_dict["sink"] = { - "type": "file", - "config": { - "filename": f"{tmp_path}/mces.json", - }, - } + ctx = PipelineContext(run_id="test-s3") # Case 1 : named variable in table name is not present in include - source = { - "type": "s3", - "config": { - "path_spec": {"include": "a/b/c/d/{table}.*", "table_name": "{table1}"} - }, + source: dict = { + "path_spec": {"include": "a/b/c/d/{table}.*", "table_name": "{table1}"} } - config_dict["source"] = source - with pytest.raises(PipelineInitError) as e_info: - Pipeline.create(config_dict) - - assert e_info._excinfo - assert isinstance(e_info._excinfo[1].__cause__, ValidationError) - logging.debug(e_info) + with pytest.raises(ValidationError, match="table_name"): + S3Source.create(source, ctx) # Case 2 : named variable in exclude is not allowed source = { - "type": "s3", - "config": { - "path_spec": { - "include": "a/b/c/d/{table}/*.*", - "exclude": ["a/b/c/d/a-{exclude}/**"], - } + "path_spec": { + "include": "a/b/c/d/{table}/*.*", + "exclude": ["a/b/c/d/a-{exclude}/**"], }, } - config_dict["source"] = source - with pytest.raises(PipelineInitError) as e_info: - Pipeline.create(config_dict) - - assert e_info._excinfo - assert isinstance(e_info._excinfo[1].__cause__, ValidationError) - logging.debug(e_info) + with pytest.raises(ValidationError, match=r"exclude.*named variable"): + S3Source.create(source, ctx) # Case 3 : unsupported file type not allowed source = { - "type": "s3", - "config": { - "path_spec": { - "include": "a/b/c/d/{table}/*.hd5", - } - }, + "path_spec": { + "include": "a/b/c/d/{table}/*.hd5", + } } - config_dict["source"] = source - with pytest.raises(PipelineInitError) as e_info: - Pipeline.create(config_dict) - - assert e_info._excinfo - assert isinstance(e_info._excinfo[1].__cause__, ValidationError) - logging.debug(e_info) + 
with pytest.raises(ValidationError, match="file type"): + S3Source.create(source, ctx) # Case 4 : ** in include not allowed source = { - "type": "s3", - "config": { - "path_spec": { - "include": "a/b/c/d/**/*.*", - } + "path_spec": { + "include": "a/b/c/d/**/*.*", }, } - config_dict["source"] = source - with pytest.raises(PipelineInitError) as e_info: - Pipeline.create(config_dict) - - assert e_info._excinfo - assert isinstance(e_info._excinfo[1].__cause__, ValidationError) - logging.debug(e_info) + with pytest.raises(ValidationError, match=r"\*\*"): + S3Source.create(source, ctx) From ee1a1ef45bdcfe070cf7b13e9ab7d5eac33760b1 Mon Sep 17 00:00:00 2001 From: Aditya Radhakrishnan Date: Tue, 4 Oct 2022 15:37:28 -0700 Subject: [PATCH 32/76] feat(posts): add posts feature to DataHub (#6110) --- datahub-frontend/play.gradle | 4 +- .../datahub/graphql/GmsGraphQLEngine.java | 10 +- .../authorization/AuthorizationUtils.java | 4 + .../resolvers/post/CreatePostResolver.java | 60 +++++ .../resolvers/post/ListPostsResolver.java | 72 ++++++ .../graphql/types/post/PostMapper.java | 76 ++++++ .../src/main/resources/entity.graphql | 235 ++++++++++++++++++ .../post/CreatePostResolverTest.java | 91 +++++++ .../resolvers/post/ListPostsResolverTest.java | 120 +++++++++ .../src/app/home/HomePagePosts.tsx | 62 +++++ .../src/app/home/HomePageRecommendations.tsx | 4 +- .../src/app/search/PostLinkCard.tsx | 94 +++++++ .../src/app/search/PostTextCard.tsx | 65 +++++ .../src/graphql/mutations.graphql | 4 + datahub-web-react/src/graphql/post.graphql | 22 ++ .../java/com/linkedin/metadata/Constants.java | 6 +- .../pegasus/com/linkedin/common/Media.pdl | 16 ++ .../pegasus/com/linkedin/common/MediaType.pdl | 11 + .../com/linkedin/metadata/key/PostKey.pdl | 14 ++ .../pegasus/com/linkedin/post/PostContent.pdl | 37 +++ .../com/linkedin/post/PostContentType.pdl | 16 ++ .../pegasus/com/linkedin/post/PostInfo.pdl | 35 +++ .../pegasus/com/linkedin/post/PostType.pdl | 11 + 
.../src/main/resources/entity-registry.yml | 5 + .../authentication/post/PostService.java | 71 ++++++ .../authentication/post/PostServiceTest.java | 66 +++++ .../gms/factory/auth/PostServiceFactory.java | 28 +++ .../factory/graphql/GraphQLEngineFactory.java | 7 + .../authorization/PoliciesConfig.java | 7 +- 29 files changed, 1246 insertions(+), 7 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/CreatePostResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/post/PostMapper.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/post/CreatePostResolverTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolverTest.java create mode 100644 datahub-web-react/src/app/home/HomePagePosts.tsx create mode 100644 datahub-web-react/src/app/search/PostLinkCard.tsx create mode 100644 datahub-web-react/src/app/search/PostTextCard.tsx create mode 100644 datahub-web-react/src/graphql/post.graphql create mode 100644 metadata-models/src/main/pegasus/com/linkedin/common/Media.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/common/MediaType.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/key/PostKey.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/post/PostContent.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/post/PostContentType.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/post/PostInfo.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/post/PostType.pdl create mode 100644 metadata-service/auth-impl/src/main/java/com/datahub/authentication/post/PostService.java create mode 100644 
metadata-service/auth-impl/src/test/java/com/datahub/authentication/post/PostServiceTest.java create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/PostServiceFactory.java diff --git a/datahub-frontend/play.gradle b/datahub-frontend/play.gradle index 123cce7eb3cfbf..fb08cbddc1b071 100644 --- a/datahub-frontend/play.gradle +++ b/datahub-frontend/play.gradle @@ -72,7 +72,7 @@ play { platform { playVersion = '2.7.6' scalaVersion = '2.12' - javaVersion = JavaVersion.VERSION_1_8 + javaVersion = JavaVersion.VERSION_11 } injectedRoutesGenerator = true @@ -81,7 +81,7 @@ play { model { components { play { - platform play: '2.7.6', scala: '2.12', java: '1.8' + platform play: '2.7.6', scala: '2.12', java: '11' injectedRoutesGenerator = true binaries.all { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index db9f999a2bbe32..a2492082f70661 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -3,6 +3,7 @@ import com.datahub.authentication.AuthenticationConfiguration; import com.datahub.authentication.group.GroupService; import com.datahub.authentication.invite.InviteTokenService; +import com.datahub.authentication.post.PostService; import com.datahub.authentication.token.StatefulTokenService; import com.datahub.authentication.user.NativeUserService; import com.datahub.authorization.AuthorizationConfiguration; @@ -175,6 +176,8 @@ import com.linkedin.datahub.graphql.resolvers.policy.GetGrantedPrivilegesResolver; import com.linkedin.datahub.graphql.resolvers.policy.ListPoliciesResolver; import com.linkedin.datahub.graphql.resolvers.policy.UpsertPolicyResolver; +import com.linkedin.datahub.graphql.resolvers.post.CreatePostResolver; +import 
com.linkedin.datahub.graphql.resolvers.post.ListPostsResolver; import com.linkedin.datahub.graphql.resolvers.recommendation.ListRecommendationsResolver; import com.linkedin.datahub.graphql.resolvers.role.AcceptRoleResolver; import com.linkedin.datahub.graphql.resolvers.role.BatchAssignRoleResolver; @@ -310,6 +313,7 @@ public class GmsGraphQLEngine { private final GroupService groupService; private final RoleService roleService; private final InviteTokenService inviteTokenService; + private final PostService postService; private final FeatureFlags featureFlags; @@ -386,7 +390,7 @@ public GmsGraphQLEngine(final EntityClient entityClient, final GraphClient graph final VisualConfiguration visualConfiguration, final TelemetryConfiguration telemetryConfiguration, final TestsConfiguration testsConfiguration, final DatahubConfiguration datahubConfiguration, final SiblingGraphService siblingGraphService, final GroupService groupService, final RoleService roleService, - final InviteTokenService inviteTokenService, final FeatureFlags featureFlags) { + final InviteTokenService inviteTokenService, final PostService postService, final FeatureFlags featureFlags) { this.entityClient = entityClient; this.graphClient = graphClient; @@ -407,6 +411,7 @@ public GmsGraphQLEngine(final EntityClient entityClient, final GraphClient graph this.groupService = groupService; this.roleService = roleService; this.inviteTokenService = inviteTokenService; + this.postService = postService; this.ingestionConfiguration = Objects.requireNonNull(ingestionConfiguration); this.authenticationConfiguration = Objects.requireNonNull(authenticationConfiguration); @@ -676,6 +681,7 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("entities", getEntitiesResolver()) .dataFetcher("listRoles", new ListRolesResolver(this.entityClient)) .dataFetcher("getInviteToken", new GetInviteTokenResolver(this.inviteTokenService)) + .dataFetcher("listPosts", new 
ListPostsResolver(this.entityClient)) ); } @@ -798,7 +804,7 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("batchAssignRole", new BatchAssignRoleResolver(this.roleService)) .dataFetcher("createInviteToken", new CreateInviteTokenResolver(this.inviteTokenService)) .dataFetcher("acceptRole", new AcceptRoleResolver(this.roleService, this.inviteTokenService)) - + .dataFetcher("createPost", new CreatePostResolver(this.postService)) ); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java index f4034769082953..be7ab57eb8aa15 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java @@ -107,6 +107,10 @@ public static boolean canEditGroupMembers(@Nonnull String groupUrnStr, @Nonnull groupUrnStr, orPrivilegeGroups); } + public static boolean canCreateGlobalAnnouncements(@Nonnull QueryContext context) { + return isAuthorized(context, Optional.empty(), PoliciesConfig.CREATE_GLOBAL_ANNOUNCEMENTS_PRIVILEGE); + } + public static boolean isAuthorized( @Nonnull QueryContext context, @Nonnull Optional resourceSpec, diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/CreatePostResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/CreatePostResolver.java new file mode 100644 index 00000000000000..524caf14e9afe4 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/CreatePostResolver.java @@ -0,0 +1,60 @@ +package com.linkedin.datahub.graphql.resolvers.post; + +import com.datahub.authentication.Authentication; +import com.datahub.authentication.post.PostService; +import 
com.linkedin.common.Media; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.generated.CreatePostInput; +import com.linkedin.datahub.graphql.generated.PostContentType; +import com.linkedin.datahub.graphql.generated.PostType; +import com.linkedin.datahub.graphql.generated.UpdateMediaInput; +import com.linkedin.datahub.graphql.generated.UpdatePostContentInput; +import com.linkedin.post.PostContent; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.concurrent.CompletableFuture; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; + + +@Slf4j +@RequiredArgsConstructor +public class CreatePostResolver implements DataFetcher> { + private final PostService _postService; + + @Override + public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { + final QueryContext context = environment.getContext(); + + if (!AuthorizationUtils.canCreateGlobalAnnouncements(context)) { + throw new AuthorizationException( + "Unauthorized to create posts. Please contact your DataHub administrator if this needs corrective action."); + } + + final CreatePostInput input = bindArgument(environment.getArgument("input"), CreatePostInput.class); + final PostType type = input.getPostType(); + final UpdatePostContentInput content = input.getContent(); + final PostContentType contentType = content.getContentType(); + final String title = content.getTitle(); + final String link = content.getLink(); + final String description = content.getDescription(); + final UpdateMediaInput updateMediaInput = content.getMedia(); + final Authentication authentication = context.getAuthentication(); + + Media media = updateMediaInput == null ? 
null + : _postService.mapMedia(updateMediaInput.getType().toString(), updateMediaInput.getLocation()); + PostContent postContent = _postService.mapPostContent(contentType.toString(), title, description, link, media); + + return CompletableFuture.supplyAsync(() -> { + try { + return _postService.createPost(type.toString(), postContent, authentication); + } catch (Exception e) { + throw new RuntimeException("Failed to create a new post", e); + } + }); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java new file mode 100644 index 00000000000000..839c5b5d1add1a --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java @@ -0,0 +1,72 @@ +package com.linkedin.datahub.graphql.resolvers.post; + +import com.datahub.authentication.Authentication; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.ListPostsInput; +import com.linkedin.datahub.graphql.generated.ListPostsResult; +import com.linkedin.datahub.graphql.types.post.PostMapper; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchResult; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.HashSet; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; +import static com.linkedin.metadata.Constants.*; + + +@Slf4j 
+@RequiredArgsConstructor +public class ListPostsResolver implements DataFetcher> { + private static final Integer DEFAULT_START = 0; + private static final Integer DEFAULT_COUNT = 20; + private static final String DEFAULT_QUERY = ""; + + private final EntityClient _entityClient; + + @Override + public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { + final QueryContext context = environment.getContext(); + final Authentication authentication = context.getAuthentication(); + + final ListPostsInput input = bindArgument(environment.getArgument("input"), ListPostsInput.class); + final Integer start = input.getStart() == null ? DEFAULT_START : input.getStart(); + final Integer count = input.getCount() == null ? DEFAULT_COUNT : input.getCount(); + final String query = input.getQuery() == null ? DEFAULT_QUERY : input.getQuery(); + + return CompletableFuture.supplyAsync(() -> { + try { + final SortCriterion sortCriterion = + new SortCriterion().setField(LAST_MODIFIED_FIELD_NAME).setOrder(SortOrder.DESCENDING); + + // First, get all Post Urns. + final SearchResult gmsResult = _entityClient.search(POST_ENTITY_NAME, query, null, sortCriterion, start, count, + context.getAuthentication()); + + // Then, get and hydrate all Posts. 
+ final Map entities = _entityClient.batchGetV2(POST_ENTITY_NAME, + new HashSet<>(gmsResult.getEntities().stream().map(SearchEntity::getEntity).collect(Collectors.toList())), + null, authentication); + + final ListPostsResult result = new ListPostsResult(); + result.setStart(gmsResult.getFrom()); + result.setCount(gmsResult.getPageSize()); + result.setTotal(gmsResult.getNumEntities()); + result.setPosts(entities.values().stream().map(PostMapper::map).collect(Collectors.toList())); + return result; + } catch (Exception e) { + throw new RuntimeException("Failed to list posts", e); + } + }); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/post/PostMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/post/PostMapper.java new file mode 100644 index 00000000000000..791197c7d47e49 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/post/PostMapper.java @@ -0,0 +1,76 @@ +package com.linkedin.datahub.graphql.types.post; + +import com.linkedin.data.DataMap; +import com.linkedin.datahub.graphql.generated.AuditStamp; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.Media; +import com.linkedin.datahub.graphql.generated.MediaType; +import com.linkedin.datahub.graphql.generated.Post; +import com.linkedin.datahub.graphql.generated.PostContent; +import com.linkedin.datahub.graphql.generated.PostContentType; +import com.linkedin.datahub.graphql.generated.PostType; +import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.post.PostInfo; +import javax.annotation.Nonnull; + +import static com.linkedin.metadata.Constants.*; + + +public class PostMapper implements ModelMapper { + + public static final PostMapper INSTANCE = new 
PostMapper(); + + public static Post map(@Nonnull final EntityResponse entityResponse) { + return INSTANCE.apply(entityResponse); + } + + @Override + public Post apply(@Nonnull final EntityResponse entityResponse) { + final Post result = new Post(); + + result.setUrn(entityResponse.getUrn().toString()); + result.setType(EntityType.POST); + EnvelopedAspectMap aspectMap = entityResponse.getAspects(); + MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); + mappingHelper.mapToResult(POST_INFO_ASPECT_NAME, this::mapPostInfo); + return mappingHelper.getResult(); + } + + private void mapPostInfo(@Nonnull Post post, @Nonnull DataMap dataMap) { + PostInfo postInfo = new PostInfo(dataMap); + post.setPostType(PostType.valueOf(postInfo.getType().toString())); + post.setContent(mapPostContent(postInfo.getContent())); + AuditStamp lastModified = new AuditStamp(); + lastModified.setTime(postInfo.getLastModified()); + post.setLastModified(lastModified); + } + + @Nonnull + private com.linkedin.datahub.graphql.generated.PostContent mapPostContent( + @Nonnull com.linkedin.post.PostContent postContent) { + PostContent result = new PostContent(); + result.setContentType(PostContentType.valueOf(postContent.getType().toString())); + result.setTitle(postContent.getTitle()); + if (postContent.hasDescription()) { + result.setDescription(postContent.getDescription()); + } + if (postContent.hasLink()) { + result.setLink(postContent.getLink().toString()); + } + if (postContent.hasMedia()) { + result.setMedia(mapPostMedia(postContent.getMedia())); + } + return result; + } + + @Nonnull + private Media mapPostMedia(@Nonnull com.linkedin.common.Media postMedia) { + Media result = new Media(); + result.setType(MediaType.valueOf(postMedia.getType().toString())); + result.setLocation(postMedia.getLocation().toString()); + return result; + } +} diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 
58613e0b92f743..9722b12ea25fcc 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -183,6 +183,11 @@ type Query { Get invite token """ getInviteToken(input: GetInviteTokenInput!): InviteToken + + """ + List all Posts + """ + listPosts(input: ListPostsInput!): ListPostsResult } """ @@ -518,6 +523,11 @@ type Mutation { Create invite token """ createInviteToken(input: CreateInviteTokenInput!): InviteToken + + """ + Create a post + """ + createPost(input: CreatePostInput!): Boolean } """ @@ -693,6 +703,11 @@ enum EntityType { A DataHub Role """ DATAHUB_ROLE + + """ + A DataHub Post + """ + POST } """ @@ -9385,3 +9400,223 @@ input AcceptRoleInput { """ inviteToken: String! } + +""" +The type of post +""" +enum PostType { + """ + Posts on the home page + """ + HOME_PAGE_ANNOUNCEMENT, +} + +""" +The type of post +""" +enum PostContentType { + """ + Text content + """ + TEXT, + + """ + Link content + """ + LINK +} + +""" +The type of media +""" +enum MediaType { + """ + An image + """ + IMAGE +} + +""" +Input provided when creating a Post +""" +input CreatePostInput { + """ + The type of post + """ + postType: PostType! + + """ + The content of the post + """ + content: UpdatePostContentInput! +} + +""" +Input provided for filling in a post content +""" +input UpdatePostContentInput { + """ + The type of post content + """ + contentType: PostContentType! + + """ + The title of the post + """ + title: String! + + """ + Optional content of the post + """ + description: String + + """ + Optional link that the post is associated with + """ + link: String + + """ + Optional media contained in the post + """ + media: UpdateMediaInput +} + +""" +Input provided for filling in a post content +""" +input UpdateMediaInput { + """ + The type of media + """ + type: MediaType! + + """ + The location of the media (a URL) + """ + location: String! 
+} + +""" +Input provided when listing existing posts +""" +input ListPostsInput { + """ + The starting offset of the result set returned + """ + start: Int + + """ + The maximum number of Roles to be returned in the result set + """ + count: Int + + """ + Optional search query + """ + query: String +} + +""" +The result obtained when listing Posts +""" +type ListPostsResult { + """ + The starting offset of the result set returned + """ + start: Int! + + """ + The number of Roles in the returned result set + """ + count: Int! + + """ + The total number of Roles in the result set + """ + total: Int! + + """ + The Posts themselves + """ + posts: [Post!]! +} + +""" +Input provided when creating a Post +""" +type Post implements Entity { + """ + The primary key of the Post + """ + urn: String! + + """ + The standard Entity Type + """ + type: EntityType! + + """ + Granular API for querying edges extending from the Post + """ + relationships(input: RelationshipsInput!): EntityRelationshipsResult + + """ + The type of post + """ + postType: PostType! + + """ + The content of the post + """ + content: PostContent! + + """ + When the post was last modified + """ + lastModified: AuditStamp! +} + +""" +Post content +""" +type PostContent { + """ + The type of post content + """ + contentType: PostContentType! + + """ + The title of the post + """ + title: String! + + """ + Optional content of the post + """ + description: String + + """ + Optional link that the post is associated with + """ + link: String + + """ + Optional media contained in the post + """ + media: Media +} + +""" +Media content +""" +type Media { + """ + The type of media + """ + type: MediaType! + + """ + The location of the media (a URL) + """ + location: String! 
+} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/post/CreatePostResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/post/CreatePostResolverTest.java new file mode 100644 index 00000000000000..b56d897a468ba8 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/post/CreatePostResolverTest.java @@ -0,0 +1,91 @@ +package com.linkedin.datahub.graphql.resolvers.post; + +import com.datahub.authentication.Authentication; +import com.datahub.authentication.post.PostService; +import com.linkedin.common.Media; +import com.linkedin.common.url.Url; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.CreatePostInput; +import com.linkedin.datahub.graphql.generated.MediaType; +import com.linkedin.datahub.graphql.generated.PostContentType; +import com.linkedin.datahub.graphql.generated.PostType; +import com.linkedin.datahub.graphql.generated.UpdateMediaInput; +import com.linkedin.datahub.graphql.generated.UpdatePostContentInput; +import graphql.schema.DataFetchingEnvironment; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + + +public class CreatePostResolverTest { + private static final MediaType POST_MEDIA_TYPE = MediaType.IMAGE; + private static final String POST_MEDIA_LOCATION = + "https://datahubproject.io/img/datahub-logo-color-light-horizontal.svg"; + private static final PostContentType POST_CONTENT_TYPE = PostContentType.LINK; + private static final String POST_TITLE = "title"; + private static final String POST_DESCRIPTION = "description"; + private static final String POST_LINK = "https://datahubproject.io"; + private PostService _postService; + private CreatePostResolver _resolver; + private DataFetchingEnvironment 
_dataFetchingEnvironment; + private Authentication _authentication; + + @BeforeMethod + public void setupTest() throws Exception { + _postService = mock(PostService.class); + _dataFetchingEnvironment = mock(DataFetchingEnvironment.class); + _authentication = mock(Authentication.class); + + _resolver = new CreatePostResolver(_postService); + } + + @Test + public void testNotAuthorizedFails() { + QueryContext mockContext = getMockDenyContext(); + when(_dataFetchingEnvironment.getContext()).thenReturn(mockContext); + + assertThrows(() -> _resolver.get(_dataFetchingEnvironment).join()); + } + + @Test + public void testCreatePost() throws Exception { + QueryContext mockContext = getMockAllowContext(); + when(_dataFetchingEnvironment.getContext()).thenReturn(mockContext); + when(mockContext.getAuthentication()).thenReturn(_authentication); + + UpdateMediaInput media = new UpdateMediaInput(); + media.setType(POST_MEDIA_TYPE); + media.setLocation(POST_MEDIA_LOCATION); + Media mediaObj = new Media().setType(com.linkedin.common.MediaType.valueOf(POST_MEDIA_TYPE.toString())) + .setLocation(new Url(POST_MEDIA_LOCATION)); + when(_postService.mapMedia(eq(POST_MEDIA_TYPE.toString()), eq(POST_MEDIA_LOCATION))).thenReturn(mediaObj); + + UpdatePostContentInput content = new UpdatePostContentInput(); + content.setTitle(POST_TITLE); + content.setDescription(POST_DESCRIPTION); + content.setLink(POST_LINK); + content.setContentType(POST_CONTENT_TYPE); + content.setMedia(media); + com.linkedin.post.PostContent postContentObj = new com.linkedin.post.PostContent().setType( + com.linkedin.post.PostContentType.valueOf(POST_CONTENT_TYPE.toString())) + .setTitle(POST_TITLE) + .setDescription(POST_DESCRIPTION) + .setLink(new Url(POST_LINK)) + .setMedia(new Media().setType(com.linkedin.common.MediaType.valueOf(POST_MEDIA_TYPE.toString())) + .setLocation(new Url(POST_MEDIA_LOCATION))); + when(_postService.mapPostContent(eq(POST_CONTENT_TYPE.toString()), eq(POST_TITLE), eq(POST_DESCRIPTION), + 
eq(POST_LINK), any(Media.class))).thenReturn(postContentObj); + + CreatePostInput input = new CreatePostInput(); + input.setPostType(PostType.HOME_PAGE_ANNOUNCEMENT); + input.setContent(content); + when(_dataFetchingEnvironment.getArgument(eq("input"))).thenReturn(input); + when(_postService.createPost(eq(PostType.HOME_PAGE_ANNOUNCEMENT.toString()), eq(postContentObj), + eq(_authentication))).thenReturn(true); + + assertTrue(_resolver.get(_dataFetchingEnvironment).join()); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolverTest.java new file mode 100644 index 00000000000000..b4bec3ae9b3051 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolverTest.java @@ -0,0 +1,120 @@ +package com.linkedin.datahub.graphql.resolvers.post; + +import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.linkedin.common.Media; +import com.linkedin.common.MediaType; +import com.linkedin.common.url.Url; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.ListPostsInput; +import com.linkedin.datahub.graphql.generated.ListPostsResult; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.SearchResult; +import com.linkedin.metadata.search.SearchResultMetadata; +import com.linkedin.policy.DataHubRoleInfo; +import com.linkedin.post.PostContent; +import 
com.linkedin.post.PostContentType; +import com.linkedin.post.PostInfo; +import com.linkedin.post.PostType; +import graphql.schema.DataFetchingEnvironment; +import java.net.URISyntaxException; +import java.util.Map; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static com.linkedin.metadata.Constants.*; +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + + +public class ListPostsResolverTest { + private static Map _entityResponseMap; + private static final String POST_URN_STRING = "urn:li:post:examplePost"; + private static final MediaType POST_MEDIA_TYPE = MediaType.IMAGE; + private static final String POST_MEDIA_LOCATION = + "https://datahubproject.io/img/datahub-logo-color-light-horizontal.svg"; + private static final PostContentType POST_CONTENT_TYPE = PostContentType.LINK; + private static final String POST_TITLE = "title"; + private static final String POST_DESCRIPTION = "description"; + private static final String POST_LINK = "https://datahubproject.io"; + private static final Media MEDIA = new Media().setType(POST_MEDIA_TYPE).setLocation(new Url(POST_MEDIA_LOCATION)); + private static final PostContent POST_CONTENT = new PostContent().setType(POST_CONTENT_TYPE) + .setTitle(POST_TITLE) + .setDescription(POST_DESCRIPTION) + .setLink(new Url(POST_LINK)) + .setMedia(MEDIA); + private static final PostType POST_TYPE = PostType.HOME_PAGE_ANNOUNCEMENT; + + private EntityClient _entityClient; + private ListPostsResolver _resolver; + private DataFetchingEnvironment _dataFetchingEnvironment; + private Authentication _authentication; + + private Map getMockPostsEntityResponse() throws URISyntaxException { + Urn postUrn = Urn.createFromString(POST_URN_STRING); + + EntityResponse entityResponse = new EntityResponse().setUrn(postUrn); + PostInfo postInfo = new PostInfo(); + postInfo.setType(POST_TYPE); + postInfo.setContent(POST_CONTENT); + 
DataHubRoleInfo dataHubRoleInfo = new DataHubRoleInfo(); + dataHubRoleInfo.setDescription(postUrn.toString()); + dataHubRoleInfo.setName(postUrn.toString()); + entityResponse.setAspects(new EnvelopedAspectMap(ImmutableMap.of(DATAHUB_ROLE_INFO_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(dataHubRoleInfo.data()))))); + + return ImmutableMap.of(postUrn, entityResponse); + } + + @BeforeMethod + public void setupTest() throws Exception { + _entityResponseMap = getMockPostsEntityResponse(); + + _entityClient = mock(EntityClient.class); + _dataFetchingEnvironment = mock(DataFetchingEnvironment.class); + _authentication = mock(Authentication.class); + + _resolver = new ListPostsResolver(_entityClient); + } + + @Test + public void testNotAuthorizedFails() { + QueryContext mockContext = getMockDenyContext(); + when(_dataFetchingEnvironment.getContext()).thenReturn(mockContext); + + assertThrows(() -> _resolver.get(_dataFetchingEnvironment).join()); + } + + @Test + public void testListPosts() throws Exception { + QueryContext mockContext = getMockAllowContext(); + when(_dataFetchingEnvironment.getContext()).thenReturn(mockContext); + when(mockContext.getAuthentication()).thenReturn(_authentication); + + ListPostsInput input = new ListPostsInput(); + when(_dataFetchingEnvironment.getArgument("input")).thenReturn(input); + final SearchResult roleSearchResult = + new SearchResult().setMetadata(new SearchResultMetadata()).setFrom(0).setPageSize(10).setNumEntities(1); + roleSearchResult.setEntities( + new SearchEntityArray(ImmutableList.of(new SearchEntity().setEntity(Urn.createFromString(POST_URN_STRING))))); + + when(_entityClient.search(eq(POST_ENTITY_NAME), any(), eq(null), any(), anyInt(), anyInt(), + eq(_authentication))).thenReturn(roleSearchResult); + when(_entityClient.batchGetV2(eq(POST_ENTITY_NAME), any(), any(), any())).thenReturn(_entityResponseMap); + + ListPostsResult result = _resolver.get(_dataFetchingEnvironment).join(); + 
assertEquals(result.getStart(), 0); + assertEquals(result.getCount(), 10); + assertEquals(result.getTotal(), 1); + assertEquals(result.getPosts().size(), 1); + } +} diff --git a/datahub-web-react/src/app/home/HomePagePosts.tsx b/datahub-web-react/src/app/home/HomePagePosts.tsx new file mode 100644 index 00000000000000..97ef147f8dad62 --- /dev/null +++ b/datahub-web-react/src/app/home/HomePagePosts.tsx @@ -0,0 +1,62 @@ +import React from 'react'; +import { Divider, Typography } from 'antd'; +import styled from 'styled-components'; +import { useListPostsQuery } from '../../graphql/post.generated'; +import { Post, PostContentType } from '../../types.generated'; +import { PostTextCard } from '../search/PostTextCard'; +import { PostLinkCard } from '../search/PostLinkCard'; + +const RecommendationContainer = styled.div` + margin-bottom: 32px; + max-width: 1000px; + min-width: 750px; +`; + +const RecommendationTitle = styled(Typography.Title)` + margin-top: 0px; + margin-bottom: 0px; + padding: 0px; +`; + +const ThinDivider = styled(Divider)` + margin-top: 12px; + margin-bottom: 12px; +`; + +const LinkPostsContainer = styled.div` + display: flex; + flex-direction: row; +`; + +export const HomePagePosts = () => { + const { data: postsData } = useListPostsQuery({ + variables: { + input: { + start: 0, + count: 10, + }, + }, + }); + + const textPosts = + postsData?.listPosts?.posts?.filter((post) => post?.content?.contentType === PostContentType.Text) || []; + const linkPosts = + postsData?.listPosts?.posts?.filter((post) => post?.content?.contentType === PostContentType.Link) || []; + const hasPosts = textPosts.length > 0 || linkPosts.length > 0; + return hasPosts ? 
( + + Pinned + + {textPosts.map((post) => ( + + ))} + + {linkPosts.map((post, index) => ( + + ))} + + + ) : ( + <> + ); +}; diff --git a/datahub-web-react/src/app/home/HomePageRecommendations.tsx b/datahub-web-react/src/app/home/HomePageRecommendations.tsx index ee65736efce09e..61e3d9a5b3e36a 100644 --- a/datahub-web-react/src/app/home/HomePageRecommendations.tsx +++ b/datahub-web-react/src/app/home/HomePageRecommendations.tsx @@ -1,5 +1,5 @@ import React, { useEffect, useState } from 'react'; -import styled from 'styled-components'; +import styled from 'styled-components/macro'; import { Button, Divider, Empty, Typography } from 'antd'; import { RocketOutlined } from '@ant-design/icons'; import { @@ -16,6 +16,7 @@ import { useGetEntityCountsQuery } from '../../graphql/app.generated'; import { GettingStartedModal } from './GettingStartedModal'; import { ANTD_GRAY } from '../entity/shared/constants'; import { useGetAuthenticatedUser } from '../useGetAuthenticatedUser'; +import { HomePagePosts } from './HomePagePosts'; const RecommendationsContainer = styled.div` margin-top: 32px; @@ -139,6 +140,7 @@ export const HomePageRecommendations = ({ userUrn }: Props) => { return ( + {orderedEntityCounts && orderedEntityCounts.length > 0 && ( {domainRecommendationModule && ( diff --git a/datahub-web-react/src/app/search/PostLinkCard.tsx b/datahub-web-react/src/app/search/PostLinkCard.tsx new file mode 100644 index 00000000000000..5e780ccefb1f14 --- /dev/null +++ b/datahub-web-react/src/app/search/PostLinkCard.tsx @@ -0,0 +1,94 @@ +import React from 'react'; +// import { Link } from 'react-router-dom'; +import { Button, Image, Typography } from 'antd'; +import { ArrowRightOutlined } from '@ant-design/icons'; +import styled from 'styled-components/macro'; +import { ANTD_GRAY } from '../entity/shared/constants'; +import { Post } from '../../types.generated'; + +const CardContainer = styled(Button)<{ isLastCardInRow?: boolean }>` + display: flex; + flex-direction: row; + 
justify-content: space-between; + margin-right: ${(props) => (props.isLastCardInRow ? '0%' : '4%')}; + margin-left: 12px; + margin-bottom: 12px; + width: 29%; + height: 100px; + border: 1px solid ${ANTD_GRAY[4]}; + border-radius: 12px; + box-shadow: ${(props) => props.theme.styles['box-shadow']}; + &&:hover { + box-shadow: ${(props) => props.theme.styles['box-shadow-hover']}; + } + white-space: unset; +`; + +const LogoContainer = styled.div` + margin-top: 25px; + margin-left: 25px; + margin-right: 40px; +`; + +const PlatformLogo = styled(Image)` + width: auto; + object-fit: contain; + background-color: transparent; +`; + +const TextContainer = styled.div` + display: flex; + flex: 1; + justify-content: center; + align-items: start; + flex-direction: column; +`; + +const HeaderText = styled(Typography.Text)` + line-height: 10px; + margin-top: 12px; +`; + +const TitleDiv = styled.div` + display: flex; + justify-content: space-evenly; + align-items: center; + gap: 6px; + font-size: 14px; +`; + +const Title = styled(Typography.Title)` + word-break: break-word; +`; + +const NUM_CARDS_PER_ROW = 3; + +type Props = { + linkPost: Post; + index: number; +}; + +export const PostLinkCard = ({ linkPost, index }: Props) => { + const hasMedia = !!linkPost?.content?.media?.location; + const link = linkPost?.content?.link || ''; + const isLastCardInRow = (index + 1) % NUM_CARDS_PER_ROW === 0; + + return ( + + {hasMedia && ( + + + + )} + + Link + + <TitleDiv> + {linkPost?.content?.title} + <ArrowRightOutlined /> + </TitleDiv> + + + + ); +}; diff --git a/datahub-web-react/src/app/search/PostTextCard.tsx b/datahub-web-react/src/app/search/PostTextCard.tsx new file mode 100644 index 00000000000000..11079f2a379f49 --- /dev/null +++ b/datahub-web-react/src/app/search/PostTextCard.tsx @@ -0,0 +1,65 @@ +import React from 'react'; +import { Typography } from 'antd'; +import styled from 'styled-components/macro'; +import { ANTD_GRAY } from '../entity/shared/constants'; +import { Post } from 
'../../types.generated'; + +const CardContainer = styled.div` + display: flex; + flex-direction: row; + margin-right: 12px; + margin-left: 12px; + margin-bottom: 12px; + height: 140px; + border: 1px solid ${ANTD_GRAY[4]}; + border-radius: 12px; + box-shadow: ${(props) => props.theme.styles['box-shadow']}; + &&:hover { + box-shadow: ${(props) => props.theme.styles['box-shadow-hover']}; + } + white-space: unset; +`; + +const TextContainer = styled.div` + margin-left: 12px; + display: flex; + justify-content: center; + align-items: start; + flex-direction: column; +`; + +const Title = styled(Typography.Title)` + word-break: break-word; +`; + +const HeaderText = styled(Typography.Text)` + margin-top: 12px; +`; + +const AnnouncementText = styled(Typography.Paragraph)` + font-size: 12px; + color: ${ANTD_GRAY[7]}; +`; + +type Props = { + textPost: Post; +}; + +export const PostTextCard = ({ textPost }: Props) => { + return ( + + + Announcement + + {textPost?.content?.title} + + {textPost?.content?.description} + + + ); +}; diff --git a/datahub-web-react/src/graphql/mutations.graphql b/datahub-web-react/src/graphql/mutations.graphql index 105289fe9bb050..7d1fa0c7cd9446 100644 --- a/datahub-web-react/src/graphql/mutations.graphql +++ b/datahub-web-react/src/graphql/mutations.graphql @@ -114,4 +114,8 @@ mutation createInviteToken($input: CreateInviteTokenInput!) { mutation acceptRole($input: AcceptRoleInput!) { acceptRole(input: $input) +} + +mutation createPost($input: CreatePostInput!) { + createPost(input: $input) } \ No newline at end of file diff --git a/datahub-web-react/src/graphql/post.graphql b/datahub-web-react/src/graphql/post.graphql new file mode 100644 index 00000000000000..c19f38fc7751c1 --- /dev/null +++ b/datahub-web-react/src/graphql/post.graphql @@ -0,0 +1,22 @@ +query listPosts($input: ListPostsInput!) 
{ + listPosts(input: $input) { + start + count + total + posts { + urn + type + postType + content { + contentType + title + description + link + media { + type + location + } + } + } + } +} diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index adee116295d44b..c9e4645dee894f 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -47,6 +47,7 @@ public class Constants { public static final String DATA_HUB_UPGRADE_ENTITY_NAME = "dataHubUpgrade"; public static final String INVITE_TOKEN_ENTITY_NAME = "inviteToken"; public static final String DATAHUB_ROLE_ENTITY_NAME = "dataHubRole"; + public static final String POST_ENTITY_NAME = "post"; /** @@ -243,7 +244,6 @@ public class Constants { public static final String IS_MEMBER_OF_GROUP_RELATIONSHIP_NAME = "IsMemberOfGroup"; public static final String IS_MEMBER_OF_NATIVE_GROUP_RELATIONSHIP_NAME = "IsMemberOfNativeGroup"; - // acryl-main only public static final String CHANGE_EVENT_PLATFORM_EVENT_NAME = "entityChangeEvent"; /** @@ -258,6 +258,10 @@ public class Constants { public static final String DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME = "dataProcessInstanceProperties"; public static final String DATA_PROCESS_INSTANCE_RUN_EVENT_ASPECT_NAME = "dataProcessInstanceRunEvent"; + // Posts + public static final String POST_INFO_ASPECT_NAME = "postInfo"; + public static final String LAST_MODIFIED_FIELD_NAME = "lastModified"; + private Constants() { } } diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/Media.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/Media.pdl new file mode 100644 index 00000000000000..e912c1a6707c7c --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/common/Media.pdl @@ -0,0 +1,16 @@ +namespace com.linkedin.common + +/** + * Carries information about which roles a user is assigned to. 
+ */ +record Media { + /** + * Type of content the Media is storing, e.g. image, video, etc. + */ + type: MediaType + + /** + * Where the media content is stored. + */ + location: Url +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/MediaType.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/MediaType.pdl new file mode 100644 index 00000000000000..6d10e28fcc1781 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/common/MediaType.pdl @@ -0,0 +1,11 @@ +namespace com.linkedin.common + +/** + * Enum defining the type of content a Media object holds. + */ +enum MediaType { + /** + * The Media holds an image. + */ + IMAGE +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/key/PostKey.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/PostKey.pdl new file mode 100644 index 00000000000000..f5a74c64d0ef85 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/PostKey.pdl @@ -0,0 +1,14 @@ +namespace com.linkedin.metadata.key + +/** + * Key for a Post. + */ +@Aspect = { + "name": "postKey" +} +record PostKey { + /** + * A unique id for the DataHub Post record. Generated on the server side at Post creation time. + */ + id: string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/post/PostContent.pdl b/metadata-models/src/main/pegasus/com/linkedin/post/PostContent.pdl new file mode 100644 index 00000000000000..0ef537010b876c --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/post/PostContent.pdl @@ -0,0 +1,37 @@ +namespace com.linkedin.post + +import com.linkedin.common.Media +import com.linkedin.common.Url + +/** + * Content stored inside a Post. + */ +record PostContent { + /** + * Title of the post. + */ + @Searchable = { + "fieldType": "TEXT_PARTIAL" + } + title: string + + /** + * Type of content held in the post. 
+ */ + type: PostContentType + + /** + * Optional description of the post. + */ + description: optional string + + /** + * Optional link that the post is associated with. + */ + link: optional Url + + /** + * Optional media that the post is storing + */ + media: optional Media +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/post/PostContentType.pdl b/metadata-models/src/main/pegasus/com/linkedin/post/PostContentType.pdl new file mode 100644 index 00000000000000..dbedfbf5320a13 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/post/PostContentType.pdl @@ -0,0 +1,16 @@ +namespace com.linkedin.post + +/** + * Enum defining the type of content held in a Post. + */ +enum PostContentType { + /** + * Text content + */ + TEXT + + /** + * Link content + */ + LINK +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/post/PostInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/post/PostInfo.pdl new file mode 100644 index 00000000000000..7eb080653e9c33 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/post/PostInfo.pdl @@ -0,0 +1,35 @@ +namespace com.linkedin.post + +/** + * Information about a DataHub Post. + */ +@Aspect = { + "name": "postInfo" +} +record PostInfo { + /** + * Type of the Post. + */ + type: PostType + + /** + * Content stored in the post. 
+ */ + content: PostContent + + /** + * The time at which the post was initially created + */ + @Searchable = { + "fieldType": "COUNT" + } + created: long + + /** + * The time at which the post was last modified + */ + @Searchable = { + "fieldType": "COUNT" + } + lastModified: long +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/post/PostType.pdl b/metadata-models/src/main/pegasus/com/linkedin/post/PostType.pdl new file mode 100644 index 00000000000000..43aa851f0efeb5 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/post/PostType.pdl @@ -0,0 +1,11 @@ +namespace com.linkedin.post + +/** + * Enum defining types of Posts. + */ +enum PostType { + /** + * The Post is an Home Page announcement. + */ + HOME_PAGE_ANNOUNCEMENT +} \ No newline at end of file diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 42ac08270db8b5..842ad8c95b272c 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -259,4 +259,9 @@ entities: keyAspect: dataHubRoleKey aspects: - dataHubRoleInfo + - name: post + category: core + keyAspect: postKey + aspects: + - postInfo events: diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/post/PostService.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/post/PostService.java new file mode 100644 index 00000000000000..8295997699bb90 --- /dev/null +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/post/PostService.java @@ -0,0 +1,71 @@ +package com.datahub.authentication.post; + +import com.datahub.authentication.Authentication; +import com.linkedin.common.Media; +import com.linkedin.common.MediaType; +import com.linkedin.common.url.Url; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.key.PostKey; +import 
com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.post.PostContent; +import com.linkedin.post.PostContentType; +import com.linkedin.post.PostInfo; +import com.linkedin.post.PostType; +import com.linkedin.r2.RemoteInvocationException; +import java.time.Instant; +import java.util.UUID; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.entity.AspectUtils.*; + + +@Slf4j +@RequiredArgsConstructor +public class PostService { + private final EntityClient _entityClient; + + @Nonnull + public Media mapMedia(@Nonnull String type, @Nonnull String location) { + final Media media = new Media(); + media.setType(MediaType.valueOf(type)); + media.setLocation(new Url(location)); + return media; + } + + @Nonnull + public PostContent mapPostContent(@Nonnull String contentType, @Nonnull String title, @Nullable String description, @Nullable String link, + @Nullable Media media) { + final PostContent postContent = new PostContent().setType(PostContentType.valueOf(contentType)).setTitle(title); + if (description != null) { + postContent.setDescription(description); + } + if (link != null) { + postContent.setLink(new Url(link)); + } + if (media != null) { + postContent.setMedia(media); + } + return postContent; + } + + public boolean createPost(@Nonnull String postType, @Nonnull PostContent postContent, + @Nonnull Authentication authentication) throws RemoteInvocationException { + final String uuid = UUID.randomUUID().toString(); + final PostKey postKey = new PostKey().setId(uuid); + final long currentTimeMillis = Instant.now().toEpochMilli(); + final PostInfo postInfo = new PostInfo().setType(PostType.valueOf(postType)) + .setContent(postContent) + .setCreated(currentTimeMillis) + .setLastModified(currentTimeMillis); + + final MetadataChangeProposal proposal = + 
buildMetadataChangeProposal(POST_ENTITY_NAME, postKey, POST_INFO_ASPECT_NAME, postInfo); + _entityClient.ingestProposal(proposal, authentication); + + return true; + } +} diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authentication/post/PostServiceTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authentication/post/PostServiceTest.java new file mode 100644 index 00000000000000..dfc5a2ac51f859 --- /dev/null +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authentication/post/PostServiceTest.java @@ -0,0 +1,66 @@ +package com.datahub.authentication.post; + +import com.datahub.authentication.Actor; +import com.datahub.authentication.ActorType; +import com.datahub.authentication.Authentication; +import com.linkedin.common.Media; +import com.linkedin.common.MediaType; +import com.linkedin.common.url.Url; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.post.PostContent; +import com.linkedin.post.PostContentType; +import com.linkedin.post.PostType; +import com.linkedin.r2.RemoteInvocationException; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + + +public class PostServiceTest { + private static final MediaType POST_MEDIA_TYPE = MediaType.IMAGE; + private static final String POST_MEDIA_LOCATION = + "https://datahubproject.io/img/datahub-logo-color-light-horizontal.svg"; + private static final PostContentType POST_CONTENT_TYPE = PostContentType.LINK; + private static final String POST_TITLE = "title"; + private static final String POST_DESCRIPTION = "description"; + private static final String POST_LINK = "https://datahubproject.io"; + private static final Media MEDIA = new Media().setType(POST_MEDIA_TYPE).setLocation(new Url(POST_MEDIA_LOCATION)); + private static final PostContent POST_CONTENT = new PostContent().setType(POST_CONTENT_TYPE) + .setTitle(POST_TITLE) + 
.setDescription(POST_DESCRIPTION) + .setLink(new Url(POST_LINK)) + .setMedia(MEDIA); + private static final PostType POST_TYPE = PostType.HOME_PAGE_ANNOUNCEMENT; + private static final String DATAHUB_SYSTEM_CLIENT_ID = "__datahub_system"; + private static final Authentication SYSTEM_AUTHENTICATION = + new Authentication(new Actor(ActorType.USER, DATAHUB_SYSTEM_CLIENT_ID), ""); + private EntityClient _entityClient; + private PostService _postService; + + @BeforeMethod + public void setupTest() { + _entityClient = mock(EntityClient.class); + _postService = new PostService(_entityClient); + } + + @Test + public void testMapMedia() { + Media media = _postService.mapMedia(POST_MEDIA_TYPE.toString(), POST_MEDIA_LOCATION); + assertEquals(MEDIA, media); + } + + @Test + public void testMapPostContent() { + PostContent postContent = + _postService.mapPostContent(POST_CONTENT_TYPE.toString(), POST_TITLE, POST_DESCRIPTION, POST_LINK, MEDIA); + assertEquals(POST_CONTENT, postContent); + } + + @Test + public void testCreatePost() throws RemoteInvocationException { + _postService.createPost(POST_TYPE.toString(), POST_CONTENT, SYSTEM_AUTHENTICATION); + verify(_entityClient, times(1)).ingestProposal(any(), eq(SYSTEM_AUTHENTICATION)); + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/PostServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/PostServiceFactory.java new file mode 100644 index 00000000000000..8e5e5e5cfc6678 --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/PostServiceFactory.java @@ -0,0 +1,28 @@ +package com.linkedin.gms.factory.auth; + +import com.datahub.authentication.post.PostService; +import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.client.JavaEntityClient; +import javax.annotation.Nonnull; +import org.springframework.beans.factory.annotation.Autowired; +import 
org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.PropertySource; +import org.springframework.context.annotation.Scope; + + +@Configuration +@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class) +public class PostServiceFactory { + @Autowired + @Qualifier("javaEntityClient") + private JavaEntityClient _javaEntityClient; + + @Bean(name = "postService") + @Scope("singleton") + @Nonnull + protected PostService getInstance() throws Exception { + return new PostService(this._javaEntityClient); + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java index 5f50233c6a486b..768f09c784def7 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java @@ -2,6 +2,7 @@ import com.datahub.authentication.group.GroupService; import com.datahub.authentication.invite.InviteTokenService; +import com.datahub.authentication.post.PostService; import com.datahub.authentication.token.StatefulTokenService; import com.datahub.authentication.user.NativeUserService; import com.datahub.authorization.role.RoleService; @@ -123,6 +124,10 @@ public class GraphQLEngineFactory { @Qualifier("inviteTokenService") private InviteTokenService _inviteTokenService; + @Autowired + @Qualifier("postService") + private PostService _postService; + @Value("${platformAnalytics.enabled}") // TODO: Migrate to DATAHUB_ANALYTICS_ENABLED private Boolean isAnalyticsEnabled; @@ -157,6 +162,7 @@ protected GraphQLEngine getInstance() { _groupService, _roleService, _inviteTokenService, + 
_postService, _configProvider.getFeatureFlags() ).builder().build(); } @@ -186,6 +192,7 @@ protected GraphQLEngine getInstance() { _groupService, _roleService, _inviteTokenService, + _postService, _configProvider.getFeatureFlags() ).builder().build(); } diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index 28946c5301bf15..fcf2435b3d9601 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -93,6 +93,11 @@ public class PoliciesConfig { "Create Domains", "Create new Domains."); + public static final Privilege CREATE_GLOBAL_ANNOUNCEMENTS_PRIVILEGE = Privilege.of( + "CREATE_GLOBAL_ANNOUNCEMENTS", + "Create Global Announcements", + "Create new Global Announcements."); + public static final List PLATFORM_PRIVILEGES = ImmutableList.of( MANAGE_POLICIES_PRIVILEGE, MANAGE_USERS_AND_GROUPS_PRIVILEGE, @@ -107,7 +112,7 @@ public class PoliciesConfig { MANAGE_USER_CREDENTIALS_PRIVILEGE, MANAGE_TAGS_PRIVILEGE, CREATE_TAGS_PRIVILEGE, - CREATE_DOMAINS_PRIVILEGE + CREATE_DOMAINS_PRIVILEGE, CREATE_GLOBAL_ANNOUNCEMENTS_PRIVILEGE ); // Resource Privileges // From feb9d59b4d9c284bf4d8eaf83dd243d0bc8deeca Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 5 Oct 2022 03:26:51 +0000 Subject: [PATCH 33/76] fix(ingest): remove unused mysql golden file (#6106) --- .../source/sql/two_tier_sql_source.py | 7 +- .../integration/mysql/mysql_mces_golden.json | 2037 ----------------- 2 files changed, 5 insertions(+), 2039 deletions(-) delete mode 100644 metadata-ingestion/tests/integration/mysql/mysql_mces_golden.json diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py index b9c6d18dac74b8..c62c9c88f88543 100644 
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py @@ -51,12 +51,12 @@ def get_sql_alchemy_url( class TwoTierSQLAlchemySource(SQLAlchemySource): def __init__(self, config, ctx, platform): super().__init__(config, ctx, platform) - self.current_database = None self.config: TwoTierSQLAlchemyConfig = config def get_parent_container_key(self, db_name: str, schema: str) -> PlatformKey: # Because our overridden get_allowed_schemas method returns db_name as the schema name, # the db_name and schema here will be the same. Hence, we just ignore the schema parameter. + assert db_name == schema return self.gen_database_key(db_name) def get_allowed_schemas( @@ -66,6 +66,10 @@ def get_allowed_schemas( # dbName itself as an allowed schema yield db_name + def gen_schema_key(self, db_name: str, schema: str) -> PlatformKey: + # Sanity check that we don't try to generate schema containers for 2 tier databases. + raise NotImplementedError + def get_inspectors(self): # This method can be overridden in the case that you want to dynamically # run on multiple databases. 
@@ -84,7 +88,6 @@ def get_inspectors(self): inspector = inspect( create_engine(url, **self.config.options).connect() ) - self.current_database = db yield inspector def gen_schema_containers( diff --git a/metadata-ingestion/tests/integration/mysql/mysql_mces_golden.json b/metadata-ingestion/tests/integration/mysql/mysql_mces_golden.json deleted file mode 100644 index 5732e82e1eb7c3..00000000000000 --- a/metadata-ingestion/tests/integration/mysql/mysql_mces_golden.json +++ /dev/null @@ -1,2037 +0,0 @@ -[ -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:17751259af32dd0385cad799df608c40", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "value": "{\"customProperties\": {\"platform\": \"mysql\", \"instance\": \"PROD\", \"database\": \"metagalaxy\"}, \"name\": \"metagalaxy\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:17751259af32dd0385cad799df608c40", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:mysql\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:17751259af32dd0385cad799df608c40", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"Database\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - 
"registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:17751259af32dd0385cad799df608c40", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "domains", - "aspect": { - "value": "{\"domains\": [\"urn:li:domain:sales\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:ba408413d97771e6470c16f9869f2e0d", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "value": "{\"customProperties\": {\"platform\": \"mysql\", \"instance\": \"PROD\", \"database\": \"metagalaxy\", \"schema\": \"datacharmer\"}, \"name\": \"datacharmer\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:ba408413d97771e6470c16f9869f2e0d", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:mysql\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:ba408413d97771e6470c16f9869f2e0d", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"Schema\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": 
"mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:ba408413d97771e6470c16f9869f2e0d", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "value": "{\"container\": \"urn:li:container:17751259af32dd0385cad799df608c40\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.employees,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "value": "{\"container\": \"urn:li:container:ba408413d97771e6470c16f9869f2e0d\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.employees,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "externalUrl": null, - "name": "employees", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "datacharmer.employees", - "platform": "urn:li:dataPlatform:mysql", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": 
null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "emp_no", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": true, - "jsonProps": null - }, - { - "fieldPath": "birth_date", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "first_name", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=14)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "last_name", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=16)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "gender", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.EnumType": {} - } - }, - "nativeDataType": "ENUM('M', 'F')", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "hire_date", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - 
"com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.employees,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"table\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.salaries,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "value": "{\"container\": \"urn:li:container:ba408413d97771e6470c16f9869f2e0d\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.salaries,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "externalUrl": null, - "name": "salaries", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] - } - 
}, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "datacharmer.salaries", - "platform": "urn:li:dataPlatform:mysql", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "emp_no", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": true, - "jsonProps": null - }, - { - "fieldPath": "salary", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "from_date", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": true, - "jsonProps": null - }, - { - "fieldPath": "to_date", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - 
"lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.salaries,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"table\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:593ea3998729fdae4bdfb42206561a3a", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "value": "{\"customProperties\": {\"platform\": \"mysql\", \"instance\": \"PROD\", \"database\": \"metagalaxy\", \"schema\": \"metagalaxy\"}, \"name\": \"metagalaxy\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:593ea3998729fdae4bdfb42206561a3a", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:mysql\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:593ea3998729fdae4bdfb42206561a3a", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"Schema\"]}", - "contentType": 
"application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:593ea3998729fdae4bdfb42206561a3a", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "value": "{\"container\": \"urn:li:container:17751259af32dd0385cad799df608c40\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "value": "{\"container\": \"urn:li:container:593ea3998729fdae4bdfb42206561a3a\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "externalUrl": null, - "name": "metadata_aspect", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "metagalaxy.metadata_aspect", - "platform": "urn:li:dataPlatform:mysql", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, 
- "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "urn", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=500)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": true, - "jsonProps": null - }, - { - "fieldPath": "aspect", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=200)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": true, - "jsonProps": null - }, - { - "fieldPath": "version", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "BIGINT()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": true, - "jsonProps": null - }, - { - "fieldPath": "metadata", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "LONGTEXT()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "createdon", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.TimeType": {} - } - }, - "nativeDataType": "DATETIME(fsp=6)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "createdby", - "jsonPath": 
null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=255)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "createdfor", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=255)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"table\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_aspect,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "domains", - "aspect": { - "value": "{\"domains\": [\"urn:li:domain:sales\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "value": "{\"container\": \"urn:li:container:593ea3998729fdae4bdfb42206561a3a\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "externalUrl": null, - "name": "metadata_index", - "qualifiedName": null, - "description": "This is a table comment", - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "metagalaxy.metadata_index", - "platform": "urn:li:dataPlatform:mysql", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "BIGINT()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": true, - "jsonProps": null - }, - { - "fieldPath": "urn", - "jsonPath": null, - "nullable": false, - "description": "This is a column comment about URNs", - "type": 
{ - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=200)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "aspect", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=150)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "path", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=150)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "longVal", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "BIGINT()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "stringVal", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=200)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "doubleVal", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "DOUBLE(asdecimal=True)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": 
null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"table\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "domains", - "aspect": { - "value": "{\"domains\": [\"urn:li:domain:sales\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "value": "{\"container\": \"urn:li:container:593ea3998729fdae4bdfb42206561a3a\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - 
"removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "view_definition": "CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `metadata_index_view` AS select `metadata_index`.`id` AS `id`,`metadata_index`.`urn` AS `urn`,`metadata_index`.`path` AS `path`,`metadata_index`.`doubleVal` AS `doubleVal` from `metadata_index`", - "is_view": "True" - }, - "externalUrl": null, - "name": "metadata_index_view", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "metagalaxy.metadata_index_view", - "platform": "urn:li:dataPlatform:mysql", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "BIGINT()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "urn", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=200)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "path", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=150)", 
- "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "doubleVal", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "DOUBLE(asdecimal=True)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"view\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "value": "{\"materialized\": false, \"viewLogic\": \"CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `metadata_index_view` AS select `metadata_index`.`id` AS `id`,`metadata_index`.`urn` AS `urn`,`metadata_index`.`path` AS `path`,`metadata_index`.`doubleVal` AS `doubleVal` from `metadata_index`\", \"viewLanguage\": \"SQL\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - 
"registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "domains", - "aspect": { - "value": "{\"domains\": [\"urn:li:domain:sales\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:36bfb6eae3f7972efbcb56dedecdfba6", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "value": "{\"customProperties\": {\"platform\": \"mysql\", \"instance\": \"PROD\", \"database\": \"metagalaxy\", \"schema\": \"northwind\"}, \"name\": \"northwind\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:36bfb6eae3f7972efbcb56dedecdfba6", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:mysql\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:36bfb6eae3f7972efbcb56dedecdfba6", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"Schema\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 
1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:36bfb6eae3f7972efbcb56dedecdfba6", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "value": "{\"container\": \"urn:li:container:17751259af32dd0385cad799df608c40\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "value": "{\"container\": \"urn:li:container:36bfb6eae3f7972efbcb56dedecdfba6\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "externalUrl": null, - "name": "customers", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "northwind.customers", - "platform": "urn:li:dataPlatform:mysql", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": 
null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": true, - "jsonProps": null - }, - { - "fieldPath": "company", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=50)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "last_name", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=50)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "first_name", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=50)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "email_address", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=50)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "priority", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { 
- "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "FLOAT()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"table\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "value": "{\"container\": \"urn:li:container:36bfb6eae3f7972efbcb56dedecdfba6\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "externalUrl": null, - "name": "orders", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] - } - }, - { - 
"com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "northwind.orders", - "platform": "urn:li:dataPlatform:mysql", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": true, - "jsonProps": null - }, - { - "fieldPath": "description", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=50)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - }, - { - "fieldPath": "customer_id", - "jsonPath": null, - "nullable": false, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INTEGER()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": [ - { - "name": "fk_order_customer", - "foreignFields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD),id)" - ], - "sourceFields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD),customer_id)" - ], - "foreignDataset": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)" - } - ] - } - } - ] - } - 
}, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"table\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:989c003cbe689094c2b5c340a67f62be", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "value": "{\"customProperties\": {\"platform\": \"mysql\", \"instance\": \"PROD\", \"database\": \"metagalaxy\", \"schema\": \"test_cases\"}, \"name\": \"test_cases\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:989c003cbe689094c2b5c340a67f62be", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:mysql\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:989c003cbe689094c2b5c340a67f62be", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": 
"{\"typeNames\": [\"Schema\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "container", - "entityUrn": "urn:li:container:989c003cbe689094c2b5c340a67f62be", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "value": "{\"container\": \"urn:li:container:17751259af32dd0385cad799df608c40\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,test_cases.test_empty,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "value": "{\"container\": \"urn:li:container:989c003cbe689094c2b5c340a67f62be\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,test_cases.test_empty,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "externalUrl": null, - "name": "test_empty", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "test_cases.test_empty", - "platform": "urn:li:dataPlatform:mysql", - "version": 0, - "created": { - "time": 0, - "actor": 
"urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "dummy", - "jsonPath": null, - "nullable": true, - "description": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "VARCHAR(length=50)", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,test_cases.test_empty,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"table\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.employees,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"emp_no\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, 
\"sampleValues\": [\"10001\", \"10002\", \"10003\", \"10004\", \"10005\", \"10006\", \"10007\", \"10008\", \"10009\", \"10010\"]}, {\"fieldPath\": \"birth_date\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1952-04-19\", \"max\": \"1964-06-02\", \"sampleValues\": [\"1953-09-02\", \"1964-06-02\", \"1959-12-03\", \"1954-05-01\", \"1955-01-21\", \"1953-04-20\", \"1957-05-23\", \"1958-02-19\", \"1952-04-19\", \"1963-06-01\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Georgi\", \"Bezalel\", \"Parto\", \"Chirstian\", \"Kyoichi\", \"Anneke\", \"Tzvetan\", \"Saniya\", \"Sumant\", \"Duangkaew\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Facello\", \"Simmel\", \"Bamford\", \"Koblick\", \"Maliniak\", \"Preusig\", \"Zielinski\", \"Kalloufi\", \"Peac\", \"Piveteau\"]}, {\"fieldPath\": \"gender\", \"uniqueCount\": 2, \"uniqueProportion\": 0.2, \"nullCount\": 0, \"nullProportion\": 0.0, \"distinctValueFrequencies\": [{\"value\": \"M\", \"frequency\": 5}, {\"value\": \"F\", \"frequency\": 5}], \"sampleValues\": [\"M\", \"F\", \"M\", \"M\", \"M\", \"F\", \"F\", \"M\", \"F\", \"F\"]}, {\"fieldPath\": \"hire_date\", \"uniqueCount\": 10, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1985-02-18\", \"max\": \"1994-09-15\", \"sampleValues\": [\"1986-06-26\", \"1985-11-21\", \"1986-08-28\", \"1986-12-01\", \"1989-09-12\", \"1989-06-02\", \"1989-02-10\", \"1994-09-15\", \"1985-02-18\", \"1989-08-24\"]}]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:mysql,datacharmer.salaries,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 112, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"emp_no\", \"uniqueCount\": 10, \"uniqueProportion\": 0.08928571428571429, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"10001\", \"max\": \"10010\", \"mean\": \"10005.3125\", \"median\": \"10005.0\", \"stdev\": \"2.834889609688869\", \"distinctValueFrequencies\": [{\"value\": \"10001\", \"frequency\": 17}, {\"value\": \"10002\", \"frequency\": 6}, {\"value\": \"10003\", \"frequency\": 7}, {\"value\": \"10004\", \"frequency\": 16}, {\"value\": \"10005\", \"frequency\": 13}, {\"value\": \"10006\", \"frequency\": 12}, {\"value\": \"10007\", \"frequency\": 14}, {\"value\": \"10008\", \"frequency\": 3}, {\"value\": \"10009\", \"frequency\": 18}, {\"value\": \"10010\", \"frequency\": 6}], \"sampleValues\": [\"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10001\", \"10002\", \"10002\", \"10002\"]}, {\"fieldPath\": \"salary\", \"uniqueCount\": 111, \"uniqueProportion\": 0.9910714285714286, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"40000\", \"max\": \"94692\", \"mean\": \"68303.11607142857\", \"median\": \"69544.0\", \"stdev\": \"15505.291475014095\", \"sampleValues\": [\"60117\", \"62102\", \"66074\", \"66596\", \"66961\", \"71046\", \"74333\", \"75286\", \"75994\", \"76884\", \"80013\", \"81025\", \"81097\", \"84917\", \"85112\", \"85097\", \"88958\", \"65909\", \"65909\", \"67534\"]}, {\"fieldPath\": \"from_date\", \"uniqueCount\": 106, \"uniqueProportion\": 0.9464285714285714, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1985-02-18\", 
\"max\": \"2002-06-22\", \"sampleValues\": [\"1986-06-26\", \"1987-06-26\", \"1988-06-25\", \"1989-06-25\", \"1990-06-25\", \"1991-06-25\", \"1992-06-24\", \"1993-06-24\", \"1994-06-24\", \"1995-06-24\", \"1996-06-23\", \"1997-06-23\", \"1998-06-23\", \"1999-06-23\", \"2000-06-22\", \"2001-06-22\", \"2002-06-22\", \"1996-08-03\", \"1997-08-03\", \"1998-08-03\"]}, {\"fieldPath\": \"to_date\", \"uniqueCount\": 99, \"uniqueProportion\": 0.8839285714285714, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1986-02-18\", \"max\": \"9999-01-01\", \"sampleValues\": [\"1987-06-26\", \"1988-06-25\", \"1989-06-25\", \"1990-06-25\", \"1991-06-25\", \"1992-06-24\", \"1993-06-24\", \"1994-06-24\", \"1995-06-24\", \"1996-06-23\", \"1997-06-23\", \"1998-06-23\", \"1999-06-23\", \"2000-06-22\", \"2001-06-22\", \"2002-06-22\", \"9999-01-01\", \"1997-08-03\", \"1998-08-03\", \"1999-08-03\"]}]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 5, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\", \"3\", \"4\", \"5\"]}, {\"fieldPath\": \"company\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Company A\", \"Company B\", \"Company C\", \"Company D\", \"Company E\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, 
\"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Axen\", \"Bedecs\", \"Donnell\", \"Gratacos Solsona\", \"Lee\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Anna\", \"Antonio\", \"Christina\", \"Martin\", \"Thomas\"]}, {\"fieldPath\": \"email_address\", \"uniqueCount\": 0, \"nullCount\": 5, \"nullProportion\": 1.0, \"sampleValues\": []}, {\"fieldPath\": \"priority\", \"uniqueCount\": 3, \"uniqueProportion\": 0.75, \"nullCount\": 1, \"nullProportion\": 0.2, \"min\": \"3.8\", \"max\": \"4.9\", \"mean\": \"4.175000011920929\", \"median\": \"4.0\", \"distinctValueFrequencies\": [{\"value\": \"3.8\", \"frequency\": 1}, {\"value\": \"4.0\", \"frequency\": 2}, {\"value\": \"4.9\", \"frequency\": 1}], \"sampleValues\": [\"4.0\", \"4.9\", \"4.0\", \"3.8\"]}]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 0, \"columnCount\": 3, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"description\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"customer_id\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -}, -{ - 
"auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,test_cases.test_empty,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 0, \"columnCount\": 1, \"fieldProfiles\": [{\"fieldPath\": \"dummy\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "mysql-test", - "registryName": null, - "registryVersion": null, - "properties": null - } -} -] \ No newline at end of file From 055e4082da1d47990d54f34441669a226ddaea64 Mon Sep 17 00:00:00 2001 From: Ravindra Lanka Date: Tue, 4 Oct 2022 20:40:59 -0700 Subject: [PATCH 34/76] fix(ingestion): fix percent change computation in stale_entity_removal (#6121) --- .../state/stale_entity_removal_handler.py | 6 +-- .../test_stale_entity_removal_handler.py | 51 +++++++++++++++++++ 2 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 metadata-ingestion/tests/unit/stateful_ingestion/state/test_stale_entity_removal_handler.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py index c6e606fa745259..232197f3e2f5de 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py @@ -35,7 +35,7 @@ class StatefulStaleMetadataRemovalConfig(StatefulIngestionConfig): description=f"Soft-deletes the entities of type {', '.join(_entity_types)} in the last successful run but missing in the current run with stateful_ingestion enabled.", ) fail_safe_threshold: float = pydantic.Field( - default=95.0, + 
default=20.0, description="Prevents large amount of soft deletes & the state from committing from accidental changes to the source configuration if the relative change percent in entities compared to the previous state is above the 'fail_safe_threshold'.", le=100.0, # mypy does not work with pydantic.confloat. This is the recommended work-around. ge=0.0, @@ -94,7 +94,7 @@ def get_percent_entities_changed(self, old_checkpoint_state: Derived) -> float: """ Returns the percentage of entities that have changed relative to `old_checkpoint_state`. :param old_checkpoint_state: the old checkpoint state to compute the relative change percent against. - :return: (|intersection(self, old_checkpoint_state)| * 100.0 / |old_checkpoint_state|) + :return: (1-|intersection(self, old_checkpoint_state)| / |old_checkpoint_state|) * 100.0 """ pass @@ -115,7 +115,7 @@ def compute_percent_entities_changed( overlap_count_all += overlap_count old_count_all += old_count if old_count_all: - return overlap_count * 100.0 / old_count_all + return (1 - overlap_count / old_count_all) * 100.0 return 0.0 @staticmethod diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stale_entity_removal_handler.py b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stale_entity_removal_handler.py new file mode 100644 index 00000000000000..cfada6c3a4997d --- /dev/null +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stale_entity_removal_handler.py @@ -0,0 +1,51 @@ +from typing import Dict, List, Tuple + +import pytest + +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StaleEntityCheckpointStateBase, +) + +OldNewEntLists = List[Tuple[List[str], List[str]]] + +old_new_ent_tests: Dict[str, Tuple[OldNewEntLists, float]] = { + "no_change_empty_old_and_new": ([([], [])], 0.0), + "no_change_empty_old_and_non_empty_new": ([(["a"], [])], 0.0), + "no_change_non_empty_old_new_equals_old": ( + [(["a", "b", "c"], ["c", "b", "a"])], + 0.0, + ), + 
"no_change_non_empty_old_new_superset_old": ( + [(["a", "b", "c", "d"], ["c", "b", "a"])], + 0.0, + ), + "change_25_percent_delta": ([(["a", "b", "c"], ["d", "c", "b", "a"])], 25.0), + "change_50_percent_delta": ( + [ + ( + ["b", "a"], + ["a", "b", "c", "d"], + ) + ], + 50.0, + ), + "change_75_percent_delta": ([(["a"], ["a", "b", "c", "d"])], 75.0), + "change_100_percent_delta_empty_new": ([([], ["a", "b", "c", "d"])], 100.0), + "change_100_percent_delta_non_empty_new": ([(["e"], ["a", "b", "c", "d"])], 100.0), +} + + +@pytest.mark.parametrize( + "new_old_entity_list, expected_percent_change", + old_new_ent_tests.values(), + ids=old_new_ent_tests.keys(), +) +def test_change_percent( + new_old_entity_list: OldNewEntLists, expected_percent_change: float +) -> None: + actual_percent_change = ( + StaleEntityCheckpointStateBase.compute_percent_entities_changed( + new_old_entity_list + ) + ) + assert actual_percent_change == expected_percent_change From 95afc1d73ffc8cac80bd82ada97004ae25db832d Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 5 Oct 2022 03:53:18 +0000 Subject: [PATCH 35/76] refactor(ingest): use pydantic utilities for NamingPattern (#6013) * refactor(ingest): use pydantic utilities for NamingPattern * clean up replacement logic * flatten config hierarchy --- .../ingestion/source/looker/looker_common.py | 214 ++++++++---------- 1 file changed, 95 insertions(+), 119 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index b6e561c781506f..29548db7a78b6e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -1,17 +1,18 @@ from __future__ import print_function +import dataclasses import datetime import logging import re from dataclasses import dataclass, field as dataclasses_field from enum import Enum from functools import 
lru_cache -from typing import Dict, Iterable, List, Optional, Tuple, Union +from typing import ClassVar, Dict, Iterable, List, Optional, Tuple, Union import pydantic from looker_sdk.error import SDKError from looker_sdk.sdk.api31.models import User, WriteQuery -from pydantic import BaseModel, Field +from pydantic import Field from pydantic.class_validators import validator import datahub.emitter.mce_builder as builder @@ -73,94 +74,92 @@ logger = logging.getLogger(__name__) -# @dataclass -class NamingPattern(BaseModel): - allowed_vars: List[str] +class NamingPattern(ConfigModel): + ALLOWED_VARS: ClassVar[List[str]] = [] + REQUIRE_AT_LEAST_ONE_VAR: ClassVar[bool] = True + pattern: str - variables: Optional[List[str]] = None + + @classmethod + def __get_validators__(cls): + yield cls.pydantic_accept_raw_pattern + yield cls.validate + yield cls.pydantic_validate_pattern + + @classmethod + def pydantic_accept_raw_pattern(cls, v): + if isinstance(v, (NamingPattern, dict)): + return v + assert isinstance(v, str), "pattern must be a string" + return {"pattern": v} + + @classmethod + def pydantic_validate_pattern(cls, v): + assert isinstance(v, NamingPattern) + assert v.validate_pattern(cls.REQUIRE_AT_LEAST_ONE_VAR) + return v + + @classmethod + def allowed_docstring(cls) -> str: + return f"Allowed variables are {cls.ALLOWED_VARS}" def validate_pattern(self, at_least_one: bool) -> bool: variables = re.findall("({[^}{]+})", self.pattern) - self.variables = [v[1:-1] for v in variables] + + variables = [v[1:-1] for v in variables] # remove the {} + for v in variables: - if v[1:-1] not in self.allowed_vars: + if v not in self.ALLOWED_VARS: raise ConfigurationError( - f"Failed to find {v} in allowed_variables {self.allowed_vars}" + f"Failed to find {v} in allowed_variables {self.ALLOWED_VARS}" ) if at_least_one and len(variables) == 0: raise ConfigurationError( - f"Failed to find any variable assigned to pattern {self.pattern}. Must have at least one. 
Allowed variables are {self.allowed_vars}" + f"Failed to find any variable assigned to pattern {self.pattern}. Must have at least one. {self.allowed_docstring()}" ) return True - -naming_pattern_variables: List[str] = [ - "platform", - "env", - "project", - "model", - "name", -] + def replace_variables(self, values: Union[Dict[str, Optional[str]], object]) -> str: + if not isinstance(values, dict): + assert dataclasses.is_dataclass(values) + values = dataclasses.asdict(values) + values = {k: v for k, v in values.items() if v is not None} + return self.pattern.format(**values) -class LookerExploreNamingConfig(ConfigModel): - explore_naming_pattern: NamingPattern = pydantic.Field( - description="Pattern for providing dataset names to explores. Allowed variables are {project}, {model}, {name}. Default is `{model}.explore.{name}`", - default=NamingPattern( - allowed_vars=naming_pattern_variables, pattern="{model}.explore.{name}" - ), - ) - explore_browse_pattern: NamingPattern = NamingPattern( - allowed_vars=naming_pattern_variables, - pattern="/{env}/{platform}/{project}/explores", - ) +@dataclass +class NamingPatternMapping: + platform: str + env: str + project: str + model: str + name: str - @validator("explore_naming_pattern", "explore_browse_pattern", pre=True) - def init_naming_pattern(cls, v): - if isinstance(v, NamingPattern): - return v - assert isinstance(v, str), "pattern must be a string" - return NamingPattern(allowed_vars=naming_pattern_variables, pattern=v) - @validator("explore_naming_pattern", "explore_browse_pattern", always=True) - def validate_naming_pattern(cls, v): - assert isinstance(v, NamingPattern) - v.validate_pattern(at_least_one=True) - return v +class LookerNamingPattern(NamingPattern): + ALLOWED_VARS = [field.name for field in dataclasses.fields(NamingPatternMapping)] -class LookerViewNamingConfig(ConfigModel): - view_naming_pattern: NamingPattern = Field( - NamingPattern( - allowed_vars=naming_pattern_variables, 
pattern="{project}.view.{name}" - ), - description="Pattern for providing dataset names to views. Allowed variables are `{project}`, `{model}`, `{name}`", - ) - view_browse_pattern: NamingPattern = Field( - NamingPattern( - allowed_vars=naming_pattern_variables, - pattern="/{env}/{platform}/{project}/views", - ), - description="Pattern for providing browse paths to views. Allowed variables are `{project}`, `{model}`, `{name}`, `{platform}` and `{env}`", +class LookerCommonConfig(DatasetSourceConfigBase): + explore_naming_pattern: LookerNamingPattern = pydantic.Field( + description=f"Pattern for providing dataset names to explores. {LookerNamingPattern.allowed_docstring()}", + default=LookerNamingPattern(pattern="{model}.explore.{name}"), ) - @validator("view_naming_pattern", "view_browse_pattern", pre=True) - def init_naming_pattern(cls, v): - if isinstance(v, NamingPattern): - return v - assert isinstance(v, str), "pattern must be a string" - return NamingPattern(allowed_vars=naming_pattern_variables, pattern=v) - - @validator("view_naming_pattern", "view_browse_pattern", always=True) - def validate_naming_pattern(cls, v): - assert isinstance(v, NamingPattern) - v.validate_pattern(at_least_one=True) - return v + explore_browse_pattern: LookerNamingPattern = pydantic.Field( + description=f"Pattern for providing browse paths to explores. {LookerNamingPattern.allowed_docstring()}", + default=LookerNamingPattern(pattern="/{env}/{platform}/{project}/explores"), + ) + view_naming_pattern: LookerNamingPattern = Field( + LookerNamingPattern(pattern="{project}.view.{name}"), + description=f"Pattern for providing dataset names to views. {LookerNamingPattern.allowed_docstring()}", + ) + view_browse_pattern: LookerNamingPattern = Field( + LookerNamingPattern(pattern="/{env}/{platform}/{project}/views"), + description=f"Pattern for providing browse paths to views. 
{LookerNamingPattern.allowed_docstring()}", + ) -class LookerCommonConfig( - LookerViewNamingConfig, LookerExploreNamingConfig, DatasetSourceConfigBase -): tag_measures_and_dimensions: bool = Field( True, description="When enabled, attaches tags to measures, dimensions and dimension groups to make them more discoverable. When disabled, adds this information to the description of the column.", @@ -184,19 +183,14 @@ class LookerViewId: model_name: str view_name: str - def get_mapping(self, variable: str, config: LookerCommonConfig) -> str: - assert variable in naming_pattern_variables - if variable == "project": - return self.project_name - if variable == "model": - return self.model_name - if variable == "name": - return self.view_name - if variable == "env": - return config.env.lower() - if variable == "platform": - return config.platform_name - assert False, "Unreachable code" + def get_mapping(self, config: LookerCommonConfig) -> NamingPatternMapping: + return NamingPatternMapping( + platform=config.platform_name, + env=config.env.lower(), + project=self.project_name, + model=self.model_name, + name=self.view_name, + ) @validator("view_name") def remove_quotes(cls, v): @@ -205,12 +199,9 @@ def remove_quotes(cls, v): return v def get_urn(self, config: LookerCommonConfig) -> str: - dataset_name = config.view_naming_pattern.pattern - assert config.view_naming_pattern.variables is not None - for v in config.view_naming_pattern.variables: - dataset_name = dataset_name.replace( - "{" + v + "}", self.get_mapping(v, config) - ) + dataset_name = config.view_naming_pattern.replace_variables( + self.get_mapping(config) + ) return builder.make_dataset_urn_with_platform_instance( platform=config.platform_name, @@ -220,12 +211,9 @@ def get_urn(self, config: LookerCommonConfig) -> str: ) def get_browse_path(self, config: LookerCommonConfig) -> str: - browse_path = config.view_browse_pattern.pattern - assert config.view_browse_pattern.variables is not None - for v in 
config.view_browse_pattern.variables: - browse_path = browse_path.replace( - "{" + v + "}", self.get_mapping(v, config) - ) + browse_path = config.view_browse_pattern.replace_variables( + self.get_mapping(config) + ) return browse_path @@ -683,28 +671,19 @@ def from_api( # noqa: C901 ) return None - def get_mapping(self, variable: str, config: LookerCommonConfig) -> str: - assert variable in naming_pattern_variables - if variable == "project": - assert self.project_name is not None - return self.project_name - if variable == "model": - return self.model_name - if variable == "name": - return self.name - if variable == "env": - return config.env.lower() - if variable == "platform": - return config.platform_name - assert False, "Unreachable code" + def get_mapping(self, config: LookerCommonConfig) -> NamingPatternMapping: + return NamingPatternMapping( + platform=config.platform_name, + project=self.project_name, # type: ignore + model=self.model_name, + name=self.name, + env=config.env.lower(), + ) def get_explore_urn(self, config: LookerCommonConfig) -> str: - dataset_name = config.explore_naming_pattern.pattern - assert config.explore_naming_pattern.variables is not None - for v in config.explore_naming_pattern.variables: - dataset_name = dataset_name.replace( - "{" + v + "}", self.get_mapping(v, config) - ) + dataset_name = config.explore_naming_pattern.replace_variables( + self.get_mapping(config) + ) return builder.make_dataset_urn_with_platform_instance( platform=config.platform_name, @@ -714,12 +693,9 @@ def get_explore_urn(self, config: LookerCommonConfig) -> str: ) def get_explore_browse_path(self, config: LookerCommonConfig) -> str: - browse_path = config.explore_browse_pattern.pattern - assert config.explore_browse_pattern.variables is not None - for v in config.explore_browse_pattern.variables: - browse_path = browse_path.replace( - "{" + v + "}", self.get_mapping(v, config) - ) + browse_path = config.explore_browse_pattern.replace_variables( + 
self.get_mapping(config) + ) return browse_path def _get_url(self, base_url): From 2f79b50c2479b4af2838138c6e26bd0ab87945da Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Wed, 5 Oct 2022 05:54:38 +0200 Subject: [PATCH 36/76] fix(ingest): presto-on-hive - not failing on Hive type parsing error (#6118) Co-authored-by: Shirshanka Das --- .../ingestion/extractor/schema_util.py | 18 ++++++--- .../datahub/utilities/hive_schema_to_avro.py | 39 ++++++++++++++----- .../tests/unit/utilities/__init__.py | 0 .../utilities/test_hive_schema_to_avro.py | 37 ++++++++++++++++++ 4 files changed, 79 insertions(+), 15 deletions(-) create mode 100644 metadata-ingestion/tests/unit/utilities/__init__.py create mode 100644 metadata-ingestion/tests/unit/utilities/test_hive_schema_to_avro.py diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py index faa035dbfb8262..ad04d6352e466e 100644 --- a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py +++ b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py @@ -507,22 +507,28 @@ def to_mce_fields( def avro_schema_to_mce_fields( - avro_schema_string: str, is_key_schema: bool = False, default_nullable: bool = False + avro_schema_string: str, + is_key_schema: bool = False, + default_nullable: bool = False, + swallow_exceptions: bool = True, ) -> List[SchemaField]: """ Converts an avro schema into schema fields compatible with MCE. :param avro_schema_string: String representation of the AVRO schema. :param is_key_schema: True if it is a key-schema. Default is False (value-schema). + :param swallow_exceptions: True if the caller wants exceptions to be suppressed :return: The list of MCE compatible SchemaFields. 
""" - schema_fields: List[SchemaField] = [] + try: - schema_fields = list( + return list( AvroToMceSchemaConverter.to_mce_fields( avro_schema_string, is_key_schema, default_nullable ) ) except Exception: - logger.exception(f"Failed to parse {avro_schema_string} to mce_fields.") - - return schema_fields + if swallow_exceptions: + logger.exception(f"Failed to parse {avro_schema_string} into mce fields.") + return [] + else: + raise diff --git a/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py b/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py index fe59ad85fc0a59..ba85788adc61c6 100644 --- a/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py +++ b/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py @@ -1,10 +1,14 @@ import json +import logging import re import uuid from typing import Any, Dict, List, Optional, Union from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField +from datahub.metadata.schema_classes import NullTypeClass, SchemaFieldDataTypeClass + +logger: logging.Logger = logging.getLogger(__name__) class HiveColumnToAvroConverter: @@ -102,7 +106,7 @@ def _parse_datatype_string( @staticmethod def _parse_struct_fields_string(s: str, **kwargs: Any) -> Dict[str, object]: parts = HiveColumnToAvroConverter._ignore_brackets_split(s, ",") - fields = [] + fields: List[Dict] = [] for part in parts: name_and_type = HiveColumnToAvroConverter._ignore_brackets_split( part.strip(), HiveColumnToAvroConverter._STRUCT_TYPE_SEPARATOR @@ -123,7 +127,9 @@ def _parse_struct_fields_string(s: str, **kwargs: Any) -> Dict[str, object]: field_type = HiveColumnToAvroConverter._parse_datatype_string( name_and_type[1] ) - fields.append({"name": field_name, "type": field_type}) + + if not any(field["name"] == field_name for field in fields): + fields.append({"name": field_name, "type": field_type}) if kwargs.get("ustruct_seqn") is not 
None: struct_name = f'__structn_{kwargs["ustruct_seqn"]}_{str(uuid.uuid4()).replace("-", "")}' @@ -259,13 +265,28 @@ def get_schema_fields_for_hive_column( default_nullable: bool = False, is_part_of_key: bool = False, ) -> List[SchemaField]: - avro_schema_json = get_avro_schema_for_hive_column( - hive_column_name=hive_column_name, hive_column_type=hive_column_type - ) - schema_fields = avro_schema_to_mce_fields( - avro_schema_string=json.dumps(avro_schema_json), - default_nullable=default_nullable, - ) + + try: + avro_schema_json = get_avro_schema_for_hive_column( + hive_column_name=hive_column_name, hive_column_type=hive_column_type + ) + schema_fields = avro_schema_to_mce_fields( + avro_schema_string=json.dumps(avro_schema_json), + default_nullable=default_nullable, + swallow_exceptions=False, + ) + except Exception as e: + logger.warning( + f"Unable to parse column {hive_column_name} and type {hive_column_type} the error was: {e}" + ) + schema_fields = [ + SchemaField( + fieldPath=hive_column_name, + type=SchemaFieldDataTypeClass(type=NullTypeClass()), + nativeDataType=hive_column_type, + ) + ] + assert schema_fields if HiveColumnToAvroConverter.is_primitive_hive_type(hive_column_type): # Primitive avro schema does not have any field names. Append it to fieldPath. 
diff --git a/metadata-ingestion/tests/unit/utilities/__init__.py b/metadata-ingestion/tests/unit/utilities/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/tests/unit/utilities/test_hive_schema_to_avro.py b/metadata-ingestion/tests/unit/utilities/test_hive_schema_to_avro.py new file mode 100644 index 00000000000000..56b48a130936ca --- /dev/null +++ b/metadata-ingestion/tests/unit/utilities/test_hive_schema_to_avro.py @@ -0,0 +1,37 @@ +from datahub.metadata.schema_classes import ( + NullTypeClass, + NumberTypeClass, + RecordTypeClass, +) +from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column + + +def test_get_avro_schema_for_hive_column(): + schema_fields = get_schema_fields_for_hive_column("test", "int") + assert schema_fields[0].type.type == NumberTypeClass() + # Len will be the struct + 2 key there which should remain after the deduplication + assert len(schema_fields) == 1 + + +def test_get_avro_schema_for_struct_hive_column(): + schema_fields = get_schema_fields_for_hive_column("test", "struct") + assert schema_fields[0].type.type == RecordTypeClass() + assert len(schema_fields) == 2 + + +def test_get_avro_schema_for_struct_hive_with_duplicate_column(): + schema_fields = get_schema_fields_for_hive_column( + "test", "struct" + ) + assert schema_fields[0].type.type == RecordTypeClass() + # Len will be the struct + 2 key there which should remain after the deduplication + assert len(schema_fields) == 3 + + +def test_get_avro_schema_for_struct_hive_with_duplicate_column2(): + invalid_schema: str = "struct!test:intdsfs, test2:int, test:int>" + schema_fields = get_schema_fields_for_hive_column("test", invalid_schema) + assert len(schema_fields) == 1 + assert schema_fields[0].type.type == NullTypeClass() + assert schema_fields[0].fieldPath == "test" + assert schema_fields[0].nativeDataType == invalid_schema From 4ee3ef14ef5dcde74ec9cab78fe11c71004e69c1 Mon Sep 17 00:00:00 2001 From: 
Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Wed, 5 Oct 2022 09:31:11 +0530 Subject: [PATCH 37/76] fix(ingest): ignore usage and operation for snowflake datasets without schema (#6112) --- .../source/snowflake/snowflake_config.py | 2 +- .../source/snowflake/snowflake_profiler.py | 2 +- .../source/snowflake/snowflake_usage_v2.py | 38 +++++++++-- .../source/snowflake/snowflake_v2.py | 64 +++++++++++-------- 4 files changed, 72 insertions(+), 34 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index 1d9098ff728e3a..9b9cb4cb538e9e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -76,7 +76,7 @@ def validate_unsupported_configs(cls, values: Dict) -> Dict: ) include_table_lineage = values.get("include_table_lineage") - # TODO: Allow lineage extraction irrespective of basic schema extraction, + # TODO: Allow lineage extraction and profiling irrespective of basic schema extraction, # as it seems possible with some refractor if not include_technical_schema and any( [include_profiles, delete_detection_enabled, include_table_lineage] diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py index d7cebdae948801..195316db160fb9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py @@ -46,7 +46,6 @@ def get_workunits(self, databases: List[SnowflakeDatabase]) -> Iterable[WorkUnit "max_overflow", self.config.profiling.max_workers ) - # Otherwise, if column level profiling is enabled, use GE profiler. 
for db in databases: if not self.config.database_pattern.allowed(db.name): continue @@ -236,6 +235,7 @@ def generate_profiles( if len(ge_profile_requests) == 0: return + # Otherwise, if column level profiling is enabled, use GE profiler. ge_profiler = self.get_profiler_instance(db_name) yield from ge_profiler.generate_profiles( ge_profile_requests, max_workers, platform, profiler_args diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py index 6759839628eb3b..60e0b3874d0810 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py @@ -92,7 +92,9 @@ def __init__(self, config: SnowflakeV2Config, report: SnowflakeV2Report) -> None self.report: SnowflakeV2Report = report self.logger = logger - def get_workunits(self) -> Iterable[MetadataWorkUnit]: + def get_workunits( + self, discovered_datasets: List[str] + ) -> Iterable[MetadataWorkUnit]: conn = self.config.get_connection() logger.info("Checking usage date ranges") @@ -107,18 +109,20 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: # Now, we report the usage as well as operation metadata even if user email is absent if self.config.include_usage_stats: - yield from self.get_usage_workunits(conn) + yield from self.get_usage_workunits(conn, discovered_datasets) if self.config.include_operational_stats: # Generate the operation workunits. 
access_events = self._get_snowflake_history(conn) for event in access_events: - yield from self._get_operation_aspect_work_unit(event) + yield from self._get_operation_aspect_work_unit( + event, discovered_datasets + ) conn.close() def get_usage_workunits( - self, conn: SnowflakeConnection + self, conn: SnowflakeConnection, discovered_datasets: List[str] ) -> Iterable[MetadataWorkUnit]: with PerfTimer() as timer: @@ -144,6 +148,15 @@ def get_usage_workunits( ): continue + dataset_identifier = self.get_dataset_identifier_from_qualified_name( + row["OBJECT_NAME"] + ) + if dataset_identifier not in discovered_datasets: + logger.debug( + f"Skipping usage for table {dataset_identifier}, as table schema is not accessible" + ) + continue + stats = DatasetUsageStatistics( timestampMillis=int(row["BUCKET_START_TIME"].timestamp() * 1000), eventGranularity=TimeWindowSize( @@ -161,7 +174,7 @@ def get_usage_workunits( ) dataset_urn = make_dataset_urn_with_platform_instance( self.platform, - self.get_dataset_identifier_from_qualified_name(row["OBJECT_NAME"]), + dataset_identifier, self.config.platform_instance, self.config.env, ) @@ -276,7 +289,7 @@ def _check_usage_date_ranges(self, conn: SnowflakeConnection) -> Any: ) def _get_operation_aspect_work_unit( - self, event: SnowflakeJoinedAccessEvent + self, event: SnowflakeJoinedAccessEvent, discovered_datasets: List[str] ) -> Iterable[MetadataWorkUnit]: if event.query_start_time and event.query_type in OPERATION_STATEMENT_TYPES: start_time = event.query_start_time @@ -292,9 +305,20 @@ def _get_operation_aspect_work_unit( for obj in event.objects_modified: resource = obj.objectName + + dataset_identifier = self.get_dataset_identifier_from_qualified_name( + resource + ) + + if dataset_identifier not in discovered_datasets: + logger.debug( + f"Skipping operations for table {dataset_identifier}, as table schema is not accessible" + ) + continue + dataset_urn = make_dataset_urn_with_platform_instance( self.platform, - 
self.get_dataset_identifier_from_qualified_name(resource), + dataset_identifier, self.config.platform_instance, self.config.env, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 54cee37b4edf39..0a901385622f54 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -205,9 +205,8 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config): cached_domains=[k for k in self.config.domain], graph=self.ctx.graph ) - if self.config.include_technical_schema: - # For database, schema, tables, views, etc - self.data_dictionary = SnowflakeDataDictionary() + # For database, schema, tables, views, etc + self.data_dictionary = SnowflakeDataDictionary() if config.include_table_lineage: # For lineage @@ -430,25 +429,24 @@ def get_workunits(self) -> Iterable[WorkUnit]: self.inspect_session_metadata(conn) self.report.include_technical_schema = self.config.include_technical_schema - if self.config.include_technical_schema: - databases: List[SnowflakeDatabase] = self.data_dictionary.get_databases( - conn - ) - for snowflake_db in databases: - self.report.report_entity_scanned(snowflake_db.name, "database") + databases: List[SnowflakeDatabase] = [] + + databases = self.data_dictionary.get_databases(conn) + for snowflake_db in databases: + self.report.report_entity_scanned(snowflake_db.name, "database") - if not self.config.database_pattern.allowed(snowflake_db.name): - self.report.report_dropped(f"{snowflake_db.name}.*") - continue + if not self.config.database_pattern.allowed(snowflake_db.name): + self.report.report_dropped(f"{snowflake_db.name}.*") + continue - yield from self._process_database(conn, snowflake_db) + yield from self._process_database(conn, snowflake_db) - conn.close() - # Emit Stale entity workunits - yield from 
self.stale_entity_removal_handler.gen_removed_entity_workunits() + conn.close() + # Emit Stale entity workunits + yield from self.stale_entity_removal_handler.gen_removed_entity_workunits() - if self.config.profiling.enabled and len(databases) != 0: - yield from self.profiler.get_workunits(databases) + if self.config.profiling.enabled and len(databases) != 0: + yield from self.profiler.get_workunits(databases) if self.config.include_usage_stats or self.config.include_operational_stats: if self.redundant_run_skip_handler.should_skip_this_run( @@ -462,14 +460,27 @@ def get_workunits(self) -> Iterable[WorkUnit]: start_time_millis=datetime_to_ts_millis(self.config.start_time), end_time_millis=datetime_to_ts_millis(self.config.end_time), ) - yield from self.usage_extractor.get_workunits() + + discovered_datasets: List[str] = [ + self.get_dataset_identifier(table.name, schema.name, db.name) + for db in databases + for schema in db.schemas + for table in schema.tables + ] + [ + self.get_dataset_identifier(table.name, schema.name, db.name) + for db in databases + for schema in db.schemas + for table in schema.views + ] + yield from self.usage_extractor.get_workunits(discovered_datasets) def _process_database( self, conn: SnowflakeConnection, snowflake_db: SnowflakeDatabase ) -> Iterable[MetadataWorkUnit]: db_name = snowflake_db.name - yield from self.gen_database_containers(snowflake_db) + if self.config.include_technical_schema: + yield from self.gen_database_containers(snowflake_db) # Use database and extract metadata from its information_schema # If this query fails, it means, user does not have usage access on database @@ -501,23 +512,26 @@ def _process_schema( self, conn: SnowflakeConnection, snowflake_schema: SnowflakeSchema, db_name: str ) -> Iterable[MetadataWorkUnit]: schema_name = snowflake_schema.name - yield from self.gen_schema_containers(snowflake_schema, db_name) + if self.config.include_technical_schema: + yield from 
self.gen_schema_containers(snowflake_schema, db_name) if self.config.include_tables: snowflake_schema.tables = self.get_tables_for_schema( conn, schema_name, db_name ) - for table in snowflake_schema.tables: - yield from self._process_table(conn, table, schema_name, db_name) + if self.config.include_technical_schema: + for table in snowflake_schema.tables: + yield from self._process_table(conn, table, schema_name, db_name) if self.config.include_views: snowflake_schema.views = self.get_views_for_schema( conn, schema_name, db_name ) - for view in snowflake_schema.views: - yield from self._process_view(conn, view, schema_name, db_name) + if self.config.include_technical_schema: + for view in snowflake_schema.views: + yield from self._process_view(conn, view, schema_name, db_name) def _process_table( self, From 164e47a2e9beb80c1f79f025246ae7a981e64d44 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 5 Oct 2022 04:12:05 +0000 Subject: [PATCH 38/76] refactor(ingest): remove typing workarounds (#6108) Possible now that we're on mypy 0.980. --- .../src/datahub/ingestion/api/committable.py | 25 +++---------------- .../src/datahub/ingestion/api/common.py | 7 +----- .../src/datahub/ingestion/api/source.py | 3 +-- 3 files changed, 6 insertions(+), 29 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/api/committable.py b/metadata-ingestion/src/datahub/ingestion/api/committable.py index e41eb24abc2d96..f51fed059087db 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/committable.py +++ b/metadata-ingestion/src/datahub/ingestion/api/committable.py @@ -11,17 +11,10 @@ class CommitPolicy(Enum): @dataclass -class _CommittableConcrete: +class Committable(ABC): name: str commit_policy: CommitPolicy - committed: bool - - -# The concrete portion Committable is separated from the abstract portion due to -# https://github.com/python/mypy/issues/5374#issuecomment-568335302. 
-class Committable(_CommittableConcrete, ABC): - def __init__(self, name: str, commit_policy: CommitPolicy): - super(Committable, self).__init__(name, commit_policy, committed=False) + committed: bool = False @abstractmethod def commit(self) -> None: @@ -34,25 +27,15 @@ def commit(self) -> None: FilterType = TypeVar("FilterType") -class _StatefulCommittableConcrete(Generic[StateType]): - def __init__(self, state_to_commit: StateType): - self.state_to_commit: StateType = state_to_commit - - class StatefulCommittable( Committable, - _StatefulCommittableConcrete[StateType], Generic[StateKeyType, StateType, FilterType], ): def __init__( self, name: str, commit_policy: CommitPolicy, state_to_commit: StateType ): - # _ConcreteCommittable will be the first from this class. - super(StatefulCommittable, self).__init__( - name=name, commit_policy=commit_policy - ) - # _StatefulCommittableConcrete will be after _CommittableConcrete in the __mro__. - super(_CommittableConcrete, self).__init__(state_to_commit=state_to_commit) + super().__init__(name=name, commit_policy=commit_policy) + self.state_to_commit: StateType = state_to_commit def has_successfully_committed(self) -> bool: return bool(not self.state_to_commit or self.committed) diff --git a/metadata-ingestion/src/datahub/ingestion/api/common.py b/metadata-ingestion/src/datahub/ingestion/api/common.py index 78f12fd27a3b74..e919be043fcf28 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/common.py +++ b/metadata-ingestion/src/datahub/ingestion/api/common.py @@ -33,14 +33,9 @@ class EndOfStream(ControlRecord): @dataclass -class _WorkUnitId(metaclass=ABCMeta): +class WorkUnit(metaclass=ABCMeta): id: str - -# For information on why the WorkUnit class is structured this way -# and is separating the dataclass portion from the abstract methods, see -# https://github.com/python/mypy/issues/5374#issuecomment-568335302. 
-class WorkUnit(_WorkUnitId, metaclass=ABCMeta): @abstractmethod def get_metadata(self) -> dict: pass diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index c72fed1772b2e7..5f7444284460d9 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -114,8 +114,7 @@ def get_records(self, workunit: WorkUnitType) -> Iterable[RecordEnvelope]: pass -# See https://github.com/python/mypy/issues/5374 for why we suppress this mypy error. -@dataclass # type: ignore[misc] +@dataclass class Source(Closeable, metaclass=ABCMeta): ctx: PipelineContext From 395d7ea2f5d77cbee1776fa59b8caea67edb7648 Mon Sep 17 00:00:00 2001 From: PrashantKhadke Date: Tue, 4 Oct 2022 21:13:14 -0700 Subject: [PATCH 39/76] docs(okta): Added information about AUTH_OIDC_EXTRACT_GROUPS_ENABLED (#6120) * Added information about AUTH_OIDC_EXTRACT_GROUPS_ENABLED * Update configure-oidc-react-okta.md Co-authored-by: Aditya Radhakrishnan --- docs/authentication/guides/sso/configure-oidc-react-okta.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/authentication/guides/sso/configure-oidc-react-okta.md b/docs/authentication/guides/sso/configure-oidc-react-okta.md index 6e7119e8368cbb..3766d7dbc20caa 100644 --- a/docs/authentication/guides/sso/configure-oidc-react-okta.md +++ b/docs/authentication/guides/sso/configure-oidc-react-okta.md @@ -84,7 +84,7 @@ AUTH_OIDC_SCOPE="openid profile email groups" Replacing the placeholders above with the client id & client secret received from Okta in Step 2. -> **Pro Tip!** You can easily enable Okta to return the groups that a user is associated with, which will be provisioned in DataHub, along with the user logging in, +> **Pro Tip!** You can easily enable Okta to return the groups that a user is associated with, which will be provisioned in DataHub, along with the user logging in. 
This can be enabled by setting the `AUTH_OIDC_EXTRACT_GROUPS_ENABLED` flag to `true`. > if they do not already exist in DataHub. You can enable your Okta application to return a 'groups' claim from the Okta Console at Applications > Your Application -> Sign On -> OpenID Connect ID Token Settings (Requires an edit). > > By default, we assume that the groups will appear in a claim named "groups". This can be customized using the `AUTH_OIDC_GROUPS_CLAIM` container configuration. From 864e64de8f08ba7092f42d0020da641cf5114fa9 Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Wed, 5 Oct 2022 13:54:26 -0700 Subject: [PATCH 40/76] feat(lineage): show fully qualified task name in lineage UI (#6126) * showing fully qualified task name in lineage UI * slight refactor * use display name * fix test --- .../src/app/entity/dataJob/DataJobEntity.tsx | 22 ++++++++++++++++++- .../lineage/__tests__/constructTree.test.ts | 2 +- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/datahub-web-react/src/app/entity/dataJob/DataJobEntity.tsx b/datahub-web-react/src/app/entity/dataJob/DataJobEntity.tsx index 7e5ed2b5d482d2..02d73982550e87 100644 --- a/datahub-web-react/src/app/entity/dataJob/DataJobEntity.tsx +++ b/datahub-web-react/src/app/entity/dataJob/DataJobEntity.tsx @@ -17,6 +17,7 @@ import { getDataForEntityType } from '../shared/containers/profile/utils'; import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; import { RunsTab } from './tabs/RunsTab'; import { EntityMenuItems } from '../shared/EntityDropdown/EntityDropdown'; +import { DataFlowEntity } from '../dataFlow/DataFlowEntity'; const getDataJobPlatformName = (data?: DataJob): string => { return data?.dataFlow?.platform?.properties?.displayName || data?.dataFlow?.platform?.name || ''; @@ -174,10 +175,29 @@ export class DataJobEntity implements Entity { ); }; + getExpandedNameForDataJob = (entity: DataJob): string => { + const name = this.displayName(entity); + const 
flowName = entity?.dataFlow ? new DataFlowEntity().displayName(entity?.dataFlow) : undefined; + + // if we have no name, just return blank. this should not happen, so dont try & construct a name + if (!name) { + return ''; + } + + // if we have a flow name, return the full name of flow.task + if (flowName) { + return `${flowName}.${name}`; + } + + // otherwise, just return the task name (same as non-expanded) + return name; + }; + getLineageVizConfig = (entity: DataJob) => { return { urn: entity?.urn, - name: entity?.properties?.name || '', + name: this.displayName(entity), + expandedName: this.getExpandedNameForDataJob(entity), type: EntityType.DataJob, icon: entity?.dataFlow?.platform?.properties?.logoUrl || '', platform: entity?.dataFlow?.platform, diff --git a/datahub-web-react/src/app/lineage/__tests__/constructTree.test.ts b/datahub-web-react/src/app/lineage/__tests__/constructTree.test.ts index 158a47f6435fce..319d4e66fa9270 100644 --- a/datahub-web-react/src/app/lineage/__tests__/constructTree.test.ts +++ b/datahub-web-react/src/app/lineage/__tests__/constructTree.test.ts @@ -375,7 +375,7 @@ describe('constructTree', () => { children: [ { name: 'DataJobInfoName', - expandedName: undefined, + expandedName: 'DataFlowInfoName.DataJobInfoName', type: EntityType.DataJob, unexploredChildren: 0, urn: dataJob1.urn, From 5fa8818dd30f2d25357abc5b403821ca40f43161 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Wed, 5 Oct 2022 14:50:44 -0700 Subject: [PATCH 41/76] docs(tableau): adding an ingestion video (#6124) --- .../tableau/{tableau.md => tableau_pre.md} | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) rename metadata-ingestion/docs/sources/tableau/{tableau.md => tableau_pre.md} (95%) diff --git a/metadata-ingestion/docs/sources/tableau/tableau.md b/metadata-ingestion/docs/sources/tableau/tableau_pre.md similarity index 95% rename from metadata-ingestion/docs/sources/tableau/tableau.md rename to metadata-ingestion/docs/sources/tableau/tableau_pre.md 
index dda55ececef879..838d9117420127 100644 --- a/metadata-ingestion/docs/sources/tableau/tableau.md +++ b/metadata-ingestion/docs/sources/tableau/tableau_pre.md @@ -6,6 +6,33 @@ In order to ingest metadata from tableau, you will need: - [Enable the Tableau Metadata API](https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_start.html#enable-the-tableau-metadata-api-for-tableau-server) for Tableau Server, if its not already enabled. - Tableau Credentials (Username/Password or [Personal Access Token](https://help.tableau.com/current/pro/desktop/en-us/useracct.htm#create-and-revoke-personal-access-tokens)) +### Ingestion through UI + +The following video shows you how to get started with ingesting Tableau metadata through the UI. + +
+